forked from Grow-with-Open-Source/Python-Projects
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert_to_html_tables.py
More file actions
207 lines (169 loc) · 6.53 KB
/
convert_to_html_tables.py
File metadata and controls
207 lines (169 loc) · 6.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/env python
import os
import json
'''
This script requires the following environment variable:
- REPO_NAME:
> example: 'iamwatchdogs/test'
> GitHub action variable: ${{ github.repository }}
'''
def find_table_points(lines):
    """
    Find table points within a given list of lines.

    The table points are determined by the presence of the markers:
        <!-- TABLE BEGINS -->
        <!-- TABLE ENDS -->

    Only the first occurrence of each marker is recorded. A line that
    supplies the start marker is not also examined for the end marker.

    Args:
        lines (list): List of lines to search in.

    Returns:
        tuple: A tuple of two integers containing the indices of the lines
            holding the start marker and the end marker.

    Raises:
        SystemExit: With exit code 1 if either marker is not found, or with
            exit code 2 if the end marker does not appear after the start
            marker.
    """
    # Setting the markers
    table_start_marker = '<!-- TABLE BEGINS -->'
    table_end_marker = '<!-- TABLE ENDS -->'

    # Setting default return values
    table_start = None
    table_end = None

    # Iterating over lines to find the markers
    for index, line in enumerate(lines):
        if table_start is None and table_start_marker in line:
            table_start = index
        elif table_end is None and table_end_marker in line:
            table_end = index

        # Both markers located: no need to scan the rest of the file
        if table_start is not None and table_end is not None:
            break

    # Checking for possible errors.
    # SystemExit is raised directly instead of calling the `exit()` helper,
    # which is injected by the `site` module for interactive sessions and is
    # not guaranteed to exist in every interpreter configuration.
    if table_start is None or table_end is None:
        print('Table not found in the file.')
        raise SystemExit(1)
    if table_start >= table_end:
        print('Invalid use of table markers.')
        raise SystemExit(2)

    return (table_start, table_end)
def main():
    """
    Update the index.md file with the latest contributors data.

    This function reads the optional REPO_NAME environment variable, loads
    the contributors log ('.github/data/contributors-log.json') and the
    target file ('index.md'), and locates the table markers through
    find_table_points(). Everything between the two markers is replaced by
    an HTML table with one row per log entry: the project title, links to
    the contributors' GitHub profiles, links to the pull requests, and a
    link to the demo path. Finally the target file is rewritten and a
    success message is printed.

    Every value taken from the log is HTML-escaped and/or URL-quoted before
    it is embedded in the generated markup. Each log entry is expected to
    be a mapping that may provide the keys 'contributor-name',
    'pull-request-number' and 'demo-path'.

    Raises:
        SystemExit: With exit code 1 if the contributors log is missing or
            cannot be parsed as JSON, or if the target file is missing.
            find_table_points() may also terminate the script when the
            table markers are missing or misplaced.

    NOTE(review): no ``if __name__ == '__main__':`` guard invoking this
    function is visible in the reviewed source -- confirm that main() is
    actually called when the script is run.
    """
    import re
    import html
    import urllib.parse
    import sys

    # Retrieving Environmental variables
    REPO_NAME = os.environ.get('REPO_NAME')

    # Setting paths for the target file and the log JSON file
    TARGET_FILE = 'index.md'
    CONTRIBUTORS_LOG = '.github/data/contributors-log.json'

    # Load JSON safely with error handling. The encoding is explicit so the
    # result does not depend on the locale of the machine running the script.
    try:
        with open(CONTRIBUTORS_LOG, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
    except FileNotFoundError:
        print(f"Contributors log not found: {CONTRIBUTORS_LOG}")
        sys.exit(1)
    except ValueError:
        print(f"Invalid JSON in {CONTRIBUTORS_LOG}")
        sys.exit(1)

    # Reading lines from the file
    try:
        with open(TARGET_FILE, 'r', encoding='utf-8') as file:
            lines = file.readlines()
    except FileNotFoundError:
        print(f"Target file not found: {TARGET_FILE}")
        sys.exit(1)

    # Calculating starting and ending points of the targeted table
    table_start, table_end = find_table_points(lines)

    # Creating HTML table header to replace md table
    table_header = [
        '<table>\n',
        '\t<tr align="center">\n',
        '\t\t<th>Project Title</th>\n',
        '\t\t<th>Contributor Names</th>\n',
        '\t\t<th>Pull Requests</th>\n',
        '\t\t<th>Demo</th>\n',
        '\t</tr>\n',
    ]

    # Regular expressions / helpers
    # Only plain ASCII digits may form a pull-request link.
    pr_number_re = re.compile(r'[0-9]+')
    # ASCII control characters (tab, newline, ...) are dropped by browsers
    # when they parse a URL and could hide a scheme from urlparse().
    control_chars_re = re.compile(r'[\x00-\x1f\x7f]')

    # Repository-derived values do not change between rows
    display_repo = html.escape(str(REPO_NAME)) if REPO_NAME else ''
    quoted_repo = urllib.parse.quote(REPO_NAME) if REPO_NAME else ''

    # Labels for the titles that do not map to a project directory
    special_labels = {
        'root': f'/{display_repo}/',
        '{init}': f'/{display_repo}/',
        '{workflows}': f'/{display_repo}/.github/workflows',
        '{scripts}': f'/{display_repo}/.github/scripts',
        '{others}': f'/{display_repo}/.github',
    }

    # Initializing empty list for lines
    updated_lines = []

    # Iterating over log to update target file
    for title, details in data.items():
        display_title = html.escape(str(title))

        # Processing contributors-names
        contributors_names_list = []
        for name in details.get('contributor-name', []):
            name_str = str(name)
            display = html.escape(name_str)
            # safe='' also encodes '/', so a name can never add extra path
            # segments to the profile URL.
            profile_url = 'https://github.com/' + urllib.parse.quote(name_str, safe='')
            contributors_names_list.append(
                f'<a href="{html.escape(profile_url, quote=True)}" '
                f'title="goto {display} profile">{display}</a>'
            )
        contributors_names_output = ', '.join(contributors_names_list)

        # Processing pull-requests
        pull_requests_list = []
        for pr in details.get('pull-request-number', []):
            pr_str = str(pr)
            display_pr = html.escape(pr_str)
            if not pr_number_re.fullmatch(pr_str):
                # Non-numeric identifiers are shown but never linked
                pr_url = '#'
            elif REPO_NAME:
                pr_url = f'https://github.com/{quoted_repo}/pull/{pr_str}'
            else:
                pr_url = f'/pull/{pr_str}'
            pull_requests_list.append(
                f'<a href="{html.escape(pr_url, quote=True)}" '
                f'title="visit pr #{display_pr}">{display_pr}</a>'
            )
        pull_requests_output = ', '.join(pull_requests_list)

        # Processing demo-path (spaces are normalized to %20)
        demo_path = str(details.get('demo-path', '')).replace(' ', '%20')

        # Allow only http(s) or relative paths (no javascript:, data:,
        # vbscript:, etc.); anything else falls back to a harmless '#'.
        href = '#'
        if not control_chars_re.search(demo_path):
            try:
                scheme = urllib.parse.urlparse(demo_path).scheme
            except ValueError:
                # urlparse rejects some malformed URLs (e.g. a broken IPv6
                # host); treat them as unsafe instead of crashing.
                scheme = None
            if scheme in ('', 'http', 'https'):
                href = demo_path

        # Build display label (escaped)
        label = special_labels.get(title, f'/{display_repo}/{display_title}/')
        demo_path_output = (
            f'<a href="{html.escape(href, quote=True)}" '
            f'title="view the result of {display_title}">{label}</a>'
        )

        # Appending all data together (escape title displayed in cell)
        updated_lines.extend([
            '\t<tr align="center">\n',
            f'\t\t<td>{display_title}</td>\n',
            f'\t\t<td>{contributors_names_output}</td>\n',
            f'\t\t<td>{pull_requests_output}</td>\n',
            f'\t\t<td>{demo_path_output}</td>\n',
            '\t</tr>\n',
        ])

    # Table footer
    table_footer = ['</table>\n']

    # Updating the lines with updated data (the marker lines are kept)
    lines[table_start + 1:table_end] = table_header + updated_lines + table_footer

    # Updating the target file
    with open(TARGET_FILE, 'w', encoding='utf-8') as file:
        file.writelines(lines)

    # Printing Success Message
    print(f"Updated '{TARGET_FILE}' Successfully")