|
1 | 1 | #!/usr/bin/env python |
2 | 2 | # |
3 | 3 | # Routines that interface with the Commitfest app. |
4 | | -# For now these use webscraping, but they could become real API calls. |
| 4 | + |
| 5 | +from datetime import datetime |
5 | 6 |
|
6 | 7 | import cfbot_config |
7 | 8 | import cfbot_util |
8 | | -import html |
9 | | - |
10 | | -# from html.parser import HTMLParser |
11 | 9 | import re |
12 | 10 |
|
13 | 11 |
|
@@ -108,89 +106,46 @@ def get_latest_patches_from_thread_url(thread_url): |
108 | 106 | def get_thread_url_for_submission(commitfest_id, submission_id): |
109 | 107 | """Given a Commitfest ID and a submission ID, return the URL of the 'whole |
110 | 108 | thread' page in the mailing list archives.""" |
111 | | - # find all the threads and latest message times |
112 | | - result = None |
113 | | - url = f"{cfbot_config.COMMITFEST_HOST}/patch/{submission_id}/" |
114 | | - candidates = [] |
115 | | - candidate = None |
116 | | - submission_page = cfbot_util.slow_fetch(url, none_for_404=True) |
117 | | - |
118 | | - if submission_page is None: |
| 109 | + url = f"{cfbot_config.COMMITFEST_HOST}/api/v1/patches/{submission_id}/threads" |
| 110 | + data = cfbot_util.slow_fetch_json(url, none_for_404=True) |
| 111 | + |
| 112 | + if data is None: |
119 | 113 | return None |
120 | 114 |
|
121 | | - for line in submission_page.splitlines(): |
122 | | - groups = re.search( |
123 | | - """Latest at <a href="https://www.postgresql.org/message-id/([^"]+)">(2[^<]+)""", |
124 | | - line, |
125 | | - ) |
126 | | - if groups: |
127 | | - candidate = (groups.group(2), groups.group(1)) |
128 | | - # we'll only take threads that are followed by evidence that there is at least one attachment |
129 | | - groups = re.search("""Latest attachment .* <button type="button" """, line) |
130 | | - if groups: |
131 | | - candidates.append(candidate) |
132 | | - # take the one with the most recent email |
133 | | - if len(candidates) > 0: |
134 | | - candidates.sort() |
135 | | - result = "https://www.postgresql.org/message-id/flat/" + candidates[-1][1] |
136 | | - return result |
| 115 | + # Filter to threads that have attachments, then pick the one with the most |
| 116 | + # recent message |
| 117 | + candidates = [ |
| 118 | + (t["latest_message_time"], t["messageid"]) |
| 119 | + for t in data["threads"] |
| 120 | + if t["has_attachment"] |
| 121 | + ] |
| 122 | + |
| 123 | + if not candidates: |
| 124 | + return None |
| 125 | + |
| 126 | + candidates.sort() |
| 127 | + return "https://www.postgresql.org/message-id/flat/" + candidates[-1][1] |
137 | 128 |
|
138 | 129 |
|
139 | 130 | def get_submissions_for_commitfest(commitfest_id): |
140 | 131 | """Given a Commitfest ID, return a list of Submission objects.""" |
141 | | - result = [] |
142 | | - # parser = HTMLParser() |
143 | | - url = f"{cfbot_config.COMMITFEST_HOST}/{commitfest_id}/" |
144 | | - state = None |
145 | | - latest_email = None |
146 | | - authors = "" |
147 | | - td_count = 0 |
148 | | - body = cfbot_util.slow_fetch(url, True) |
149 | | - if body is None: |
| 132 | + url = f"{cfbot_config.COMMITFEST_HOST}/api/v1/commitfests/{commitfest_id}/patches" |
| 133 | + data = cfbot_util.slow_fetch_json(url, none_for_404=True) |
| 134 | + |
| 135 | + if data is None: |
150 | 136 | return [] |
151 | | - for line in body.splitlines(): |
152 | | - # maybe it's easier to count rows and columns |
153 | | - if re.search("<tr>", line): |
154 | | - td_count = 0 |
155 | | - continue |
156 | | - if re.search("<td[^>]*>", line): |
157 | | - td_count += 1 |
158 | | - |
159 | | - groups = re.search('<a href="/patch/([0-9]+)/">([^<]+)</a>', line) |
160 | | - if groups: |
161 | | - submission_id = groups.group(1) |
162 | | - name = html.unescape(groups.group(2)) |
163 | | - continue |
164 | | - if td_count == 8: |
165 | | - groups = re.search("<td>([^<]*)</td>", line) |
166 | | - if groups: |
167 | | - authors = groups.group(1) |
168 | | - authors = re.sub(" *\\([^)]*\\)", "", authors) |
169 | | - continue |
170 | | - if td_count == 3: |
171 | | - groups = re.search( |
172 | | - '<td><span class="badge[^"]*">([^<]+)</span></td>', |
173 | | - line, |
174 | | - # '<td><span class="label label-[^"]*">([^<]+)</span></td>', line |
175 | | - ) |
176 | | - if groups and not state: |
177 | | - state = groups.group(1) |
178 | | - continue |
179 | | - groups = re.search('<td style="white-space: nowrap;" title="([^"]+)">', line) |
180 | | - if groups: |
181 | | - latest_email = groups.group(1) |
182 | | - result.append( |
183 | | - Submission( |
184 | | - submission_id, |
185 | | - commitfest_id, |
186 | | - name, |
187 | | - state, |
188 | | - authors.split(", "), |
189 | | - latest_email, |
190 | | - ) |
191 | | - ) |
192 | | - state = None |
193 | | - return result |
| 137 | + |
| 138 | + return [ |
| 139 | + Submission( |
| 140 | + p["id"], |
| 141 | + commitfest_id, |
| 142 | + p["name"], |
| 143 | + p["status"], |
| 144 | + p["authors"], |
| 145 | + p["last_email_time"], |
| 146 | + ) |
| 147 | + for p in data["patches"] |
| 148 | + ] |
194 | 149 |
|
195 | 150 |
|
196 | 151 | def get_current_commitfests(): |
|
0 commit comments