Skip to content

Commit 937639f

Browse files
committed
tools/get_release_contributors: Add user lookup by email
This extends the release contributor scripts to look up users by email via the GitHub search/users API. This is helpful for the Weblate process, which only adds the original authors as Co-authored-by. In addition, the output changes to fall back to real names (from the Co-authored-by lines) if no GitHub login is found. To make that mixed output more readable, commas are now added between user names.
1 parent 7294942 commit 937639f

1 file changed

Lines changed: 74 additions & 27 deletions

File tree

tools/get_release_contributors.py

Lines changed: 74 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class UnexpectedGithubStatus(RuntimeError):
3434
pass
3535

3636

37-
# List of user names which should be ignored (such as bots):
38-
ignore_list = ['github-actions[bot]', 'imgbot[bot]', 'actions-bot', 'actions-user', 'ImgBotApp', 'dependabot[bot]', 'weblate']
37+
# List of contributor names which should be ignored (such as bots):
38+
ignore_list = ['@github-actions[bot]', '@imgbot[bot]', '@actions-bot', '@actions-user', '@ImgBotApp', '@dependabot[bot]', '@weblate']
3939

4040
CHARSET = 'utf-8'
4141

@@ -62,7 +62,7 @@ def set_repo(self, repo):
6262
def set_github_token(self, token):
6363
self.token = token
6464

65-
def get_login(self, key, commit_hash):
65+
def _get_login(self, key, commit_hash):
6666
"""
6767
Returns the Github login associated with the given name+email key.
6868
A related commit hash is required in order to have a reference
@@ -71,24 +71,36 @@ def get_login(self, key, commit_hash):
7171
Once looked up, the results are cached in a local file.
7272
"""
7373
if key not in self.keys_to_user:
74-
if commit_hash:
75-
self.keys_to_user[key] = self.get_user_by_commit(commit_hash)
74+
user = self.get_user_by_email(key)
75+
if not user and commit_hash:
76+
user = self.get_user_by_commit(commit_hash)
77+
78+
if user:
79+
self.keys_to_user[key] = user
7680
self.save()
77-
else:
78-
return None
79-
return self.keys_to_user[key]
81+
82+
return self.keys_to_user.get(key, None)
83+
84+
def get_login_or_realname(self, key, commit_hash):
85+
"""
86+
Returns the Github login (@-prefixed) or, if not found, the real name from
87+
the given name+email key.
88+
"""
89+
login = self._get_login(key, commit_hash)
90+
if login:
91+
return f'@{login}'
92+
m = re.match('\A([^@<>]+) <.*>\Z', key)
93+
if m:
94+
return m.group(1)
95+
logger.warning(f'unable to extract github login or real name from {repr(key)}')
96+
return None
8097

8198
def get_user_by_commit(self, hash):
8299
"""
83100
Retrieves the associated Github user name for the given commit
84101
hash.
85102
"""
86-
headers = {
87-
'Accept': 'application/vnd.github.v3+json',
88-
}
89-
if self.token:
90-
headers['Authorization'] = 'token %s' % self.token
91-
r = requests.get('https://api.github.com/repos/jamulussoftware/%s/commits/%s' % (self.repo, hash), headers=headers)
103+
r = self._github_api_get(f'repos/jamulussoftware/{self.repo}/commits/{hash}')
92104
if 200 <= r.status_code < 300:
93105
try:
94106
return r.json()['author']['login']
@@ -100,6 +112,38 @@ def get_user_by_commit(self, hash):
100112
return ''
101113
raise UnexpectedGithubStatus('status was %d' % r.status_code)
102114

115+
def _github_api_get(self, path, *args, **kwargs):
116+
headers = {
117+
'Accept': 'application/vnd.github.v3+json',
118+
}
119+
if self.token:
120+
headers['Authorization'] = 'token %s' % self.token
121+
r = requests.get(f'https://api.github.com/{path}', *args, headers=headers, **kwargs)
122+
return r
123+
124+
def get_user_by_email(self, key):
125+
m = re.match(r'\A[^<]+<([^<> ]+@[^<> ]+)>\Z', key)
126+
if not m:
127+
return None
128+
email = m.group(1)
129+
# Handle Github-generated email addresses via static matching:
130+
m = re.match(r'\A(\d+\+)?([^+@]+)\@users\.noreply\.github\.com\Z', email)
131+
if m:
132+
return m.group(2)
133+
r = self._github_api_get('search/users', params={'q': f'{email} in:email'})
134+
if r.status_code < 200 or r.status_code >= 300:
135+
logger.warning(f'search/users for {email} failed with code {r.status_code}')
136+
return None
137+
items = r.json().get('items', [])
138+
for item in items:
139+
login = item['login']
140+
u = self._github_api_get(f'users/{login}').json()
141+
if u.get('email', '') == email:
142+
return login
143+
144+
logger.warning(f'unable to find a github profile with public email {email}')
145+
return None
146+
103147
def save(self):
104148
"""
105149
Saves the cache to disk.
@@ -149,8 +193,8 @@ def print_website_contributors(from_, to):
149193

150194

151195
def print_contributors(title, git_log_selector, from_, to):
152-
contributors = ['@%s' % u for u in find_contributors(git_log_selector, from_, to) if u and u not in ignore_list]
153-
contributors_str = ' '.join(contributors)
196+
contributors = [u for u in find_contributors(git_log_selector, from_, to) if u and u not in ignore_list]
197+
contributors_str = ', '.join(contributors)
154198
print('%s: %s' % (title, contributors_str))
155199

156200

@@ -170,26 +214,29 @@ def find_contributors(git_log_selector, from_, to):
170214
if not commit:
171215
continue
172216
hash, author_key = commit.split('\n', 1)[0].split(' ', 1)
173-
login = authors.get_login(author_key, hash)
174-
contributors.add(login)
175-
co_authors = re.findall('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*\n', commit, re.I)
217+
contributor = authors.get_login_or_realname(author_key, hash)
218+
contributors.add(contributor)
219+
co_authors = re.findall('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*(?:$|\n)', commit, re.I)
176220
for co_author_full, co_author_email in co_authors:
177-
login = authors.get_login(co_author_full, None)
178-
if not login:
221+
contributor = authors.get_login_or_realname(co_author_full, None)
222+
if not contributor or not contributor.startswith('@'):
179223
# try to find a previous commit by this mail address
180-
# and pass this commit id to get_login() to retrieve the
224+
# and pass this commit id to get_login_or_realname() to retrieve the
181225
# associated handle from the github API.
182226
commit = subprocess.check_output(['git', 'log', '--format=%H', '--max-count=1', '--author=%s' % re.escape(co_author_email)]).strip().decode(CHARSET)
183227
if commit:
184-
login = authors.get_login(co_author_full, commit)
185-
if login:
186-
contributors.add(login)
228+
contributor = authors.get_login_or_realname(co_author_full, commit)
229+
if contributor:
230+
contributors.add(contributor)
187231

188232
# Resolve co-authors last because we have to rely on having seen the
189233
# email-to-login mapping via some other commit.
190234
for co_author in co_author_keys:
191-
login = authors.get_login(co_author, None)
192-
contributors.add(login)
235+
contributor = authors.get_login_or_realname(co_author, None)
236+
if contributor:
237+
contributors.add(contributor)
238+
else:
239+
contributors.add(realname)
193240
return sorted(contributors, key=str.casefold)
194241

195242

0 commit comments

Comments
 (0)