@@ -34,8 +34,8 @@ class UnexpectedGithubStatus(RuntimeError):
3434 pass
3535
3636
# Contributor names which should be ignored (such as bots).
# Entries are compared verbatim against the names produced for the
# contributor lists, where Github logins carry an '@' prefix directly followed
# by the login (no whitespace in between).
ignore_list = [
    '@github-actions[bot]',
    '@imgbot[bot]',
    '@actions-bot',
    '@actions-user',
    '@ImgBotApp',
    '@dependabot[bot]',
    '@weblate',
]

CHARSET = 'utf-8'
4141
@@ -62,7 +62,7 @@ def set_repo(self, repo):
def set_github_token(self, token):
    """
    Stores the given Github token on this instance as `token`.
    """
    self.token = token
6464
def _get_login(self, key, commit_hash):
    """
    Returns the Github login associated with the given name+email key.

    The local cache is consulted first. If it holds no usable entry, the
    login is looked up via the email address contained in the key and,
    failing that, via the given commit hash (pass None to skip that step).
    Successful lookups are stored in the cache, which is then saved;
    unsuccessful ones are not cached and will be retried on the next call.

    Returns None if the login cannot be determined.
    """
    login = self.keys_to_user.get(key)
    if login:
        return login

    # No entry at all, or an empty one (e.g. '' stored by an earlier lookup):
    # treat both as a miss so the key gets another chance to be resolved
    # instead of returning a useless empty login forever.
    login = self.get_user_by_email(key)
    if not login and commit_hash:
        login = self.get_user_by_commit(commit_hash)
    if not login:
        return None

    self.keys_to_user[key] = login
    self.save()
    return login
83+
def get_login_or_realname(self, key, commit_hash):
    """
    Returns the Github login (@-prefixed) or, if not found, the real name from
    the given name+email key.

    key is expected to look like 'Real Name <email>'. commit_hash is handed
    to _get_login() unchanged and may be None.

    Returns None (after logging a warning) if neither a login nor a real
    name can be determined.
    """
    login = self._get_login(key, commit_hash)
    if login:
        return f'@{login}'
    # Raw string: \A and \Z are regex anchors, not string escape sequences.
    # Names containing '@', '<' or '>' are not accepted as real names
    # (presumably so a real name cannot be confused with an @-login).
    m = re.match(r'\A([^@<>]+) <.*>\Z', key)
    if m:
        return m.group(1)
    logger.warning(f'unable to extract github login or real name from {key!r}')
    return None
8097
8198 def get_user_by_commit (self , hash ):
8299 """
83100 Retrieves the associated Github user name for the given commit
84101 hash.
85102 """
86- headers = {
87- 'Accept' : 'application/vnd.github.v3+json' ,
88- }
89- if self .token :
90- headers ['Authorization' ] = 'token %s' % self .token
91- r = requests .get ('https://api.github.com/repos/jamulussoftware/%s/commits/%s' % (self .repo , hash ), headers = headers )
103+ r = self ._github_api_get (f'repos/jamulussoftware/{ self .repo } /commits/{ hash } ' )
92104 if 200 <= r .status_code < 300 :
93105 try :
94106 return r .json ()['author' ]['login' ]
@@ -100,6 +112,38 @@ def get_user_by_commit(self, hash):
100112 return ''
101113 raise UnexpectedGithubStatus ('status was %d' % r .status_code )
102114
def _github_api_get(self, path, *args, **kwargs):
    """
    Performs a GET request against the Github API.

    path is appended to 'https://api.github.com/' (so it must not start with
    a slash). All further positional and keyword arguments are passed through
    to requests.get().

    The v3 JSON media type is always requested and, if a token has been set,
    it is sent as Authorization header. Headers supplied by the caller are
    merged in and take precedence. Unless the caller passes its own timeout,
    a default of 30 seconds is applied.

    Returns the response object as-is; the status code is not checked here.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
    }
    if self.token:
        headers['Authorization'] = 'token %s' % self.token
    # Merge rather than pass both: a 'headers' keyword from the caller would
    # otherwise collide with ours and raise a TypeError.
    headers.update(kwargs.pop('headers', None) or {})
    # requests waits indefinitely by default; make sure a stalled connection
    # cannot hang the whole run.
    kwargs.setdefault('timeout', 30)
    return requests.get(f'https://api.github.com/{path}', *args, headers=headers, **kwargs)
123+
def get_user_by_email(self, key):
    """
    Tries to determine the Github login for the email address contained in
    the given name+email key ('Name <email>').

    Github-generated noreply addresses are resolved statically without any
    API request. Any other address is looked up via the Github user search;
    each candidate's profile is then fetched and only accepted if its public
    email equals the address (compared case-insensitively).

    Returns the login, or None if the key contains no usable email address,
    the search request fails or no matching profile is found.
    """
    m = re.match(r'\A[^<]+<([^<> ]+@[^<> ]+)>\Z', key)
    if not m:
        return None
    email = m.group(1)
    # Handle Github-generated email addresses via static matching
    # ('<id>+<login>@users.noreply.github.com' or '<login>@users.noreply...'):
    m = re.match(r'\A(\d+\+)?([^+@]+)\@users\.noreply\.github\.com\Z', email)
    if m:
        return m.group(2)
    r = self._github_api_get('search/users', params={'q': f'{email} in:email'})
    if r.status_code < 200 or r.status_code >= 300:
        logger.warning(f'search/users for {email} failed with code {r.status_code}')
        return None
    wanted = email.casefold()
    for item in r.json().get('items', []):
        login = item['login']
        profile = self._github_api_get(f'users/{login}')
        if not 200 <= profile.status_code < 300:
            # Error responses carry no profile data; try the next candidate.
            continue
        # The email field may be missing or None; the address as written in
        # the commit may differ in case from the one on the profile.
        public_email = profile.json().get('email') or ''
        if public_email.casefold() == wanted:
            return login

    logger.warning(f'unable to find a github profile with public email {email}')
    return None
146+
103147 def save (self ):
104148 """
105149 Saves the cache to disk.
@@ -149,8 +193,8 @@ def print_website_contributors(from_, to):
149193
150194
def print_contributors(title, git_log_selector, from_, to):
    """
    Prints a single line '<title>: <contributor>, <contributor>, ...'.

    The contributors are those reported by find_contributors() for the given
    selector and range, minus empty entries and everything in ignore_list.
    """
    names = []
    for name in find_contributors(git_log_selector, from_, to):
        if not name or name in ignore_list:
            continue
        names.append(name)
    print('%s: %s' % (title, ', '.join(names)))
155199
156200
@@ -170,26 +214,29 @@ def find_contributors(git_log_selector, from_, to):
170214 if not commit :
171215 continue
172216 hash , author_key = commit .split ('\n ' , 1 )[0 ].split (' ' , 1 )
173- login = authors .get_login (author_key , hash )
174- contributors .add (login )
175- co_authors = re .findall ('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*\n ' , commit , re .I )
217+ contributor = authors .get_login_or_realname (author_key , hash )
218+ contributors .add (contributor )
219+ co_authors = re .findall ('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*(?:$| \n ) ' , commit , re .I )
176220 for co_author_full , co_author_email in co_authors :
177- login = authors .get_login (co_author_full , None )
178- if not login :
221+ contributor = authors .get_login_or_realname (co_author_full , None )
222+ if not contributor or not contributor . startswith ( '@' ) :
179223 # try to find a previous commit by this mail address
180- # and pass this commit id to get_login () to retrieve the
224+ # and pass this commit id to get_login_or_realname () to retrieve the
181225 # associated handle from the github API.
182226 commit = subprocess .check_output (['git' , 'log' , '--format=%H' , '--max-count=1' , '--author=%s' % re .escape (co_author_email )]).strip ().decode (CHARSET )
183227 if commit :
184- login = authors .get_login (co_author_full , commit )
185- if login :
186- contributors .add (login )
228+ contributor = authors .get_login_or_realname (co_author_full , commit )
229+ if contributor :
230+ contributors .add (contributor )
187231
188232 # Resolve co-authors last because we have to rely on having seen the
189233 # email-to-login mapping via some other commit.
190234 for co_author in co_author_keys :
191- login = authors .get_login (co_author , None )
192- contributors .add (login )
235+ contributor = authors .get_login_or_realname (co_author , None )
236+ if contributor :
237+ contributors .add (contributor )
238+ else :
239+ contributors .add (realname )
193240 return sorted (contributors , key = str .casefold )
194241
195242
0 commit comments