55
66"""
77
8+ import re
89import warnings
910import hashlib
1011from typing import Any , Dict
1516
# Maps a misspelled domain to the domain the user most likely meant.
# _clean_domain() looks domains up here after stripping whitespace, so every
# key must be an exact, whitespace-free domain or it can never match.
# Digit-prefixed gmail typos (e.g. "35gmai.com") are not listed: the
# r"^\d+(?:gmail?\.com)$" substitution in _clean_domain() covers them.
_TYPO_DOMAINS = {
    # gmail.com
    "gmai.com": "gmail.com",
    "gamil.com": "gmail.com",
    "gmali.com": "gmail.com",
    "gmial.com": "gmail.com",
    "gmil.com": "gmail.com",
    "gmaill.com": "gmail.com",
    "gmailm.com": "gmail.com",
    "gmailo.com": "gmail.com",
    "gmailyhoo.com": "gmail.com",
    "yahoogmail.com": "gmail.com",
    # outlook.com
    "putlook.com": "outlook.com",
}
2832
# Maps an alternate domain to the single domain it is rewritten to by
# _clean_domain(), so that addresses on either domain normalize identically.
_EQUIVALENT_DOMAINS = {
    "googlemail.com": "gmail.com",
    "pm.me": "protonmail.com",
    "proton.me": "protonmail.com",
    "yandex.by": "yandex.ru",
    "yandex.com": "yandex.ru",
    "yandex.kz": "yandex.ru",
    "yandex.ua": "yandex.ru",
    "ya.ru": "yandex.ru",
}
43+
# Domains checked by _clean_email(): when an address's domain minus its first
# label is in this set, the domain is collapsed to the listed domain and the
# first label is used as the local part.
_FASTMAIL_DOMAINS = {
    "123mail.org",
    "150mail.com",
    "150ml.com",
    "16mail.com",
    "2-mail.com",
    "4email.net",
    "50mail.com",
    "airpost.net",
    "allmail.net",
    "bestmail.us",
    "cluemail.com",
    "elitemail.org",
    "emailcorner.net",
    "emailengine.net",
    "emailengine.org",
    "emailgroups.net",
    "emailplus.org",
    "emailuser.net",
    "eml.cc",
    "f-m.fm",
    "fast-email.com",
    "fast-mail.org",
    "fastem.com",
    "fastemail.us",
    "fastemailer.com",
    "fastest.cc",
    "fastimap.com",
    "fastmail.cn",
    "fastmail.co.uk",
    "fastmail.com",
    "fastmail.com.au",
    "fastmail.de",
    "fastmail.es",
    "fastmail.fm",
    "fastmail.fr",
    "fastmail.im",
    "fastmail.in",
    "fastmail.jp",
    "fastmail.mx",
    "fastmail.net",
    "fastmail.nl",
    "fastmail.org",
    "fastmail.se",
    "fastmail.to",
    "fastmail.tw",
    "fastmail.uk",
    "fastmail.us",
    "fastmailbox.net",
    "fastmessaging.com",
    "fea.st",
    "fmail.co.uk",
    "fmailbox.com",
    "fmgirl.com",
    "fmguy.com",
    "ftml.net",
    "h-mail.us",
    "hailmail.net",
    "imap-mail.com",
    "imap.cc",
    "imapmail.org",
    "inoutbox.com",
    "internet-e-mail.com",
    "internet-mail.org",
    "internetemails.net",
    "internetmailing.net",
    "jetemail.net",
    "justemail.net",
    "letterboxes.org",
    "mail-central.com",
    "mail-page.com",
    "mailandftp.com",
    "mailas.com",
    "mailbolt.com",
    "mailc.net",
    "mailcan.com",
    "mailforce.net",
    "mailftp.com",
    "mailhaven.com",
    "mailingaddress.org",
    "mailite.com",
    "mailmight.com",
    "mailnew.com",
    "mailsent.net",
    "mailservice.ms",
    "mailup.net",
    "mailworks.org",
    "ml1.net",
    "mm.st",
    "myfastmail.com",
    "mymacmail.com",
    "nospammail.net",
    "ownmail.net",
    "petml.com",
    "postinbox.com",
    "postpro.net",
    "proinbox.com",
    "promessage.com",
    "realemail.net",
    "reallyfast.biz",
    "reallyfast.info",
    "rushpost.com",
    "sent.as",
    "sent.at",
    "sent.com",
    "speedpost.net",
    "speedymail.org",
    "ssl-mail.com",
    "swift-mail.com",
    "the-fastest.net",
    "the-quickest.com",
    "theinternetemail.com",
    "veryfast.biz",
    "veryspeedy.net",
    "warpmail.net",
    "xsmail.com",
    "yepmail.net",
    "your-mail.com",
}
163+
# Domains for which _clean_email() treats "-" (rather than "+") as the
# divider between the local part and its alias suffix.
_YAHOO_DOMAINS = {
    "y7mail.com",
    "yahoo.at",
    "yahoo.be",
    "yahoo.bg",
    "yahoo.ca",
    "yahoo.cl",
    "yahoo.co.id",
    "yahoo.co.il",
    "yahoo.co.in",
    "yahoo.co.kr",
    "yahoo.co.nz",
    "yahoo.co.th",
    "yahoo.co.uk",
    "yahoo.co.za",
    "yahoo.com",
    "yahoo.com.ar",
    "yahoo.com.au",
    "yahoo.com.br",
    "yahoo.com.co",
    "yahoo.com.hk",
    "yahoo.com.hr",
    "yahoo.com.mx",
    "yahoo.com.my",
    "yahoo.com.pe",
    "yahoo.com.ph",
    "yahoo.com.sg",
    "yahoo.com.tr",
    "yahoo.com.tw",
    "yahoo.com.ua",
    "yahoo.com.ve",
    "yahoo.com.vn",
    "yahoo.cz",
    "yahoo.de",
    "yahoo.dk",
    "yahoo.ee",
    "yahoo.es",
    "yahoo.fi",
    "yahoo.fr",
    "yahoo.gr",
    "yahoo.hu",
    "yahoo.ie",
    "yahoo.in",
    "yahoo.it",
    "yahoo.lt",
    "yahoo.lv",
    "yahoo.nl",
    "yahoo.no",
    "yahoo.pl",
    "yahoo.pt",
    "yahoo.ro",
    "yahoo.se",
    "yahoo.sk",
    "ymail.com",
}
219+
29220
30221def prepare_report (request : Dict [str , Any ], validate : bool ):
31222 """Validate and prepare minFraud report"""
@@ -91,29 +282,42 @@ def maybe_hash_email(transaction):
91282 if address is None :
92283 return
93284
94- address = address .lower ().strip ()
95-
96- at_idx = address .rfind ("@" )
97- if at_idx == - 1 :
285+ address , domain = _clean_email (address )
286+ if not address :
98287 return
99288
100- domain = _clean_domain (address [at_idx + 1 :]) # noqa
101- local_part = address [:at_idx ]
102-
103289 if domain != "" and "domain" not in email :
104290 email ["domain" ] = domain
105291
106- email ["address" ] = _hash_email ( local_part , domain )
292+ email ["address" ] = hashlib . md5 ( address . encode ( "UTF-8" )). hexdigest ( )
107293
108294
def _clean_domain(domain: str) -> str:
    """Normalize the domain part of an email address.

    The domain is stripped of surrounding whitespace and trailing dots,
    converted to its ASCII (IDNA) form, passed through a series of
    ".com"/gmail typo corrections, and finally mapped through
    ``_TYPO_DOMAINS`` and ``_EQUIVALENT_DOMAINS``.

    :param domain: the text after the last "@" of an address.
    :return: the cleaned domain; unchanged input is returned as-is when no
        rule applies.
    """
    domain = domain.strip().rstrip(".")
    try:
        domain = domain.encode("idna").decode("ASCII")
    except UnicodeError:
        # The idna codec rejects empty or over-long labels (e.g. "foo..com").
        # Keep the domain as typed so the remaining clean-up still runs
        # instead of aborting on malformed input.
        pass

    # Collapse a repeated ".com" suffix: "gmail.com.com" -> "gmail.com".
    domain = re.sub(r"(?:\.com){2,}$", ".com", domain)
    # Drop junk glued onto ".com": "gmail.comx" -> "gmail.com".
    domain = re.sub(r"\.com[^.]+$", ".com", domain)
    # Rewrite final labels that look like a mistyped "com" (".con", ".vom",
    # ".coom", ...).
    # NOTE(review): the pattern also matches real TLDs of the same shape
    # (e.g. ".cam"), and its empty alternative only matches a trailing dot,
    # which was already stripped above — confirm both are intended.
    domain = re.sub(r"(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$", ".com", domain)
    # Remove leading digits in front of "gmai.com"/"gmail.com".
    domain = re.sub(r"^\d+(?:gmail?\.com)$", "gmail.com", domain)

    domain = _TYPO_DOMAINS.get(domain, domain)
    domain = _EQUIVALENT_DOMAINS.get(domain, domain)

    return domain
307+
308+
309+ def _clean_email (address ):
310+ address = address .lower ().strip ()
311+
312+ at_idx = address .rfind ("@" )
313+ if at_idx == - 1 :
314+ return None , None
315+
316+ domain = _clean_domain (address [at_idx + 1 :]) # noqa
317+ local_part = address [:at_idx ]
318+
319+ # Strip off aliased part of email address.
320+ if domain in _YAHOO_DOMAINS :
117321 divider = "-"
118322 else :
119323 divider = "+"
@@ -122,4 +326,15 @@ def _hash_email(local_part, domain):
122326 if alias_idx > 0 :
123327 local_part = local_part [:alias_idx ]
124328
125- return hashlib .md5 (f"{ local_part } @{ domain } " .encode ("UTF-8" )).hexdigest ()
329+ if domain == "gmail.com" :
330+ local_part = local_part .replace ("." , "" )
331+
332+ domain_parts = domain .split ("." )
333+ if len (domain_parts ) > 2 :
334+ possible_domain = "." .join (domain_parts [1 :])
335+ if possible_domain in _FASTMAIL_DOMAINS :
336+ domain = possible_domain
337+ if local_part != "" :
338+ local_part = domain_parts [0 ]
339+
340+ return f"{ local_part } @{ domain } " , domain
0 commit comments