Skip to content

Commit 6cec0a3

Browse files
committed
feat: Complete DOAJ integration with API v4 and country code generation
- Migrate from DOAJ API v3 to v4 for enhanced metadata access
- Add comprehensive CC license analysis for academic journals
- Implement publisher and geographic distribution analysis
- Add programmatic ISO 3166-1 alpha-2 country code generation
- Include automatic dependency resolution and error handling
- Apply date filtering (default ≥2002) to prevent false positives
- Generate 5 CSV files plus provenance for comprehensive analysis
- Ensure static analysis compliance and comprehensive testing

This integration enables quantification of institutional commitment to Creative Commons licensing in the scholarly publishing ecosystem.
1 parent 74a099c commit 6cec0a3

2 files changed

Lines changed: 877 additions & 0 deletions

File tree

dev/generate_country_codes.py

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
#!/usr/bin/env python
2+
"""
3+
Generate ISO 3166-1 alpha-2 country codes YAML file for DOAJ fetch script.
4+
"""
5+
import os
6+
import sys
7+
import yaml
8+
9+
# Add parent directory so shared can be imported
10+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", "scripts"))
11+
import shared
12+
13+
# ISO 3166-1 alpha-2 country codes (complete code list; names are common
# English short names, not necessarily the official ISO 3166-1 names)
14+
# (alpha-2 code, display name) pairs, kept in alphabetical order by code.
_CODE_NAME_PAIRS = (
    ("AD", "Andorra"),
    ("AE", "United Arab Emirates"),
    ("AF", "Afghanistan"),
    ("AG", "Antigua and Barbuda"),
    ("AI", "Anguilla"),
    ("AL", "Albania"),
    ("AM", "Armenia"),
    ("AO", "Angola"),
    ("AQ", "Antarctica"),
    ("AR", "Argentina"),
    ("AS", "American Samoa"),
    ("AT", "Austria"),
    ("AU", "Australia"),
    ("AW", "Aruba"),
    ("AX", "Åland Islands"),
    ("AZ", "Azerbaijan"),
    ("BA", "Bosnia and Herzegovina"),
    ("BB", "Barbados"),
    ("BD", "Bangladesh"),
    ("BE", "Belgium"),
    ("BF", "Burkina Faso"),
    ("BG", "Bulgaria"),
    ("BH", "Bahrain"),
    ("BI", "Burundi"),
    ("BJ", "Benin"),
    ("BL", "Saint Barthélemy"),
    ("BM", "Bermuda"),
    ("BN", "Brunei"),
    ("BO", "Bolivia"),
    ("BQ", "Caribbean Netherlands"),
    ("BR", "Brazil"),
    ("BS", "Bahamas"),
    ("BT", "Bhutan"),
    ("BV", "Bouvet Island"),
    ("BW", "Botswana"),
    ("BY", "Belarus"),
    ("BZ", "Belize"),
    ("CA", "Canada"),
    ("CC", "Cocos Islands"),
    ("CD", "Democratic Republic of the Congo"),
    ("CF", "Central African Republic"),
    ("CG", "Republic of the Congo"),
    ("CH", "Switzerland"),
    ("CI", "Côte d'Ivoire"),
    ("CK", "Cook Islands"),
    ("CL", "Chile"),
    ("CM", "Cameroon"),
    ("CN", "China"),
    ("CO", "Colombia"),
    ("CR", "Costa Rica"),
    ("CU", "Cuba"),
    ("CV", "Cape Verde"),
    ("CW", "Curaçao"),
    ("CX", "Christmas Island"),
    ("CY", "Cyprus"),
    ("CZ", "Czech Republic"),
    ("DE", "Germany"),
    ("DJ", "Djibouti"),
    ("DK", "Denmark"),
    ("DM", "Dominica"),
    ("DO", "Dominican Republic"),
    ("DZ", "Algeria"),
    ("EC", "Ecuador"),
    ("EE", "Estonia"),
    ("EG", "Egypt"),
    ("EH", "Western Sahara"),
    ("ER", "Eritrea"),
    ("ES", "Spain"),
    ("ET", "Ethiopia"),
    ("FI", "Finland"),
    ("FJ", "Fiji"),
    ("FK", "Falkland Islands"),
    ("FM", "Micronesia"),
    ("FO", "Faroe Islands"),
    ("FR", "France"),
    ("GA", "Gabon"),
    ("GB", "United Kingdom"),
    ("GD", "Grenada"),
    ("GE", "Georgia"),
    ("GF", "French Guiana"),
    ("GG", "Guernsey"),
    ("GH", "Ghana"),
    ("GI", "Gibraltar"),
    ("GL", "Greenland"),
    ("GM", "Gambia"),
    ("GN", "Guinea"),
    ("GP", "Guadeloupe"),
    ("GQ", "Equatorial Guinea"),
    ("GR", "Greece"),
    ("GS", "South Georgia"),
    ("GT", "Guatemala"),
    ("GU", "Guam"),
    ("GW", "Guinea-Bissau"),
    ("GY", "Guyana"),
    ("HK", "Hong Kong"),
    ("HM", "Heard Island"),
    ("HN", "Honduras"),
    ("HR", "Croatia"),
    ("HT", "Haiti"),
    ("HU", "Hungary"),
    ("ID", "Indonesia"),
    ("IE", "Ireland"),
    ("IL", "Israel"),
    ("IM", "Isle of Man"),
    ("IN", "India"),
    ("IO", "British Indian Ocean Territory"),
    ("IQ", "Iraq"),
    ("IR", "Iran"),
    ("IS", "Iceland"),
    ("IT", "Italy"),
    ("JE", "Jersey"),
    ("JM", "Jamaica"),
    ("JO", "Jordan"),
    ("JP", "Japan"),
    ("KE", "Kenya"),
    ("KG", "Kyrgyzstan"),
    ("KH", "Cambodia"),
    ("KI", "Kiribati"),
    ("KM", "Comoros"),
    ("KN", "Saint Kitts and Nevis"),
    ("KP", "North Korea"),
    ("KR", "South Korea"),
    ("KW", "Kuwait"),
    ("KY", "Cayman Islands"),
    ("KZ", "Kazakhstan"),
    ("LA", "Laos"),
    ("LB", "Lebanon"),
    ("LC", "Saint Lucia"),
    ("LI", "Liechtenstein"),
    ("LK", "Sri Lanka"),
    ("LR", "Liberia"),
    ("LS", "Lesotho"),
    ("LT", "Lithuania"),
    ("LU", "Luxembourg"),
    ("LV", "Latvia"),
    ("LY", "Libya"),
    ("MA", "Morocco"),
    ("MC", "Monaco"),
    ("MD", "Moldova"),
    ("ME", "Montenegro"),
    ("MF", "Saint Martin"),
    ("MG", "Madagascar"),
    ("MH", "Marshall Islands"),
    ("MK", "North Macedonia"),
    ("ML", "Mali"),
    ("MM", "Myanmar"),
    ("MN", "Mongolia"),
    ("MO", "Macao"),
    ("MP", "Northern Mariana Islands"),
    ("MQ", "Martinique"),
    ("MR", "Mauritania"),
    ("MS", "Montserrat"),
    ("MT", "Malta"),
    ("MU", "Mauritius"),
    ("MV", "Maldives"),
    ("MW", "Malawi"),
    ("MX", "Mexico"),
    ("MY", "Malaysia"),
    ("MZ", "Mozambique"),
    ("NA", "Namibia"),
    ("NC", "New Caledonia"),
    ("NE", "Niger"),
    ("NF", "Norfolk Island"),
    ("NG", "Nigeria"),
    ("NI", "Nicaragua"),
    ("NL", "Netherlands"),
    ("NO", "Norway"),
    ("NP", "Nepal"),
    ("NR", "Nauru"),
    ("NU", "Niue"),
    ("NZ", "New Zealand"),
    ("OM", "Oman"),
    ("PA", "Panama"),
    ("PE", "Peru"),
    ("PF", "French Polynesia"),
    ("PG", "Papua New Guinea"),
    ("PH", "Philippines"),
    ("PK", "Pakistan"),
    ("PL", "Poland"),
    ("PM", "Saint Pierre and Miquelon"),
    ("PN", "Pitcairn Islands"),
    ("PR", "Puerto Rico"),
    ("PS", "Palestine"),
    ("PT", "Portugal"),
    ("PW", "Palau"),
    ("PY", "Paraguay"),
    ("QA", "Qatar"),
    ("RE", "Réunion"),
    ("RO", "Romania"),
    ("RS", "Serbia"),
    ("RU", "Russia"),
    ("RW", "Rwanda"),
    ("SA", "Saudi Arabia"),
    ("SB", "Solomon Islands"),
    ("SC", "Seychelles"),
    ("SD", "Sudan"),
    ("SE", "Sweden"),
    ("SG", "Singapore"),
    ("SH", "Saint Helena"),
    ("SI", "Slovenia"),
    ("SJ", "Svalbard and Jan Mayen"),
    ("SK", "Slovakia"),
    ("SL", "Sierra Leone"),
    ("SM", "San Marino"),
    ("SN", "Senegal"),
    ("SO", "Somalia"),
    ("SR", "Suriname"),
    ("SS", "South Sudan"),
    ("ST", "São Tomé and Príncipe"),
    ("SV", "El Salvador"),
    ("SX", "Sint Maarten"),
    ("SY", "Syria"),
    ("SZ", "Eswatini"),
    ("TC", "Turks and Caicos Islands"),
    ("TD", "Chad"),
    ("TF", "French Southern Territories"),
    ("TG", "Togo"),
    ("TH", "Thailand"),
    ("TJ", "Tajikistan"),
    ("TK", "Tokelau"),
    ("TL", "Timor-Leste"),
    ("TM", "Turkmenistan"),
    ("TN", "Tunisia"),
    ("TO", "Tonga"),
    ("TR", "Turkey"),
    ("TT", "Trinidad and Tobago"),
    ("TV", "Tuvalu"),
    ("TW", "Taiwan"),
    ("TZ", "Tanzania"),
    ("UA", "Ukraine"),
    ("UG", "Uganda"),
    ("UM", "U.S. Minor Outlying Islands"),
    ("US", "United States"),
    ("UY", "Uruguay"),
    ("UZ", "Uzbekistan"),
    ("VA", "Vatican City"),
    ("VC", "Saint Vincent and the Grenadines"),
    ("VE", "Venezuela"),
    ("VG", "British Virgin Islands"),
    ("VI", "U.S. Virgin Islands"),
    ("VN", "Vietnam"),
    ("VU", "Vanuatu"),
    ("WF", "Wallis and Futuna"),
    ("WS", "Samoa"),
    ("YE", "Yemen"),
    ("YT", "Mayotte"),
    ("ZA", "South Africa"),
    ("ZM", "Zambia"),
    ("ZW", "Zimbabwe"),
)

# Records dumped to YAML by main(): one {"code", "name"} dict per entry,
# in the same order as the table above.
COUNTRIES = [
    {"code": alpha2, "name": label} for alpha2, label in _CODE_NAME_PAIRS
]
265+
266+
267+
def main():
    """Generate the ISO country codes YAML file.

    Writes a three-line ``#`` comment header followed by the YAML dump of
    ``COUNTRIES`` to ``data/iso_country_codes.yaml``, resolved relative to
    the parent of this script's directory, then prints a one-line summary
    to stdout.

    The target ``data`` directory is created if it does not exist, so the
    script does not fail on a checkout where that directory is missing.

    Raises:
        OSError: if the directory cannot be created or the file cannot be
            written.
    """
    # NOTE(review): shared.path_join is a project helper; presumably it
    # joins and normalizes path components -- confirm in scripts/shared.py.
    repo_path = shared.path_join(os.path.dirname(__file__), "..")
    output_file = shared.path_join(repo_path, "data", "iso_country_codes.yaml")

    # open(..., "w") does not create parent directories; without this the
    # script raises FileNotFoundError when data/ is absent.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    header = [
        "# ISO 3166-1 alpha-2 country codes to country names mapping",
        "# Used by DOAJ API for publisher country identification",
        "# Generated programmatically by dev/generate_country_codes.py",
    ]

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(header) + "\n")
        # allow_unicode keeps names such as "Côte d'Ivoire" readable in the
        # output instead of being written as escape sequences.
        yaml.dump(COUNTRIES, f, default_flow_style=False, allow_unicode=True)

    print(f"Generated {output_file} with {len(COUNTRIES)} country codes")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)