Skip to content

Commit ac5bb3e

Browse files
committed
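Made changes to Wikipedia_fetch.py: moved the site matrix URL into the WIKIPEDIA_MATRIX_URL constant and renamed the lang/lang_code/lang_name identifiers to languages/language_code/language_name (dict key "lang" is now "code")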
1 parent 975794e commit ac5bb3e

1 file changed

Lines changed: 13 additions & 13 deletions

File tree

scripts/1-fetch/Wikipedia_fetch.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
HEADER_LANGUAGES = ["LANGUAGE_CODE", "LANGUAGE_NAME", "COUNT"]
3333
QUARTER = os.path.basename(PATHS["data_quarter"])
3434
WIKIPEDIA_BASE_URL = "https://en.wikipedia.org/w/api.php"
35+
WIKIPEDIA_MATRIX_URL = "https://meta.wikimedia.org/w/api.php"
3536
WIKIPEDIA_RETRY_STATUS_FORCELIST = [
3637
408, # Request Timeout
3738
422, # Unprocessable Content (Validation failed, or endpoint spammed)
@@ -99,29 +100,28 @@ def query_wikipedia_languages(session):
99100
LOGGER.info("Fetching article counts from all language Wikipedias")
100101
tool_data = []
101102

102-
# Get all language wikipedias
103-
site_matrix_url = "https://meta.wikimedia.org/w/api.php"
103+
# Gets all language wikipedias
104104
params = {"action": "sitematrix", "format": "json"}
105-
r = session.get(site_matrix_url, params=params, timeout=30)
105+
r = session.get(WIKIPEDIA_MATRIX_URL, params=params, timeout=30)
106106
data = r.json()["sitematrix"]
107107

108-
langs = []
108+
languages = []
109109
for key, val in data.items():
110110
if key.isdigit():
111-
lang_code = val.get("code")
112-
lang_name = val.get("name")
111+
language_code = val.get("code")
112+
language_name = val.get("name")
113113
for site in val.get("site", []):
114114
if "wikipedia.org" in site["url"]:
115-
langs.append(
115+
languages.append(
116116
{
117-
"lang": lang_code,
118-
"name": lang_name,
117+
"code": language_code,
118+
"name": language_name,
119119
"url": site["url"],
120120
}
121121
)
122122

123123
# For each language wikipedia, fetch statistics.
124-
for site in langs:
124+
for site in languages:
125125
base_url = f"{site['url']}/w/api.php"
126126
params = {
127127
"action": "query",
@@ -139,16 +139,16 @@ def query_wikipedia_languages(session):
139139

140140
tool_data.append(
141141
{
142-
"LANGUAGE_CODE": site["lang"],
142+
"LANGUAGE_CODE": site["code"],
143143
"LANGUAGE_NAME": site["name"],
144144
"COUNT": article_count,
145145
}
146146
)
147-
LOGGER.info(f"{site['lang']} ({site['name']}): {article_count}")
147+
LOGGER.info(f"{site['code']} ({site['name']}): {article_count}")
148148

149149
except Exception as e:
150150
LOGGER.warning(
151-
f"Failed to fetch for {site['lang']} ({site['name']}): {e}"
151+
f"Failed to fetch for {site['code']} ({site['name']}): {e}"
152152
)
153153

154154
return tool_data

0 commit comments

Comments
 (0)