Skip to content

Commit 975794e

Browse files
committed
Added count by language wikipedias csv
1 parent 87bc6d4 commit 975794e

2 files changed

Lines changed: 359 additions & 0 deletions

File tree

Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
"LANGUAGE_CODE","LANGUAGE_NAME","COUNT"
2+
"aa","Qafár af","0"
3+
"ab","аԥсшәа","6472"
4+
"ace","Acèh","12999"
5+
"ady","адыгабзэ","599"
6+
"af","Afrikaans","126595"
7+
"ak","","0"
8+
"alt","алтай тил","1101"
9+
"am","አማርኛ","15451"
10+
"ami","Pangcah","1144"
11+
"an","aragonés","50226"
12+
"ang","Ænglisc","4957"
13+
"ann","Obolo","433"
14+
"anp","अंगिका","1646"
15+
"ar","العربية","1282820"
16+
"arc","ܐܪܡܝܐ","1915"
17+
"ary","الدارجة","10825"
18+
"arz","مصرى","1629311"
19+
"as","অসমীয়া","19767"
20+
"ast","asturianu","137611"
21+
"atj","Atikamekw","2076"
22+
"av","авар","3854"
23+
"avk","Kotava","29897"
24+
"awa","अवधी","2516"
25+
"ay","Aymar aru","5223"
26+
"az","azərbaycanca","208056"
27+
"azb","تۆرکجه","244326"
28+
"ba","башҡортса","63908"
29+
"ban","Basa Bali","33278"
30+
"bar","Boarisch","27200"
31+
"bbc","Batak Toba","1189"
32+
"bcl","Bikol Central","21048"
33+
"bdr","Bajau Sama","236"
34+
"be","беларуская","256444"
35+
"be-tarask","беларуская (тарашкевіца)","89749"
36+
"bew","Betawi","3026"
37+
"bg","български","306056"
38+
"bh","भोजपुरी","8851"
39+
"bi","Bislama","1480"
40+
"bjn","Banjar","11400"
41+
"blk","ပအိုဝ်ႏဘာႏသာႏ","2907"
42+
"bm","bamanankan","898"
43+
"bn","বাংলা","176479"
44+
"bo","བོད་ཡིག","7213"
45+
"bpy","বিষ্ণুপ্রিয়া মণিপুরী","25092"
46+
"br","brezhoneg","89494"
47+
"bs","bosanski","95910"
48+
"btm","Batak Mandailing","1179"
49+
"bug","Basa Ugi","15954"
50+
"bxr","буряад","2910"
51+
"ca","català","782222"
52+
"cbk-zam","Chavacano de Zamboanga","3236"
53+
"cdo","閩東語 / Mìng-dĕ̤ng-ngṳ̄","16683"
54+
"ce","нохчийн","638164"
55+
"ceb","Cebuano","6115948"
56+
"ch","Chamoru","558"
57+
"cho","Chahta anumpa","6"
58+
"chr","ᏣᎳᎩ","1001"
59+
"chy","Tsetsêhestâhese","724"
60+
"ckb","کوردی","78902"
61+
"co","corsu","8561"
62+
"cr","Nēhiyawēwin / ᓀᐦᐃᔭᐍᐏᐣ","14"
63+
"crh","qırımtatarca","29602"
64+
"cs","čeština","578366"
65+
"csb","kaszëbsczi","5495"
66+
"cu","словѣньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ","1309"
67+
"cv","чӑвашла","57971"
68+
"cy","Cymraeg","283604"
69+
"da","dansk","310995"
70+
"dag","dagbanli","13245"
71+
"de","Deutsch","3058904"
72+
"dga","Dagaare","2599"
73+
"din","Thuɔŋjäŋ","338"
74+
"diq","Zazaki","42338"
75+
"dsb","dolnoserbski","3426"
76+
"dtp","Kadazandusun","1697"
77+
"dty","डोटेली","3623"
78+
"dv","ދިވެހިބަސް","3189"
79+
"dz","ཇོང་ཁ","366"
80+
"ee","eʋegbe","1265"
81+
"el","Ελληνικά","259342"
82+
"eml","emiliàn e rumagnòl","13639"
83+
"en","English","7073120"
84+
"eo","Esperanto","376402"
85+
"es","español","2067074"
86+
"et","eesti","255202"
87+
"eu","euskara","473330"
88+
"ext","estremeñu","4024"
89+
"fa","فارسی","1058528"
90+
"fat","mfantse","1116"
91+
"ff","Fulfulde","11945"
92+
"fi","suomi","605523"
93+
"fj","Na Vosa Vakaviti","1615"
94+
"fo","føroyskt","14175"
95+
"fon","fɔ̀ngbè","3239"
96+
"fr","français","2713959"
97+
"frp","arpetan","5810"
98+
"frr","Nordfriisk","20549"
99+
"fur","furlan","4876"
100+
"fy","Frysk","58186"
101+
"ga","Gaeilge","62497"
102+
"gag","Gagauz","3015"
103+
"gan","贛語","6771"
104+
"gcr","kriyòl gwiyannen","1075"
105+
"gd","Gàidhlig","16007"
106+
"gl","galego","227035"
107+
"glk","گیلکی","48350"
108+
"gn","Avañe'ẽ","5965"
109+
"gom","गोंयची कोंकणी / Gõychi Konknni","3640"
110+
"gor","Bahasa Hulontalo","14828"
111+
"got","𐌲𐌿𐍄𐌹𐍃𐌺","975"
112+
"gpe","Ghanaian Pidgin","3848"
113+
"gsw","Alemannisch","31353"
114+
"gu","ગુજરાતી","30700"
115+
"guc","wayuunaiki","683"
116+
"gur","farefare","1167"
117+
"guw","gungbe","1550"
118+
"gv","Gaelg","6922"
119+
"ha","Hausa","70566"
120+
"hak","客家語 / Hak-kâ-ngî","10377"
121+
"haw","Hawaiʻi","2961"
122+
"he","עברית","384161"
123+
"hi","हिन्दी","166792"
124+
"hif","Fiji Hindi","11992"
125+
"ho","Hiri Motu","3"
126+
"hr","hrvatski","228010"
127+
"hsb","hornjoserbsce","14164"
128+
"ht","Kreyòl ayisyen","71059"
129+
"hu","magyar","561537"
130+
"hy","հայերեն","322693"
131+
"hyw","Արեւմտահայերէն","13142"
132+
"hz","Otsiherero","0"
133+
"ia","interlingua","30116"
134+
"iba","Jaku Iban","1807"
135+
"id","Bahasa Indonesia","746488"
136+
"ie","Interlingue","13324"
137+
"ig","Igbo","43614"
138+
"igl","Igala","927"
139+
"ii","ꆇꉙ","3"
140+
"ik","Iñupiatun","603"
141+
"ilo","Ilokano","15430"
142+
"inh","гӀалгӀай","2407"
143+
"io","Ido","59704"
144+
"is","íslenska","60716"
145+
"it","italiano","1939375"
146+
"iu","ᐃᓄᒃᑎᑐᑦ / inuktitut","424"
147+
"ja","日本語","1476214"
148+
"jam","Patois","1730"
149+
"jbo","la .lojban.","1348"
150+
"jv","Jawa","74656"
151+
"ka","ქართული","185666"
152+
"kaa","Qaraqalpaqsha","10653"
153+
"kab","Taqbaylit","6992"
154+
"kbd","адыгэбзэ","1637"
155+
"kbp","Kabɩyɛ","1714"
156+
"kcg","Tyap","1424"
157+
"kg","Kongo","1566"
158+
"kge","Kumoring","2660"
159+
"ki","Gĩkũyũ","1893"
160+
"kj","Kwanyama","4"
161+
"kk","қазақша","240355"
162+
"kl","kalaallisut","245"
163+
"km","ភាសាខ្មែរ","11718"
164+
"kn","ಕನ್ನಡ","34012"
165+
"knc","Yerwa Kanuri","1465"
166+
"ko","한국어","723557"
167+
"koi","перем коми","3469"
168+
"kr","kanuri","0"
169+
"krc","къарачай-малкъар","2606"
170+
"ks","کٲشُر","7966"
171+
"ksh","Ripoarisch","3036"
172+
"ku","kurdî","90872"
173+
"kus","Kʋsaal","1187"
174+
"kv","коми","5724"
175+
"kw","kernowek","7097"
176+
"ky","кыргызча","76024"
177+
"la","Latina","140604"
178+
"lad","Ladino","3865"
179+
"lb","Lëtzebuergesch","65877"
180+
"lbe","лакку","1263"
181+
"lez","лезги","4442"
182+
"lfn","Lingua Franca Nova","4472"
183+
"lg","Luganda","4197"
184+
"li","Limburgs","15127"
185+
"lij","Ligure","11451"
186+
"lld","Ladin","180799"
187+
"lmo","lombard","79689"
188+
"ln","lingála","4820"
189+
"lo","ລາວ","5203"
190+
"lrc","لۊری شومالی","1"
191+
"lt","lietuvių","223646"
192+
"ltg","latgaļu","1098"
193+
"lv","latviešu","137885"
194+
"lzh","文言","13816"
195+
"mad","Madhurâ","1941"
196+
"mai","मैथिली","14234"
197+
"map-bms","Basa Banyumasan","13937"
198+
"mdf","мокшень","7607"
199+
"mg","Malagasy","101121"
200+
"mh","Ebon","4"
201+
"mhr","олык марий","11330"
202+
"mi","Māori","8003"
203+
"min","Minangkabau","228653"
204+
"mk","македонски","155586"
205+
"ml","മലയാളം","87282"
206+
"mn","монгол","26331"
207+
"mni","ꯃꯤꯇꯩ ꯂꯣꯟ","10452"
208+
"mnw","ဘာသာမန်","1957"
209+
"mos","moore","1301"
210+
"mr","मराठी","100593"
211+
"mrj","кырык мары","10430"
212+
"ms","Bahasa Melayu","433564"
213+
"mt","Malti","7617"
214+
"mus","Mvskoke","1"
215+
"mwl","Mirandés","4280"
216+
"my","မြန်မာဘာသာ","109712"
217+
"myv","эрзянь","7866"
218+
"mzn","مازِرونی","64252"
219+
"na","Dorerin Naoero","0"
220+
"nah","Nāhuatl","4281"
221+
"nan","閩南語 / Bân-lâm-gí","433818"
222+
"nap","Napulitano","14934"
223+
"nds","Plattdüütsch","85742"
224+
"nds-nl","Nedersaksies","8043"
225+
"ne","नेपाली","29221"
226+
"new","नेपाल भाषा","72588"
227+
"ng","Oshiwambo","8"
228+
"nia","Li Niha","1764"
229+
"nl","Nederlands","2199179"
230+
"nn","norsk nynorsk","176391"
231+
"no","norsk","657421"
232+
"nov","Novial","1878"
233+
"nqo","ߒߞߏ","1554"
234+
"nr","isiNdebele seSewula","266"
235+
"nrm","Nouormand","5055"
236+
"nso","Sesotho sa Leboa","8780"
237+
"nup","Nupe","471"
238+
"nv","Diné bizaad","22664"
239+
"ny","Chi-Chewa","1099"
240+
"oc","occitan","90269"
241+
"olo","livvinkarjala","4617"
242+
"om","Oromoo","1959"
243+
"or","ଓଡ଼ିଆ","19871"
244+
"os","ирон","21352"
245+
"pa","ਪੰਜਾਬੀ","59115"
246+
"pag","Pangasinan","2616"
247+
"pam","Kapampangan","10137"
248+
"pap","Papiamentu","4948"
249+
"pcd","Picard","6000"
250+
"pcm","Naijá","1503"
251+
"pdc","Deitsch","2046"
252+
"pfl","Pälzisch","2826"
253+
"pi","पालि","2545"
254+
"pih","Norfuk / Pitkern","0"
255+
"pl","polski","1671489"
256+
"pms","Piemontèis","70557"
257+
"pnb","پنجابی","74626"
258+
"pnt","Ποντιακά","488"
259+
"ps","پښتو","20729"
260+
"pt","português","1157363"
261+
"pwn","pinayuanan","374"
262+
"qu","Runa Simi","24199"
263+
"rki","ရခိုင်","987"
264+
"rm","rumantsch","3795"
265+
"rmy","romani čhib","756"
266+
"rn","ikirundi","703"
267+
"ro","română","517057"
268+
"roa-tara","tarandíne","9492"
269+
"rsk","руски","897"
270+
"ru","русский","2066862"
271+
"rue","русиньскый","10137"
272+
"rup","armãneashti","1388"
273+
"rw","Ikinyarwanda","8514"
274+
"sa","संस्कृतम्","12392"
275+
"sah","саха тыла","17734"
276+
"sat","ᱥᱟᱱᱛᱟᱲᱤ","13743"
277+
"sc","sardu","7723"
278+
"scn","sicilianu","26243"
279+
"sco","Scots","34287"
280+
"sd","سنڌي","19502"
281+
"se","davvisámegiella","7904"
282+
"sg","Sängö","355"
283+
"sgs","žemaitėška","17274"
284+
"sh","srpskohrvatski / српскохрватски","461117"
285+
"shi","Taclḥit","10877"
286+
"shn","တႆး","14060"
287+
"si","සිංහල","24494"
288+
"simple","Simple English","274591"
289+
"sk","slovenčina","256043"
290+
"skr","سرائیکی","24270"
291+
"sl","slovenščina","195312"
292+
"sm","Gagana Samoa","1177"
293+
"smn","anarâškielâ","6460"
294+
"sn","chiShona","11494"
295+
"so","Soomaaliga","8562"
296+
"sq","shqip","104411"
297+
"sr","српски / srpski","711448"
298+
"srn","Sranantongo","1129"
299+
"ss","SiSwati","1130"
300+
"st","Sesotho","1505"
301+
"stq","Seeltersk","4129"
302+
"su","Sunda","62129"
303+
"sv","svenska","2617789"
304+
"sw","Kiswahili","102218"
305+
"syl","ꠍꠤꠟꠐꠤ","1190"
306+
"szl","ślůnski","59405"
307+
"szy","Sakizaya","2735"
308+
"ta","தமிழ்","177466"
309+
"tay","Tayal","2582"
310+
"tcy","ತುಳು","2835"
311+
"tdd","ᥖᥭᥰ ᥖᥬᥲ ᥑᥨᥒᥰ","342"
312+
"te","తెలుగు","116252"
313+
"tet","tetun","1379"
314+
"tg","тоҷикӣ","115961"
315+
"th","ไทย","176779"
316+
"ti","ትግርኛ","335"
317+
"tig","ትግሬ","44"
318+
"tk","Türkmençe","7001"
319+
"tl","Tagalog","48847"
320+
"tly","tolışi","10023"
321+
"tn","Setswana","2699"
322+
"to","lea faka-Tonga","2022"
323+
"tpi","Tok Pisin","1406"
324+
"tr","Türkçe","648979"
325+
"trv","Seediq","1201"
326+
"ts","Xitsonga","950"
327+
"tt","татарча / tatarça","560252"
328+
"tum","chiTumbuka","18792"
329+
"tw","Twi","4619"
330+
"ty","reo tahiti","1249"
331+
"tyv","тыва дыл","3911"
332+
"udm","удмурт","5714"
333+
"ug","ئۇيغۇرچە / Uyghurche","9606"
334+
"uk","українська","1393586"
335+
"ur","اردو","233355"
336+
"uz","oʻzbekcha / ўзбекча","308110"
337+
"ve","Tshivenda","822"
338+
"vec","vèneto","69498"
339+
"vep","vepsän kel’","7064"
340+
"vi","Tiếng Việt","1296533"
341+
"vls","West-Vlams","8179"
342+
"vo","Volapük","45349"
343+
"vro","võro","6837"
344+
"wa","walon","12759"
345+
"war","Winaray","1266772"
346+
"wo","Wolof","1740"
347+
"wuu","吴语","46783"
348+
"xal","хальмг","1609"
349+
"xh","isiXhosa","2313"
350+
"xmf","მარგალური","21894"
351+
"yi","ייִדיש","15613"
352+
"yo","Yorùbá","36051"
353+
"yue","粵語","147033"
354+
"za","Vahcuengh","3006"
355+
"zea","Zeêuws","7018"
356+
"zgh","ⵜⴰⵎⴰⵣⵉⵖⵜ ⵜⴰⵏⴰⵡⴰⵢⵜ","11860"
357+
"zh","中文","1504807"
358+
"zu","isiZulu","11796"

scripts/1-fetch/Wikipedia_fetch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def query_wikipedia_languages(session):
116116
{
117117
"lang": lang_code,
118118
"name": lang_name,
119+
"url": site["url"],
119120
}
120121
)
121122

0 commit comments

Comments
 (0)