|
| 1 | +#!/usr/bin/env python |
| 2 | +""" |
| 3 | +This script generates the gregorian_multilang.lark file |
| 4 | +with month names (full and abbreviated) based on the list of |
| 5 | +target languages. |
| 6 | +
|
| 7 | +Run this script with hatch to regenerate the file:: |
| 8 | +
|
| 9 | + hatch run codegen:generate |
| 10 | +
|
| 11 | +""" |
| 12 | + |
| 13 | +from collections import defaultdict |
| 14 | +import pathlib |
| 15 | + |
| 16 | +from babel.dates import get_month_names |
| 17 | + |
# Directory holding the lark grammars, located relative to this script.
# The script path is resolved to an absolute path first: ``.parent`` is a
# purely lexical operation, so with a relative ``__file__`` such as
# "generate.py" the expression ``.parent.parent`` would collapse to "."
# and point at the wrong directory.
GRAMMAR_DIR_PATH = (
    pathlib.Path(__file__).resolve().parent.parent
    / "src"
    / "undate"
    / "converters"
    / "grammars"
)
# file that is generated by this script, in that directory
MONTH_GRAMMAR_FILE = GRAMMAR_DIR_PATH / "gregorian_multilang.lark"

# include month names in the following languages (locale codes passed to babel)
languages = [
    "en",  # English
    "es",  # Spanish
    "fr",  # French
    "de",  # German
    "rw",  # Kinyarwanda
    "lg",  # Ganda
    "ti",  # Tigrinya
]

# warning to include at top of generated file, written as lark ``//``
# comments and followed by one blank line
warning_text = """// WARNING: This file is auto-generated. DO NOT EDIT.
// To regenerate: hatch run codegen:generate

"""
| 41 | + |
| 42 | + |
def main():
    """Regenerate the multilingual month-name lark grammar file.

    For every locale in ``languages``, the wide and abbreviated month names
    are fetched from babel, lowercased, and stripped of leading/trailing
    periods. Unique variants are collected per month number, and one rule
    per month is written to ``MONTH_GRAMMAR_FILE`` after ``warning_text``.
    A short status message is printed when the file has been written.
    """
    # month number -> list of unique name variants, in order of discovery
    all_month_names = defaultdict(list)

    for lang in languages:
        for width in ["wide", "abbreviated"]:
            for month_num, month_name in get_month_names(width, locale=lang).items():
                # some locales use a . on the shortened month; let's ignore that
                month_name = month_name.strip(".").lower()
                # In some cases different languages have the same abbreviations;
                # in some cases, abbreviated and full are the same.
                # Only add if not already present, to avoid redundancy
                if month_name not in all_month_names[month_num]:
                    all_month_names[month_num].append(month_name)

    # Always write UTF-8: the month names include non-ASCII text, which the
    # platform's default encoding is not guaranteed to be able to represent.
    with MONTH_GRAMMAR_FILE.open("w", encoding="utf-8") as outfile:
        outfile.write(warning_text)

        # for each numeric month, generate a rule with all variant names:
        #   month_1: /(january|jan)/i
        for i, names in all_month_names.items():
            # combine all names in a case-insensitive OR regex
            # sort shortest variants last to avoid partial matches hitting first
            or_names = "|".join(sorted(names, key=len, reverse=True))
            outfile.write(f"month_{i}: /({or_names})/i\n")

    # Report a path relative to the working directory when possible.
    # relative_to() raises ValueError when the file is not located under the
    # current directory; fall back to the path as-is rather than crashing
    # after the file has already been written.
    try:
        display_path = MONTH_GRAMMAR_FILE.relative_to(pathlib.Path.cwd())
    except ValueError:
        display_path = MONTH_GRAMMAR_FILE
    print(f"Successfully regenerated {display_path}")
    print("If the file has changed, make sure to commit the new version.")


if __name__ == "__main__":
    main()
0 commit comments