Skip to content

Commit d230b47

Browse files
authored
Merge branch 'master' into type-improvements
2 parents f3e9398 + d019ed1 commit d230b47

2 files changed

Lines changed: 88 additions & 33 deletions

File tree

babel/core.py

Lines changed: 84 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ def __init__(
170170
territory: str | None = None,
171171
script: str | None = None,
172172
variant: str | None = None,
173+
modifier: str | None = None,
173174
) -> None:
174175
"""Initialize the locale object from the given identifier components.
175176
@@ -183,6 +184,7 @@ def __init__(
183184
:param territory: the territory (country or region) code
184185
:param script: the script code
185186
:param variant: the variant code
187+
:param modifier: a modifier (following the '@' symbol, sometimes called '@variant')
186188
:raise `UnknownLocaleError`: if no locale data is available for the
187189
requested locale
188190
"""
@@ -194,10 +196,13 @@ def __init__(
194196
self.script = script
195197
#: the variant code
196198
self.variant = variant
199+
#: the modifier
200+
self.modifier = modifier
197201
self.__data = None
198202

199203
identifier = str(self)
200-
if not localedata.exists(identifier):
204+
identifier_without_modifier = identifier.partition('@')[0]
205+
if not localedata.exists(identifier_without_modifier):
201206
raise UnknownLocaleError(identifier)
202207

203208
@classmethod
@@ -284,6 +289,11 @@ def parse(
284289
>>> Locale.parse('und_AT')
285290
Locale('de', territory='AT')
286291
292+
Modifiers are optional, and always at the end, separated by "@":
293+
294+
>>> Locale.parse('de_AT@euro')
295+
Locale('de', territory='AT', modifier='euro')
296+
287297
:param identifier: the locale identifier string
288298
:param sep: optional component separator
289299
:param resolve_likely_subtags: if this is specified then a locale will
@@ -340,7 +350,11 @@ def _try_load_reducing(parts):
340350
# implement ICU like fuzzy locale objects and provide a way to
341351
# maximize and minimize locale tags.
342352

343-
language, territory, script, variant = parts
353+
if len(parts) == 5:
354+
language, territory, script, variant, modifier = parts
355+
else:
356+
language, territory, script, variant = parts
357+
modifier = None
344358
language = get_global('language_aliases').get(language, language)
345359
territory = get_global('territory_aliases').get(territory or '', (territory,))[0]
346360
script = get_global('script_aliases').get(script or '', script)
@@ -351,7 +365,7 @@ def _try_load_reducing(parts):
351365
if script == 'Zzzz':
352366
script = None
353367

354-
parts = language, territory, script, variant
368+
parts = language, territory, script, variant, modifier
355369

356370
# First match: try the whole identifier
357371
new_id = get_locale_identifier(parts)
@@ -365,41 +379,49 @@ def _try_load_reducing(parts):
365379
# simplified identifier that is just the language
366380
likely_subtag = get_global('likely_subtags').get(language)
367381
if likely_subtag is not None:
368-
language2, _, script2, variant2 = parse_locale(likely_subtag)
369-
locale = _try_load_reducing((language2, territory, script2, variant2))
382+
parts2 = parse_locale(likely_subtag)
383+
if len(parts2) == 5:
384+
language2, _, script2, variant2, modifier2 = parse_locale(likely_subtag)
385+
else:
386+
language2, _, script2, variant2 = parse_locale(likely_subtag)
387+
modifier2 = None
388+
locale = _try_load_reducing((language2, territory, script2, variant2, modifier2))
370389
if locale is not None:
371390
return locale
372391

373392
raise UnknownLocaleError(input_id)
374393

375394
def __eq__(self, other: object) -> bool:
376-
for key in ('language', 'territory', 'script', 'variant'):
395+
for key in ('language', 'territory', 'script', 'variant', 'modifier'):
377396
if not hasattr(other, key):
378397
return False
379398
return (
380399
self.language == getattr(other, 'language') and # noqa: B009
381400
self.territory == getattr(other, 'territory') and # noqa: B009
382401
self.script == getattr(other, 'script') and # noqa: B009
383-
self.variant == getattr(other, 'variant') # noqa: B009
402+
self.variant == getattr(other, 'variant') and # noqa: B009
403+
self.modifier == getattr(other, 'modifier') # noqa: B009
384404
)
385405

386406
def __ne__(self, other: object) -> bool:
387407
return not self.__eq__(other)
388408

389409
def __hash__(self) -> int:
390-
return hash((self.language, self.territory, self.script, self.variant))
410+
return hash((self.language, self.territory, self.script,
411+
self.variant, self.modifier))
391412

392413
def __repr__(self) -> str:
393414
parameters = ['']
394-
for key in ('territory', 'script', 'variant'):
415+
for key in ('territory', 'script', 'variant', 'modifier'):
395416
value = getattr(self, key)
396417
if value is not None:
397418
parameters.append(f"{key}={value!r}")
398419
return f"Locale({self.language!r}{', '.join(parameters)})"
399420

400421
def __str__(self) -> str:
401422
return get_locale_identifier((self.language, self.territory,
402-
self.script, self.variant))
423+
self.script, self.variant,
424+
self.modifier))
403425

404426
@property
405427
def _data(self) -> localedata.LocaleDataDict[str, Any]:
@@ -416,6 +438,11 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None:
416438
>>> Locale('zh', 'CN', script='Hans').get_display_name('en')
417439
u'Chinese (Simplified, China)'
418440
441+
Modifiers are currently passed through verbatim:
442+
443+
>>> Locale('it', 'IT', modifier='euro').get_display_name('en')
444+
u'Italian (Italy, euro)'
445+
419446
:param locale: the locale to use
420447
"""
421448
if locale is None:
@@ -430,6 +457,8 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None:
430457
details.append(locale.territories.get(self.territory))
431458
if self.variant:
432459
details.append(locale.variants.get(self.variant))
460+
if self.modifier:
461+
details.append(self.modifier)
433462
details = filter(None, details)
434463
if details:
435464
retval += f" ({', '.join(details)})"
@@ -1145,9 +1174,12 @@ def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: st
11451174
return None
11461175

11471176

1148-
def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str | None, str | None]:
1177+
def parse_locale(
1178+
identifier: str,
1179+
sep: str = '_'
1180+
) -> tuple[str, str | None, str | None, str | None, str | None]:
11491181
"""Parse a locale identifier into a tuple of the form ``(language,
1150-
territory, script, variant)``.
1182+
territory, script, variant, modifier)``.
11511183
11521184
>>> parse_locale('zh_CN')
11531185
('zh', 'CN', None, None)
@@ -1159,12 +1191,22 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11591191
('en', '150', None, None)
11601192
>>> parse_locale('en_us_posix')
11611193
('en', 'US', None, 'POSIX')
1194+
>>> parse_locale('it_IT@euro')
1195+
('it', 'IT', None, None, 'euro')
1196+
>>> parse_locale('it_IT@custom')
1197+
('it', 'IT', None, None, 'custom')
1198+
>>> parse_locale('it_IT@')
1199+
('it', 'IT', None, None)
11621200
11631201
The default component separator is "_", but a different separator can be
1164-
specified using the `sep` parameter:
1202+
specified using the `sep` parameter.
1203+
1204+
The optional modifier always comes last and is separated by "@", regardless of `sep`:
11651205
11661206
>>> parse_locale('zh-CN', sep='-')
11671207
('zh', 'CN', None, None)
1208+
>>> parse_locale('zh-CN@custom', sep='-')
1209+
('zh', 'CN', None, None, 'custom')
11681210
11691211
If the identifier cannot be parsed into a locale, a `ValueError` exception
11701212
is raised:
@@ -1174,14 +1216,13 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11741216
...
11751217
ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
11761218
1177-
Encoding information and locale modifiers are removed from the identifier:
1219+
Encoding information is removed from the identifier, while modifiers are
1220+
kept:
11781221
1179-
>>> parse_locale('it_IT@euro')
1180-
('it', 'IT', None, None)
11811222
>>> parse_locale('en_US.UTF-8')
11821223
('en', 'US', None, None)
11831224
>>> parse_locale('de_DE.iso885915@euro')
1184-
('de', 'DE', None, None)
1225+
('de', 'DE', None, None, 'euro')
11851226
11861227
See :rfc:`4646` for more information.
11871228
@@ -1191,13 +1232,10 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11911232
:raise `ValueError`: if the string does not appear to be a valid locale
11921233
identifier
11931234
"""
1235+
identifier, _, modifier = identifier.partition('@')
11941236
if '.' in identifier:
11951237
# this is probably the charset/encoding, which we don't care about
11961238
identifier = identifier.split('.', 1)[0]
1197-
if '@' in identifier:
1198-
# this is a locale modifier such as @euro, which we don't care about
1199-
# either
1200-
identifier = identifier.split('@', 1)[0]
12011239

12021240
parts = identifier.split(sep)
12031241
lang = parts.pop(0).lower()
@@ -1223,22 +1261,37 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
12231261
if parts:
12241262
raise ValueError(f"{identifier!r} is not a valid locale identifier")
12251263

1226-
return lang, territory, script, variant
1227-
1228-
1229-
def get_locale_identifier(tup: tuple[str, str | None, str | None, str | None], sep: str = '_') -> str:
1264+
# TODO(3.0): always return a 5-tuple
1265+
if modifier:
1266+
return lang, territory, script, variant, modifier
1267+
else:
1268+
return lang, territory, script, variant
1269+
1270+
1271+
def get_locale_identifier(
1272+
tup: tuple[str]
1273+
| tuple[str, str | None]
1274+
| tuple[str, str | None, str | None]
1275+
| tuple[str, str | None, str | None, str | None]
1276+
| tuple[str, str | None, str | None, str | None, str | None],
1277+
sep: str = "_",
1278+
) -> str:
12301279
"""The reverse of :func:`parse_locale`. It creates a locale identifier out
1231-
of a ``(language, territory, script, variant)`` tuple. Items can be set to
1280+
of a ``(language, territory, script, variant, modifier)`` tuple. Items can be set to
12321281
``None`` and trailing ``None``\\s can also be left out of the tuple.
12331282
1234-
>>> get_locale_identifier(('de', 'DE', None, '1999'))
1235-
'de_DE_1999'
1283+
>>> get_locale_identifier(('de', 'DE', None, '1999', 'custom'))
1284+
'de_DE_1999@custom'
1285+
>>> get_locale_identifier(('fi', None, None, None, 'custom'))
1286+
'fi@custom'
1287+
12361288
12371289
.. versionadded:: 1.0
12381290
12391291
:param tup: the tuple as returned by :func:`parse_locale`.
12401292
:param sep: the separator for the identifier.
12411293
"""
1242-
tup = tuple(tup[:4]) # type: ignore # length should be 4
1243-
lang, territory, script, variant = tup + (None,) * (4 - len(tup))
1244-
return sep.join(filter(None, (lang, script, territory, variant)))
1294+
tup = tuple(tup[:5]) # type: ignore # length should be no more than 5
1295+
lang, territory, script, variant, modifier = tup + (None,) * (5 - len(tup))
1296+
ret = sep.join(filter(None, (lang, script, territory, variant)))
1297+
return f'{ret}@{modifier}' if modifier else ret

tests/test_core.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,12 @@ def test_parse_locale():
283283
assert (excinfo.value.args[0] ==
284284
"'not_a_LOCALE_String' is not a valid locale identifier")
285285

286-
assert core.parse_locale('it_IT@euro') == ('it', 'IT', None, None)
286+
assert core.parse_locale('it_IT@euro') == ('it', 'IT', None, None, 'euro')
287+
assert core.parse_locale('it_IT@something') == ('it', 'IT', None, None, 'something')
288+
287289
assert core.parse_locale('en_US.UTF-8') == ('en', 'US', None, None)
288290
assert (core.parse_locale('de_DE.iso885915@euro') ==
289-
('de', 'DE', None, None))
291+
('de', 'DE', None, None, 'euro'))
290292

291293

292294
@pytest.mark.parametrize('filename', [

0 commit comments

Comments
 (0)