@@ -170,6 +170,7 @@ def __init__(
170170 territory : str | None = None ,
171171 script : str | None = None ,
172172 variant : str | None = None ,
173+ modifier : str | None = None ,
173174 ) -> None :
174175 """Initialize the locale object from the given identifier components.
175176
@@ -183,6 +184,7 @@ def __init__(
183184 :param territory: the territory (country or region) code
184185 :param script: the script code
185186 :param variant: the variant code
187+ :param modifier: a modifier (following the '@' symbol, sometimes called '@variant')
186188 :raise `UnknownLocaleError`: if no locale data is available for the
187189 requested locale
188190 """
@@ -194,10 +196,13 @@ def __init__(
194196 self .script = script
195197 #: the variant code
196198 self .variant = variant
199+ #: the modifier
200+ self .modifier = modifier
197201 self .__data = None
198202
199203 identifier = str (self )
200- if not localedata .exists (identifier ):
204+ identifier_without_modifier = identifier .partition ('@' )[0 ]
205+ if not localedata .exists (identifier_without_modifier ):
201206 raise UnknownLocaleError (identifier )
202207
203208 @classmethod
@@ -284,6 +289,11 @@ def parse(
284289 >>> Locale.parse('und_AT')
285290 Locale('de', territory='AT')
286291
292+ Modifiers are optional, and always at the end, separated by "@":
293+
294+ >>> Locale.parse('de_AT@euro')
295+ Locale('de', territory='AT', modifier='euro')
296+
287297 :param identifier: the locale identifier string
288298 :param sep: optional component separator
289299 :param resolve_likely_subtags: if this is specified then a locale will
@@ -340,7 +350,11 @@ def _try_load_reducing(parts):
340350 # implement ICU like fuzzy locale objects and provide a way to
341351 # maximize and minimize locale tags.
342352
343- language , territory , script , variant = parts
353+ if len (parts ) == 5 :
354+ language , territory , script , variant , modifier = parts
355+ else :
356+ language , territory , script , variant = parts
357+ modifier = None
344358 language = get_global ('language_aliases' ).get (language , language )
345359 territory = get_global ('territory_aliases' ).get (territory or '' , (territory ,))[0 ]
346360 script = get_global ('script_aliases' ).get (script or '' , script )
@@ -351,7 +365,7 @@ def _try_load_reducing(parts):
351365 if script == 'Zzzz' :
352366 script = None
353367
354- parts = language , territory , script , variant
368+ parts = language , territory , script , variant , modifier
355369
356370 # First match: try the whole identifier
357371 new_id = get_locale_identifier (parts )
@@ -365,41 +379,49 @@ def _try_load_reducing(parts):
365379 # simplified identifier that is just the language
366380 likely_subtag = get_global ('likely_subtags' ).get (language )
367381 if likely_subtag is not None :
368- language2 , _ , script2 , variant2 = parse_locale (likely_subtag )
369- locale = _try_load_reducing ((language2 , territory , script2 , variant2 ))
382+ parts2 = parse_locale (likely_subtag )
383+ if len (parts2 ) == 5 :
384+ language2 , _ , script2 , variant2 , modifier2 = parse_locale (likely_subtag )
385+ else :
386+ language2 , _ , script2 , variant2 = parse_locale (likely_subtag )
387+ modifier2 = None
388+ locale = _try_load_reducing ((language2 , territory , script2 , variant2 , modifier2 ))
370389 if locale is not None :
371390 return locale
372391
373392 raise UnknownLocaleError (input_id )
374393
def __eq__(self, other: object) -> bool:
    """Return whether *other* describes the same locale as this one.

    Two locales are equal when their ``language``, ``territory``,
    ``script``, ``variant`` and ``modifier`` attributes all compare equal.
    *other* does not have to be a ``Locale``: any object exposing those five
    attributes is compared; an object missing any of them is unequal.

    :param other: the object to compare against
    :return: ``True`` if all five components match, ``False`` otherwise
    """
    keys = ('language', 'territory', 'script', 'variant', 'modifier')
    # Anything that lacks one of the identifier components cannot be equal.
    if not all(hasattr(other, key) for key in keys):
        return False
    return all(getattr(self, key) == getattr(other, key) for key in keys)
385405
def __ne__(self, other: object) -> bool:
    """Return the negation of ``self.__eq__(other)``.

    :param other: the object to compare against
    """
    return not self.__eq__(other)
388408
def __hash__(self) -> int:
    """Hash the locale by its five identifier components.

    The tuple covers ``language``, ``territory``, ``script``, ``variant``
    and ``modifier``, so locales that differ only in their modifier hash
    from different tuples.
    """
    components = (
        self.language,
        self.territory,
        self.script,
        self.variant,
        self.modifier,
    )
    return hash(components)
391412
def __repr__(self) -> str:
    """Return a constructor-like representation of the locale.

    The language is always shown positionally; ``territory``, ``script``,
    ``variant`` and ``modifier`` follow as keyword arguments, in that
    order, and only when they are not ``None``.
    """
    rendered = []
    for key in ('territory', 'script', 'variant', 'modifier'):
        value = getattr(self, key)
        if value is not None:
            # Each fragment carries its own leading separator so the
            # language can be followed by zero or more of them.
            rendered.append(f", {key}={value!r}")
    return f"Locale({self.language!r}{''.join(rendered)})"
399420
def __str__(self) -> str:
    """Return the locale identifier built by :func:`get_locale_identifier`.

    All five components (language, territory, script, variant, modifier)
    are passed along in that order.
    """
    components = (
        self.language,
        self.territory,
        self.script,
        self.variant,
        self.modifier,
    )
    return get_locale_identifier(components)
403425
404426 @property
405427 def _data (self ) -> localedata .LocaleDataDict [str , Any ]:
@@ -416,6 +438,11 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None:
416438 >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
417439 u'Chinese (Simplified, China)'
418440
441+ Modifiers are currently passed through verbatim:
442+
443+ >>> Locale('it', 'IT', modifier='euro').get_display_name('en')
444+ u'Italian (Italy, euro)'
445+
419446 :param locale: the locale to use
420447 """
421448 if locale is None :
@@ -430,6 +457,8 @@ def get_display_name(self, locale: Locale | str | None = None) -> str | None:
430457 details .append (locale .territories .get (self .territory ))
431458 if self .variant :
432459 details .append (locale .variants .get (self .variant ))
460+ if self .modifier :
461+ details .append (self .modifier )
433462 details = filter (None , details )
434463 if details :
435464 retval += f" ({ ', ' .join (details )} )"
@@ -1145,9 +1174,12 @@ def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: st
11451174 return None
11461175
11471176
1148- def parse_locale (identifier : str , sep : str = '_' ) -> tuple [str , str | None , str | None , str | None ]:
1177+ def parse_locale (
1178+ identifier : str ,
1179+ sep : str = '_'
1180+ ) -> tuple [str , str | None , str | None , str | None , str | None ]:
11491181 """Parse a locale identifier into a tuple of the form ``(language,
1150- territory, script, variant)``.
1182+ territory, script, variant, modifier )``.
11511183
11521184 >>> parse_locale('zh_CN')
11531185 ('zh', 'CN', None, None)
@@ -1159,12 +1191,22 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11591191 ('en', '150', None, None)
11601192 >>> parse_locale('en_us_posix')
11611193 ('en', 'US', None, 'POSIX')
1194+ >>> parse_locale('it_IT@euro')
1195+ ('it', 'IT', None, None, 'euro')
1196+ >>> parse_locale('it_IT@custom')
1197+ ('it', 'IT', None, None, 'custom')
1198+ >>> parse_locale('it_IT@')
1199+ ('it', 'IT', None, None)
11621200
11631201 The default component separator is "_", but a different separator can be
1164- specified using the `sep` parameter:
1202+ specified using the `sep` parameter.
1203+
1204+ The optional modifier is always separated with "@" and at the end:
11651205
11661206 >>> parse_locale('zh-CN', sep='-')
11671207 ('zh', 'CN', None, None)
1208+ >>> parse_locale('zh-CN@custom', sep='-')
1209+ ('zh', 'CN', None, None, 'custom')
11681210
11691211 If the identifier cannot be parsed into a locale, a `ValueError` exception
11701212 is raised:
@@ -1174,14 +1216,13 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11741216 ...
11751217 ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
11761218
1177- Encoding information and locale modifiers are removed from the identifier:
1219+ Encoding information is removed from the identifier, while modifiers are
1220+ kept:
11781221
1179- >>> parse_locale('it_IT@euro')
1180- ('it', 'IT', None, None)
11811222 >>> parse_locale('en_US.UTF-8')
11821223 ('en', 'US', None, None)
11831224 >>> parse_locale('de_DE.iso885915@euro')
1184- ('de', 'DE', None, None)
1225+ ('de', 'DE', None, None, 'euro' )
11851226
11861227 See :rfc:`4646` for more information.
11871228
@@ -1191,13 +1232,10 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
11911232 :raise `ValueError`: if the string does not appear to be a valid locale
11921233 identifier
11931234 """
1235+ identifier , _ , modifier = identifier .partition ('@' )
11941236 if '.' in identifier :
11951237 # this is probably the charset/encoding, which we don't care about
11961238 identifier = identifier .split ('.' , 1 )[0 ]
1197- if '@' in identifier :
1198- # this is a locale modifier such as @euro, which we don't care about
1199- # either
1200- identifier = identifier .split ('@' , 1 )[0 ]
12011239
12021240 parts = identifier .split (sep )
12031241 lang = parts .pop (0 ).lower ()
@@ -1223,22 +1261,37 @@ def parse_locale(identifier: str, sep: str = '_') -> tuple[str, str | None, str
12231261 if parts :
12241262 raise ValueError (f"{ identifier !r} is not a valid locale identifier" )
12251263
1226- return lang , territory , script , variant
1227-
1228-
1229- def get_locale_identifier (tup : tuple [str , str | None , str | None , str | None ], sep : str = '_' ) -> str :
1264+ # TODO(3.0): always return a 5-tuple
1265+ if modifier :
1266+ return lang , territory , script , variant , modifier
1267+ else :
1268+ return lang , territory , script , variant
1269+
1270+
def get_locale_identifier(
    tup: tuple[str]
    | tuple[str, str | None]
    | tuple[str, str | None, str | None]
    | tuple[str, str | None, str | None, str | None]
    | tuple[str, str | None, str | None, str | None, str | None],
    sep: str = "_",
) -> str:
    """The reverse of :func:`parse_locale`.  It creates a locale identifier out
    of a ``(language, territory, script, variant, modifier)`` tuple.  Items can
    be set to ``None`` and trailing ``None``\\ s can also be left out of the
    tuple.

    >>> get_locale_identifier(('de', 'DE', None, '1999', 'custom'))
    'de_DE_1999@custom'
    >>> get_locale_identifier(('fi', None, None, None, 'custom'))
    'fi@custom'
    >>> get_locale_identifier(('de', 'DE', None, '1999'))
    'de_DE_1999'
    >>> get_locale_identifier(('zh', 'CN', 'Hans'), sep='-')
    'zh-Hans-CN'

    The modifier, when present, is always appended after an "@", regardless
    of `sep`.

    .. versionadded:: 1.0

    :param tup: the tuple as returned by :func:`parse_locale`.
    :param sep: the separator for the identifier.
    :return: the assembled locale identifier string
    """
    # Ignore anything past the fifth item, then pad short tuples with None so
    # that the unpacking below always sees exactly five components.
    head = tuple(tup[:5])  # type: ignore  # length should be no more than 5
    padding = (None,) * (5 - len(head))
    lang, territory, script, variant, modifier = head + padding

    # The script is emitted before the territory; empty components are dropped.
    base = sep.join(filter(None, (lang, script, territory, variant)))
    if modifier:
        return f'{base}@{modifier}'
    return base
0 commit comments