66Each of them takes a record and returns the modified record.
77"""
88
9- import re
109import logging
11-
10+ import re
11+ import warnings
1212from builtins import str
1313
1414from bibtexparser .latexenc import latex_to_unicode , string_to_latex , protect_uppercase
@@ -72,14 +72,14 @@ def splitname(name, strict_mode=True):
7272 # We'll iterate over the input once, dividing it into a list of words for
7373 # each comma-separated section. We'll also calculate the case of each word
7474 # as we work.
75- sections = [[]] # Sections of the name.
76- cases = [[]] # 1 = uppercase, 0 = lowercase, -1 = caseless.
77- word = [] # Current word.
78- case = - 1 # Case of the current word.
79- level = 0 # Current brace level.
80- bracestart = False # Will the next character be the first within a brace?
81- controlseq = True # Are we currently processing a control sequence?
82- specialchar = None # Are we currently processing a special character?
75+ sections = [[]] # Sections of the name.
76+ cases = [[]] # 1 = uppercase, 0 = lowercase, -1 = caseless.
77+ word = [] # Current word.
78+ case = - 1 # Case of the current word.
79+ level = 0 # Current brace level.
80+ bracestart = False # Will the next character be the first within a brace?
81+ controlseq = True # Are we currently processing a control sequence?
82+ specialchar = None # Are we currently processing a special character?
8383
8484 # Using an iterator allows us to deal with escapes in a simple manner.
8585 nameiter = iter (name )
@@ -246,12 +246,12 @@ def splitname(name, strict_mode=True):
246246 firstl = cases .index (0 ) - len (cases )
247247 lastl = - cases [::- 1 ].index (0 ) - 1
248248 if lastl == - 1 :
249- lastl -= 1 # Cannot consume the rest of the string.
249+ lastl -= 1 # Cannot consume the rest of the string.
250250
251251 # Pull the parts out.
252252 parts ['first' ] = p0 [:firstl ]
253- parts ['von' ] = p0 [firstl :lastl + 1 ]
254- parts ['last' ] = p0 [lastl + 1 :]
253+ parts ['von' ] = p0 [firstl :lastl + 1 ]
254+ parts ['last' ] = p0 [lastl + 1 :]
255255
256256 # No lowercase: last is the last word, first is everything else.
257257 else :
@@ -287,7 +287,7 @@ def splitname(name, strict_mode=True):
287287 if 0 in lcases :
288288 split = len (lcases ) - lcases [::- 1 ].index (0 )
289289 if split == len (lcases ):
290- split = 0 # Last cannot be empty.
290+ split = 0 # Last cannot be empty.
291291 parts ['von' ] = sections [0 ][:split ]
292292 parts ['last' ] = sections [0 ][split :]
293293
@@ -345,7 +345,8 @@ def author(record):
345345 """
346346 if "author" in record :
347347 if record ["author" ]:
348- record ["author" ] = getnames ([i .strip () for i in record ["author" ].replace ('\n ' , ' ' ).split (" and " )])
348+ record ["author" ] = getnames ([i .strip () for i in re .split (r"\ and\ " , record ["author" ].replace ('\n ' , ' ' ),
349+ flags = re .IGNORECASE )])
349350 else :
350351 del record ["author" ]
351352 return record
@@ -365,7 +366,8 @@ def editor(record):
365366 if record ["editor" ]:
366367 record ["editor" ] = getnames ([i .strip () for i in record ["editor" ].replace ('\n ' , ' ' ).split (" and " )])
367368 # convert editor to object
368- record ["editor" ] = [{"name" : i , "ID" : i .replace (',' , '' ).replace (' ' , '' ).replace ('.' , '' )} for i in record ["editor" ]]
369+ record ["editor" ] = [{"name" : i , "ID" : i .replace (',' , '' ).replace (' ' , '' ).replace ('.' , '' )} for i in
370+ record ["editor" ]]
369371 else :
370372 del record ["editor" ]
371373 return record
@@ -417,7 +419,8 @@ def journal(record):
417419 if "journal" in record :
418420 # switch journal to object
419421 if record ["journal" ]:
420- record ["journal" ] = {"name" : record ["journal" ], "ID" : record ["journal" ].replace (',' , '' ).replace (' ' , '' ).replace ('.' , '' )}
422+ record ["journal" ] = {"name" : record ["journal" ],
423+ "ID" : record ["journal" ].replace (',' , '' ).replace (' ' , '' ).replace ('.' , '' )}
421424
422425 return record
423426
@@ -518,13 +521,21 @@ def homogenize_latex_encoding(record):
518521 :type record: dict
519522 :returns: dict -- the modified record.
520523 """
521- # First, we convert everything to unicode
524+ # First, we convert everything to unicode
522525 record = convert_to_unicode (record )
523526 # And then, we fall back
524527 for val in record :
525528 if val not in ('ID' ,):
526529 logger .debug ('Apply string_to_latex to: %s' , val )
527- record [val ] = string_to_latex (record [val ])
530+ if isinstance (record [val ], list ):
531+ record [val ] = [
532+ string_to_latex (x ) for x in record [val ]
533+ ]
534+ elif isinstance (record [val ], str ):
535+ record [val ] = string_to_latex (record [val ])
536+ else :
537+ warnings .warn ('Unable to homogenize latex encoding for %s: Expected string or list,' % val ,
538+ RuntimeWarning )
528539 if val == 'title' :
529540 logger .debug ('Protect uppercase in title' )
530541 logger .debug ('Before: %s' , record [val ])
@@ -543,6 +554,7 @@ def add_plaintext_fields(record):
543554 :type record: dict
544555 :returns: dict -- the modified record.
545556 """
557+
546558 def _strip_string (string ):
547559 for stripped in ['{' , '}' ]:
548560 string = string .replace (stripped , "" )
0 commit comments