Skip to content

Commit 528714c

Browse files
authored
🐛 Fix problem in homogenize_latex_encoding when authors are lists
* Fix typo in unit test
* Found issue in homogenize_latex_encoding: it did not handle lists correctly
1 parent a8527ff commit 528714c

2 files changed

Lines changed: 32 additions & 19 deletions

File tree

bibtexparser/customization.py

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
Each of them takes a record and returns the modified record.
77
"""
88

9-
import re
109
import logging
11-
10+
import re
11+
import warnings
1212
from builtins import str
1313

1414
from bibtexparser.latexenc import latex_to_unicode, string_to_latex, protect_uppercase
@@ -72,14 +72,14 @@ def splitname(name, strict_mode=True):
7272
# We'll iterate over the input once, dividing it into a list of words for
7373
# each comma-separated section. We'll also calculate the case of each word
7474
# as we work.
75-
sections = [[]] # Sections of the name.
76-
cases = [[]] # 1 = uppercase, 0 = lowercase, -1 = caseless.
77-
word = [] # Current word.
78-
case = -1 # Case of the current word.
79-
level = 0 # Current brace level.
80-
bracestart = False # Will the next character be the first within a brace?
81-
controlseq = True # Are we currently processing a control sequence?
82-
specialchar = None # Are we currently processing a special character?
75+
sections = [[]] # Sections of the name.
76+
cases = [[]] # 1 = uppercase, 0 = lowercase, -1 = caseless.
77+
word = [] # Current word.
78+
case = -1 # Case of the current word.
79+
level = 0 # Current brace level.
80+
bracestart = False # Will the next character be the first within a brace?
81+
controlseq = True # Are we currently processing a control sequence?
82+
specialchar = None # Are we currently processing a special character?
8383

8484
# Using an iterator allows us to deal with escapes in a simple manner.
8585
nameiter = iter(name)
@@ -246,12 +246,12 @@ def splitname(name, strict_mode=True):
246246
firstl = cases.index(0) - len(cases)
247247
lastl = -cases[::-1].index(0) - 1
248248
if lastl == -1:
249-
lastl -= 1 # Cannot consume the rest of the string.
249+
lastl -= 1 # Cannot consume the rest of the string.
250250

251251
# Pull the parts out.
252252
parts['first'] = p0[:firstl]
253-
parts['von'] = p0[firstl:lastl+1]
254-
parts['last'] = p0[lastl+1:]
253+
parts['von'] = p0[firstl:lastl + 1]
254+
parts['last'] = p0[lastl + 1:]
255255

256256
# No lowercase: last is the last word, first is everything else.
257257
else:
@@ -287,7 +287,7 @@ def splitname(name, strict_mode=True):
287287
if 0 in lcases:
288288
split = len(lcases) - lcases[::-1].index(0)
289289
if split == len(lcases):
290-
split = 0 # Last cannot be empty.
290+
split = 0 # Last cannot be empty.
291291
parts['von'] = sections[0][:split]
292292
parts['last'] = sections[0][split:]
293293

@@ -366,7 +366,8 @@ def editor(record):
366366
if record["editor"]:
367367
record["editor"] = getnames([i.strip() for i in record["editor"].replace('\n', ' ').split(" and ")])
368368
# convert editor to object
369-
record["editor"] = [{"name": i, "ID": i.replace(',', '').replace(' ', '').replace('.', '')} for i in record["editor"]]
369+
record["editor"] = [{"name": i, "ID": i.replace(',', '').replace(' ', '').replace('.', '')} for i in
370+
record["editor"]]
370371
else:
371372
del record["editor"]
372373
return record
@@ -418,7 +419,8 @@ def journal(record):
418419
if "journal" in record:
419420
# switch journal to object
420421
if record["journal"]:
421-
record["journal"] = {"name": record["journal"], "ID": record["journal"].replace(',', '').replace(' ', '').replace('.', '')}
422+
record["journal"] = {"name": record["journal"],
423+
"ID": record["journal"].replace(',', '').replace(' ', '').replace('.', '')}
422424

423425
return record
424426

@@ -519,13 +521,21 @@ def homogenize_latex_encoding(record):
519521
:type record: dict
520522
:returns: dict -- the modified record.
521523
"""
522-
# First, we convert everything to unicode
524+
# First, we convert everything to unicode
523525
record = convert_to_unicode(record)
524526
# And then, we fall back
525527
for val in record:
526528
if val not in ('ID',):
527529
logger.debug('Apply string_to_latex to: %s', val)
528-
record[val] = string_to_latex(record[val])
530+
if isinstance(record[val], list):
531+
record[val] = [
532+
string_to_latex(x) for x in record[val]
533+
]
534+
elif isinstance(record[val], str):
535+
record[val] = string_to_latex(record[val])
536+
else:
537+
warnings.warn('Unable to homogenize latex encoding for %s: Expected string or list,' % val,
538+
RuntimeWarning)
529539
if val == 'title':
530540
logger.debug('Protect uppercase in title')
531541
logger.debug('Before: %s', record[val])
@@ -544,6 +554,7 @@ def add_plaintext_fields(record):
544554
:type record: dict
545555
:returns: dict -- the modified record.
546556
"""
557+
547558
def _strip_string(string):
548559
for stripped in ['{', '}']:
549560
string = string.replace(stripped, "")

bibtexparser/tests/test_bparser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# -*- coding: utf-8 -*-
33

44
from __future__ import unicode_literals
5+
6+
import os
57
import unittest
68
import codecs
79

@@ -209,7 +211,7 @@ def cust2(record):
209211
res = bib.get_entry_list()
210212
with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile:
211213
bib2 = BibTexParser(bibfile.read(), customization=cust2)
212-
res2 = bib.get_entry_list()
214+
res2 = bib2.get_entry_list()
213215
self.assertEqual(res, res2)
214216

215217
def test_article_missing_coma(self):

0 commit comments

Comments
 (0)