Skip to content

Commit a5eeffd

Browse files
authored
🐛 customization.getnames: respect protected names (#334)
1 parent b47405b commit a5eeffd

2 files changed

Lines changed: 96 additions & 8 deletions

File tree

bibtexparser/customization.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,61 @@ def splitname(name, strict_mode=True):
299299
return parts
300300

301301

302+
def find_matching(
    text: str,
    opening: str,
    closing: str,
    ignore_escaped: bool = True,
) -> dict:
    r"""
    Find matching 'brackets'.

    ``opening`` and ``closing`` are matched as literal strings: they are
    escaped before being used in a regular expression, so brackets that are
    regex metacharacters (e.g. "(", "[", ")") are handled correctly.

    :param text: The string to consider.
    :param opening: The opening bracket (e.g. "(", "[", "{").
    :param closing: The closing bracket (e.g. ")", "]", "}").
    :param ignore_escaped: Ignore escaped bracket (e.g. "\(", "\[", "\{", "\)", "\]", "\}").
    :return: Dictionary with ``{index_opening: index_closing}``
    :raises IndexError: If the brackets found in ``text`` are not balanced.
    """

    open_pattern = re.escape(opening)
    close_pattern = re.escape(closing)

    if ignore_escaped:
        # A bracket directly preceded by a backslash counts as escaped.
        open_pattern = r"(?<!\\)" + open_pattern
        close_pattern = r"(?<!\\)" + close_pattern

    # Tag every hit explicitly as opening/closing. Encoding "closing" as a
    # negated index cannot represent a closing bracket at index 0.
    opened = [(m.start(), True) for m in re.finditer(open_pattern, text)]
    closed = [(m.start(), False) for m in re.finditer(close_pattern, text)]

    if not opened and not closed:
        return {}

    if len(opened) != len(closed):
        raise IndexError(f"Unmatching {opening}...{closing} found")

    ret = {}
    stack = []

    # Walk all brackets in order of appearance; the stack holds the indices of
    # opening brackets that have not been closed yet.
    for index, is_opening in sorted(opened + closed, key=lambda event: event[0]):
        if is_opening:
            stack.append(index)
        else:
            if not stack:
                raise IndexError(f"No opening {opening} for {closing} at: {index:d}")
            ret[stack.pop()] = index

    if stack:
        raise IndexError(f"No closing {closing} for {opening} at: {stack.pop():d}")

    return ret
355+
356+
302357
def getnames(names):
303358
"""Convert people names as surname, firstnames
304359
or surname, initials.
@@ -322,7 +377,29 @@ def getnames(names):
322377
last = namesplit[0].strip()
323378
firsts = [i.strip() for i in namesplit[1].split()]
324379
else:
325-
namesplit = namestring.split()
380+
if "{" in namestring and "}" in namestring:
381+
try:
382+
brackets = find_matching(namestring, "{", "}")
383+
except IndexError:
384+
tidynames.append(namestring)
385+
continue
386+
namesplit = []
387+
start = 0
388+
i = 0
389+
while True:
390+
if i in brackets:
391+
i = brackets[i]
392+
else:
393+
i += 1
394+
if i >= len(namestring):
395+
break
396+
if namestring[i] == " ":
397+
namesplit.append(namestring[start:i])
398+
start = i + 1
399+
elif i == len(namestring) - 1:
400+
namesplit.append(namestring[start:])
401+
else:
402+
namesplit = namestring.split()
326403
last = namesplit.pop()
327404
firsts = [i.replace('.', '. ').strip() for i in namesplit]
328405
if last in ['jnr', 'jr', 'junior']:

bibtexparser/tests/test_customization.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@ def test_getnames(self):
2222
'Jean la Tour',
2323
'Jean le Tour',
2424
'Mike ben Akar',
25+
'A. {Delgado de Molina}',
26+
r'M. Vign{\'e}',
27+
'Tom {de Geus}',
28+
'Tom {de \{Geus}',
29+
'Tom \{de Geus\}',
30+
'Tom de {G\{eus}',
31+
'Foo B{\'a}r',
32+
r'{G{\'{e}}rard} {Ben Arous}',
33+
'Incorrect {{name}',
2534
#'Jean de la Tour',
2635
#'Johannes Diderik van der Waals',
2736
]
@@ -35,18 +44,20 @@ def test_getnames(self):
3544
'la Tour, Jean',
3645
'le Tour, Jean',
3746
'ben Akar, Mike',
47+
'{Delgado de Molina}, A.',
48+
r'Vign{\'e}, M.',
49+
'{de Geus}, Tom',
50+
'{de \{Geus}, Tom',
51+
'Geus\}, Tom \{de',
52+
'de {G\{eus}, Tom',
53+
'B{\'a}r, Foo',
54+
r'{Ben Arous}, {G{\'{e}}rard}',
55+
'Incorrect {{name}',
3856
#'de la Tour, Jean',
3957
#'van der Waals, Johannes Diderik',
4058
]
4159
self.assertEqual(result, expected)
4260

43-
@unittest.skip('Bug #9')
44-
def test_getnames_braces(self):
45-
names = ['A. {Delgado de Molina}', r'M. Vign{\'e}']
46-
result = getnames(names)
47-
expected = ['Delgado de Molina, A.', 'Vigné, M.']
48-
self.assertEqual(result, expected)
49-
5061
###########
5162
# page_double_hyphen
5263
###########

0 commit comments

Comments
 (0)