🐛 align_values allows int or bool, fixes previous min alignment (#316)

Michael Fruth · web-flow · commit 2ba4323d8d1b · 2022-09-27T08:06:14.000+02:00
Fixes #315. The `align_values` option of the BibTexWriter now accepts a bool or an int value. If the bool value `True` is specified, the maximal number of characters used in any field name is used as the alignment length. If an integer value is specified, each field name is padded to the greater of the specified integer value and the number of characters in that field name, so field names longer than the specified value are written unpadded (and negative values have no effect). This commit also fixes the previous behavior of `align_values`, which is a breaking change: previously the `ENTRYTYPE` key was considered when calculating the maximal number of characters, which always led to a minimum width of `9`. Now, keys that are not written as fields into the BibTeX output (`ENTRYTYPE` and `ID`) are ignored, which leads to an exact computation of the field name lengths.
diff --git a/bibtexparser/bwriter.py b/bibtexparser/bwriter.py
@@ -5,7 +5,7 @@
 
 import logging
 from enum import Enum, auto
-from typing import Dict, Callable, Iterable
+from typing import Dict, Callable, Iterable, Union
 from bibtexparser.bibdatabase import (BibDatabase, COMMON_STRINGS,
                                       BibDataString,
                                       BibDataStringExpression)
@@ -15,6 +15,9 @@
 
 __all__ = ['BibTexWriter']
 
+# A list of entries that should not be included in the content (key = value) of a BibTex entry
+ENTRY_TO_BIBTEX_IGNORE_ENTRIES = ['ENTRYTYPE', 'ID']
+
 
 class SortingStrategy(Enum):
     """
@@ -89,9 +92,12 @@ def __init__(self, write_common_strings=False):
         self.contents = ['comments', 'preambles', 'strings', 'entries']
         #: Character(s) for indenting BibTeX field-value pairs. Default: single space.
         self.indent = ' '
-        #: Align values. Determines the maximal number of characters used in any fieldname and aligns all values
-        #    according to that by filling up with single spaces. Default: False
-        self.align_values = False
+        #: Align values. Aligns all values according to a given length by padding with single spaces.
+        #    If align_values is true, the maximum number of characters used in any field name is used as the length.
+        #    If align_values is a number, the greater of the specified value or the number of characters used in the
+        #    field name is used as the length.
+        #    Default: False
+        self.align_values: Union[int, bool] = False
         #: Align multi-line values. Formats a multi-line value such that the text is aligned exactly
         #    on top of each other. Default: False
         self.align_multiline_values = False
@@ -112,7 +118,7 @@ def __init__(self, write_common_strings=False):
         #: BibTeX syntax allows the comma to be optional at the end of the last field in an entry.
         #: Use this to enable writing this last comma in the bwriter output. Defaults: False.
         self.add_trailing_comma = False
-        #: internal variable used if self.align_values = True
+        #: internal variable used if self.align_values = True or self.align_values = <number>
         self._max_field_width = 0
         #: Whether common strings are written
         self.common_strings = write_common_strings
@@ -143,10 +149,13 @@ def _entries_to_bibtex(self, bib_database):
         else:
             entries = bib_database.entries
 
-        if self.align_values:
+        if self.align_values is True:
             # determine maximum field width to be used
-            widths = [max(map(len, entry.keys())) for entry in entries]
+            widths = [len(ele) for entry in entries for ele in entry if ele not in ENTRY_TO_BIBTEX_IGNORE_ENTRIES]
             self._max_field_width = max(widths)
+        elif type(self.align_values) == int:
+            # Use specified value
+            self._max_field_width = self.align_values
 
         return self.entry_separator.join(self._entry_to_bibtex(entry) for entry in entries)
 
@@ -165,7 +174,8 @@ def _entry_to_bibtex(self, entry):
         else:
             field_fmt = u",\n{indent}{field:<{field_max_w}} = {value}"
         # Write field = value lines
-        for field in [i for i in display_order if i not in ['ENTRYTYPE', 'ID']]:
+        for field in [i for i in display_order if i not in ENTRY_TO_BIBTEX_IGNORE_ENTRIES]:
+            max_field_width = max(len(field), self._max_field_width)
             try:
                 value = _str_or_expr_to_bibtex(entry[field])
 
@@ -176,12 +186,7 @@ def _entry_to_bibtex(self, entry):
                     #                      World}
                     # Calculate the indent of "World":
                     # Left of field (whitespaces before e.g. 'title')
-                    value_indent = len(self.indent)
-                    # Field itself (e.g. len('title'))
-                    if self._max_field_width > 0:
-                        value_indent += self._max_field_width
-                    else:
-                        value_indent += len(field)
+                    value_indent = len(self.indent) + max_field_width
                     # Right of field ' = ' (<- 3 chars) + '{' (<- 1 char)
                     value_indent += 3 + 1
 
@@ -190,7 +195,7 @@ def _entry_to_bibtex(self, entry):
                 bibtex += field_fmt.format(
                     indent=self.indent,
                     field=field,
-                    field_max_w=self._max_field_width,
+                    field_max_w=max_field_width,
                     value=value)
             except TypeError:
                 raise TypeError(u"The field %s in entry %s must be a string"
diff --git a/bibtexparser/tests/test_bibtexwriter.py b/bibtexparser/tests/test_bibtexwriter.py
@@ -70,7 +70,7 @@ def test_indent(self):
 """
         self.assertEqual(result, expected)
 
-    def test_align(self):
+    def test_align_bool(self):
         bib_database = BibDatabase()
         bib_database.entries = [{'ID': 'abc123',
                                  'ENTRYTYPE': 'book',
@@ -87,6 +87,22 @@ def test_align(self):
 """
         self.assertEqual(result, expected)
 
+        bib_database = BibDatabase()
+        bib_database.entries = [{'ID': 'veryveryverylongID',
+                                 'ENTRYTYPE': 'book',
+                                 'a': 'test',
+                                 'bb': 'longvalue'}]
+        writer = BibTexWriter()
+        writer.align_values = True
+        result = bibtexparser.dumps(bib_database, writer)
+        expected = \
+"""@book{veryveryverylongID,
+ a  = {test},
+ bb = {longvalue}
+}
+"""
+        self.assertEqual(result, expected)
+
         with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
             bib_database = bibtexparser.load(bibtex_file)
         writer = BibTexWriter()
@@ -121,6 +137,70 @@ def test_align(self):
 """
         self.assertEqual(result, expected)
 
+    def test_align_int(self):
+        bib_database = BibDatabase()
+        bib_database.entries = [{'ID': 'abc123',
+                                 'ENTRYTYPE': 'book',
+                                 'author': 'test',
+                                 'thisisaverylongkey': 'longvalue'}]
+        # Negative value should have no effect
+        writer = BibTexWriter()
+        writer.align_values = -20
+        result = bibtexparser.dumps(bib_database, writer)
+        expected = \
+"""@book{abc123,
+ author = {test},
+ thisisaverylongkey = {longvalue}
+}
+"""
+        self.assertEqual(result, expected)
+
+        # Value smaller than longest field name should only impact the "short" field names
+        writer = BibTexWriter()
+        writer.align_values = 10
+        result = bibtexparser.dumps(bib_database, writer)
+        expected = \
+"""@book{abc123,
+ author     = {test},
+ thisisaverylongkey = {longvalue}
+}
+"""
+        self.assertEqual(result, expected)
+
+
+        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
+            bib_database = bibtexparser.load(bibtex_file)
+        writer = BibTexWriter()
+        writer.contents = ['entries']
+        writer.align_values = 15
+        result = bibtexparser.dumps(bib_database, writer)
+        expected = \
+"""@book{Toto3000,
+ author          = {Toto, A and Titi, B},
+ title           = {A title}
+}
+
+@article{Wigner1938,
+ author          = {Wigner, E.},
+ doi             = {10.1039/TF9383400029},
+ issn            = {0014-7672},
+ journal         = {Trans. Faraday Soc.},
+ owner           = {fr},
+ pages           = {29--41},
+ publisher       = {The Royal Society of Chemistry},
+ title           = {The transition state method},
+ volume          = {34},
+ year            = {1938}
+}
+
+@book{Yablon2005,
+ author          = {Yablon, A.D.},
+ publisher       = {Springer},
+ title           = {Optical fiber fusion slicing},
+ year            = {2005}
+}
+"""
+        self.assertEqual(result, expected)
 
     def test_entry_separator(self):
         bib_database = BibDatabase()
@@ -206,17 +286,17 @@ def test_align_multiline_values_with_align(self):
         result = bibtexparser.dumps(bib_database, writer)
         expected = \
 """@article{Cesar2013,
- author    = {Jean César},
- title     = {A mutline line title is very amazing. It should be
-              long enough to test multilines... with two lines or should we
-              even test three lines... What an amazing title.},
- year      = {2013},
- journal   = {Nice Journal},
- abstract  = {This is an abstract. This line should be long enough to test
-              multilines... and with a french érudit word},
- comments  = {A comment},
- keyword   = {keyword1, keyword2,
-              multiline-keyword1, multiline-keyword2}
+ author   = {Jean César},
+ title    = {A mutline line title is very amazing. It should be
+             long enough to test multilines... with two lines or should we
+             even test three lines... What an amazing title.},
+ year     = {2013},
+ journal  = {Nice Journal},
+ abstract = {This is an abstract. This line should be long enough to test
+             multilines... and with a french érudit word},
+ comments = {A comment},
+ keyword  = {keyword1, keyword2,
+             multiline-keyword1, multiline-keyword2}
 }
 """
         self.assertEqual(result, expected)
@@ -331,17 +411,17 @@ def test_align_multiline_values_with_align_with_indent(self):
         result = bibtexparser.dumps(bib_database, writer)
         expected = \
 """@article{Cesar2013,
-   author    = {Jean César},
-   title     = {A mutline line title is very amazing. It should be
-                long enough to test multilines... with two lines or should we
-                even test three lines... What an amazing title.},
-   year      = {2013},
-   journal   = {Nice Journal},
-   abstract  = {This is an abstract. This line should be long enough to test
-                multilines... and with a french érudit word},
-   comments  = {A comment},
-   keyword   = {keyword1, keyword2,
-                multiline-keyword1, multiline-keyword2}
+   author   = {Jean César},
+   title    = {A mutline line title is very amazing. It should be
+               long enough to test multilines... with two lines or should we
+               even test three lines... What an amazing title.},
+   year     = {2013},
+   journal  = {Nice Journal},
+   abstract = {This is an abstract. This line should be long enough to test
+               multilines... and with a french érudit word},
+   comments = {A comment},
+   keyword  = {keyword1, keyword2,
+               multiline-keyword1, multiline-keyword2}
 }
 """
         self.assertEqual(result, expected)