Merge pull request #299 from MiWeiss/master

MiWeiss · web-flow · commit 6c4a4ab8c1b6 · 2022-07-07T17:10:27.000+02:00
This merges all the minor changes I added to my fork into the main repo.

Much of this relies on other PRs (to which, at the time, I did not yet have access).
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,31 @@
+
+name: Tests
+# Runs the unittest suite on every push and pull request, once per Python version in the matrix.
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]  # quoted: a bare 3.10 is parsed by YAML as the float 3.1
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v2-beta  # NOTE(review): no later step visibly uses Node.js; confirm whether this is still needed
+        with:
+          node-version: '12'
+          check-latest: true
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+
+      - name: Run Tests
+        run: |
+          python -m unittest discover -s ./bibtexparser/tests
diff --git a/.travis.yml b/.travis.yml
diff --git a/bibtexparser/bparser.py b/bibtexparser/bparser.py
@@ -77,7 +77,7 @@ def __init__(self, data=None,
                  common_strings=False,
                  add_missing_from_crossref=False):
         """
-        Creates a parser for rading BibTeX files
+        Creates a parser for reading BibTeX files
 
         :return: parser
         :rtype: `BibTexParser`
@@ -309,7 +309,7 @@ def _add_string(self, string_key, string):
         :type string: string
         """
         if string_key in self.bib_database.strings:
-            logger.warning('Overwritting existing string for key: %s.',
+            logger.warning('Overwriting existing string for key: %s.',
                            string_key)
         logger.debug(u'Store string: {} -> {}'.format(string_key, string))
         self.bib_database.strings[string_key] = self._clean_val(string)
diff --git a/bibtexparser/bwriter.py b/bibtexparser/bwriter.py
@@ -97,7 +97,6 @@ def write(self, bib_database):
         return bibtex
 
     def _entries_to_bibtex(self, bib_database):
-        bibtex = ''
         if self.order_entries_by:
             # TODO: allow sort field does not exist for entry
             entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
@@ -109,9 +108,7 @@ def _entries_to_bibtex(self, bib_database):
             widths = [max(map(len, entry.keys())) for entry in entries]
             self._max_field_width = max(widths)
 
-        for entry in entries:
-            bibtex += self._entry_to_bibtex(entry)
-        return bibtex
+        return self.entry_separator.join(self._entry_to_bibtex(entry) for entry in entries)
 
     def _entry_to_bibtex(self, entry):
         bibtex = ''
@@ -143,7 +140,7 @@ def _entry_to_bibtex(self, entry):
                 bibtex += '\n'+self.indent+','
             else:
                 bibtex += ','
-        bibtex += "\n}\n" + self.entry_separator
+        bibtex += "\n}\n"
         return bibtex
 
     def _comments_to_bibtex(self, bib_database):
diff --git a/bibtexparser/customization.py b/bibtexparser/customization.py
@@ -6,9 +6,9 @@
 Each of them takes a record and return the modified record.
 """
 
-import re
 import logging
-
+import re
+import warnings
 from builtins import str
 
 from bibtexparser.latexenc import latex_to_unicode, string_to_latex, protect_uppercase
@@ -72,14 +72,14 @@ def splitname(name, strict_mode=True):
     # We'll iterate over the input once, dividing it into a list of words for
     # each comma-separated section. We'll also calculate the case of each word
     # as we work.
-    sections = [[]]      # Sections of the name.
-    cases = [[]]         # 1 = uppercase, 0 = lowercase, -1 = caseless.
-    word = []            # Current word.
-    case = -1            # Case of the current word.
-    level = 0            # Current brace level.
-    bracestart = False   # Will the next character be the first within a brace?
-    controlseq = True    # Are we currently processing a control sequence?
-    specialchar = None   # Are we currently processing a special character?
+    sections = [[]]  # Sections of the name.
+    cases = [[]]  # 1 = uppercase, 0 = lowercase, -1 = caseless.
+    word = []  # Current word.
+    case = -1  # Case of the current word.
+    level = 0  # Current brace level.
+    bracestart = False  # Will the next character be the first within a brace?
+    controlseq = True  # Are we currently processing a control sequence?
+    specialchar = None  # Are we currently processing a special character?
 
     # Using an iterator allows us to deal with escapes in a simple manner.
     nameiter = iter(name)
@@ -246,12 +246,12 @@ def splitname(name, strict_mode=True):
                 firstl = cases.index(0) - len(cases)
                 lastl = -cases[::-1].index(0) - 1
                 if lastl == -1:
-                    lastl -= 1      # Cannot consume the rest of the string.
+                    lastl -= 1  # Cannot consume the rest of the string.
 
                 # Pull the parts out.
                 parts['first'] = p0[:firstl]
-                parts['von'] = p0[firstl:lastl+1]
-                parts['last'] = p0[lastl+1:]
+                parts['von'] = p0[firstl:lastl + 1]
+                parts['last'] = p0[lastl + 1:]
 
             # No lowercase: last is the last word, first is everything else.
             else:
@@ -287,7 +287,7 @@ def splitname(name, strict_mode=True):
             if 0 in lcases:
                 split = len(lcases) - lcases[::-1].index(0)
                 if split == len(lcases):
-                    split = 0            # Last cannot be empty.
+                    split = 0  # Last cannot be empty.
                 parts['von'] = sections[0][:split]
                 parts['last'] = sections[0][split:]
 
@@ -345,7 +345,8 @@ def author(record):
     """
     if "author" in record:
         if record["author"]:
-            record["author"] = getnames([i.strip() for i in record["author"].replace('\n', ' ').split(" and ")])
+            record["author"] = getnames([i.strip() for i in re.split(r"\ and\ ", record["author"].replace('\n', ' '),
+                                                                     flags=re.IGNORECASE)])
         else:
             del record["author"]
     return record
@@ -365,7 +366,8 @@ def editor(record):
         if record["editor"]:
             record["editor"] = getnames([i.strip() for i in record["editor"].replace('\n', ' ').split(" and ")])
             # convert editor to object
-            record["editor"] = [{"name": i, "ID": i.replace(',', '').replace(' ', '').replace('.', '')} for i in record["editor"]]
+            record["editor"] = [{"name": i, "ID": i.replace(',', '').replace(' ', '').replace('.', '')} for i in
+                                record["editor"]]
         else:
             del record["editor"]
     return record
@@ -417,7 +419,8 @@ def journal(record):
     if "journal" in record:
         # switch journal to object
         if record["journal"]:
-            record["journal"] = {"name": record["journal"], "ID": record["journal"].replace(',', '').replace(' ', '').replace('.', '')}
+            record["journal"] = {"name": record["journal"],
+                                 "ID": record["journal"].replace(',', '').replace(' ', '').replace('.', '')}
 
     return record
 
@@ -518,13 +521,21 @@ def homogenize_latex_encoding(record):
     :type record: dict
     :returns: dict -- the modified record.
     """
-    # First, we convert everything to unicode
+    #  First, we convert everything to unicode
     record = convert_to_unicode(record)
     # And then, we fall back
     for val in record:
         if val not in ('ID',):
             logger.debug('Apply string_to_latex to: %s', val)
-            record[val] = string_to_latex(record[val])
+            if isinstance(record[val], list):
+                record[val] = [
+                    string_to_latex(x) for x in record[val]
+                ]
+            elif isinstance(record[val], str):
+                record[val] = string_to_latex(record[val])
+            else:
+                warnings.warn('Unable to homogenize latex encoding for %s: Expected string or list,' % val,
+                              RuntimeWarning)
             if val == 'title':
                 logger.debug('Protect uppercase in title')
                 logger.debug('Before: %s', record[val])
@@ -543,6 +554,7 @@ def add_plaintext_fields(record):
     :type record: dict
     :returns: dict -- the modified record.
     """
+
     def _strip_string(string):
         for stripped in ['{', '}']:
             string = string.replace(stripped, "")
diff --git a/bibtexparser/tests/data/article_comma_first.bib b/bibtexparser/tests/data/article_comma_first.bib
@@ -16,3 +16,4 @@ @ARTICLE{ Baltazar2013
         , journal = {Nice Journal}
         , comments = {A comment}
         , keyword = {keyword1, keyword2}}
+
diff --git a/bibtexparser/tests/data/article_comma_first_and_trailing_comma_output.bib b/bibtexparser/tests/data/article_comma_first_and_trailing_comma_output.bib
@@ -12,4 +12,3 @@ @article{Cesar2013
  , year = {2013}
  ,
 }
-
diff --git a/bibtexparser/tests/data/article_output.bib b/bibtexparser/tests/data/article_output.bib
@@ -11,4 +11,3 @@ @article{Cesar2013
  volume = {12},
  year = {2013}
 }
-
diff --git a/bibtexparser/tests/data/article_trailing_comma_output.bib b/bibtexparser/tests/data/article_trailing_comma_output.bib
@@ -11,4 +11,3 @@ @article{Cesar2013
  volume = {12},
  year = {2013},
 }
-
diff --git a/bibtexparser/tests/data/article_with_annotation_output.bib b/bibtexparser/tests/data/article_with_annotation_output.bib
@@ -12,4 +12,3 @@ @article{Cesar2013
  volume = {12},
  year = {2013}
 }
-
diff --git a/bibtexparser/tests/data/article_with_strings_output.bib b/bibtexparser/tests/data/article_with_strings_output.bib
@@ -15,4 +15,3 @@ @article{Cesar2013
  volume = {12},
  year = {2013}
 }
-
diff --git a/bibtexparser/tests/data/book_capital_AND.bib b/bibtexparser/tests/data/book_capital_AND.bib
@@ -0,0 +1,8 @@
+@BOOK{Bird1987,
+  title = {Dynamics of Polymeric Liquid},
+  publisher = {Wiley Edition},
+  year = {1987},
+  author = {Bird, R.B. and Armstrong, R.C. AND Hassager, O.},
+  volume = {1},
+  edition = {2},
+}
diff --git a/bibtexparser/tests/data/book_comma_first.bib b/bibtexparser/tests/data/book_comma_first.bib
@@ -6,4 +6,3 @@ @book{Bird1987
    , volume = {1}
    , year = {1987}
 }
-
diff --git a/bibtexparser/tests/data/book_output.bib b/bibtexparser/tests/data/book_output.bib
@@ -6,4 +6,3 @@ @book{Bird1987
  volume = {1},
  year = {1987}
 }
-
diff --git a/bibtexparser/tests/data/multiple_entries_and_comments_output.bib b/bibtexparser/tests/data/multiple_entries_and_comments_output.bib
@@ -26,4 +26,3 @@ @book{Yablon2005
  title = {Optical fiber fusion slicing},
  year = {2005}
 }
-
diff --git a/bibtexparser/tests/data/multiple_entries_output.bib b/bibtexparser/tests/data/multiple_entries_output.bib
@@ -22,4 +22,3 @@ @book{Yablon2005
  title = {Optical fiber fusion slicing},
  year = {2005}
 }
-
diff --git a/bibtexparser/tests/test_bibtexparser.py b/bibtexparser/tests/test_bibtexparser.py
@@ -73,7 +73,6 @@ class TestBibtexparserWriteMethods(unittest.TestCase):
  volume = {1},
  year = {1987}
 }
-
 """
 
     def test_write_str(self):
diff --git a/bibtexparser/tests/test_bibtexwriter.py b/bibtexparser/tests/test_bibtexwriter.py
diff --git a/bibtexparser/tests/test_bparser.py b/bibtexparser/tests/test_bparser.py
diff --git a/bibtexparser/tests/test_crossref_resolving.py b/bibtexparser/tests/test_crossref_resolving.py
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.py b/setup.py

Original file line number	Diff line number	Diff line change
`@@ -12,4 +12,3 @@ @article{Cesar2013`
`12`	`12`	`, year = {2013}`
`13`	`13`	`,`
`14`	`14`	`}`
`15`		`-`
Original file line number	Diff line number	Diff line change
`@@ -11,4 +11,3 @@ @article{Cesar2013`
`11`	`11`	`volume = {12},`
`12`	`12`	`year = {2013}`
`13`	`13`	`}`
`14`		`-`