add newline conversion and languages anchor name->id for HTML5

TimidRobot · TimidRobot · commit da802d9227e9 · 2020-06-03T10:03:53.000-07:00
diff --git a/tools/cc0_update.py b/tools/cc0_update.py
@@ -81,7 +81,6 @@ def update_lang_footer(args, filename, content, lang_tags):
     FOOTER_COMMENTS) with a list of links based on the legalcode files
     currently present.
     """
-    print(f"{filename}: inserting language footer links")
     current_language = lang_tags_from_filenames(filename)[0]
     footer = ""
     for lang_tag in lang_tags:
@@ -109,6 +108,13 @@ def update_lang_footer(args, filename, content, lang_tags):
         # Use ASCII period
         period = "."
     replacement = f"{start}\n{footer}{period}\n{end}"
+    if target == replacement:
+        print(
+            f"{filename}:     Skipping unneeded insertion of language footer"
+            " links"
+        )
+    else:
+        print(f"{filename}: Inserting language footer links")
     if args.debug:
         new_content = content.replace(target, replacement, 1)
         diff_changes(filename, content, new_content)
@@ -131,7 +137,10 @@ def insert_missing_lang_footer_comments(args, filename, content):
     present.
     """
     if has_footer_comments(content):
-        print(f"{filename}: language footer comments present: skipping insert")
+        print(
+            f"{filename}:     Skipping unneeded language footer comments"
+            " insertion"
+        )
         return content
     print(f"{filename}: inserting language footer HTML comments")
     re_pattern = re.compile(
@@ -185,7 +194,8 @@ def normalize_faq_translation_link(args, filename, content):
     """
     if has_correct_faq_officialtranslations(content):
         print(
-            f"{filename}: correct translation FAQ link: skipping normalization"
+            f"{filename}:     Skipping unneeded translation FAQ link"
+            " normalization"
         )
         return content
     print(f"{filename}: normalizing translation FAQ link")
@@ -221,13 +231,78 @@ def normalize_faq_translation_link(args, filename, content):
         return content.replace(target, replacement, 1)
 
 
+def has_correct_languages_anchor(content):
+    """Determine if the HTML5 languages anchor (id="languages") is present.
+    """
+    # Exact, case-sensitive substring test; True when the id attribute is
+    # already in content, False otherwise
+    return 'id="languages"' in content
+
+
+def normalize_languages_anchor(args, filename, content):
+    """Replace the first name="languages" anchor attribute with
+    id="languages". Returns content unchanged when id="languages" is already
+    present and None when no name="languages" attribute can be matched.
+    """
+    if has_correct_languages_anchor(content):
+        print(
+            f"{filename}:     Skipping unneeded language anchor normalization"
+        )
+        return content
+    print(f"{filename}: normalizing language anchor id")
+    # Accept either quote style and any letter case
+    re_pattern = re.compile("name=['\"]languages['\"]", re.IGNORECASE)
+    matches = re_pattern.search(content)
+    if matches is None:
+        print(
+            f"{filename}: ERROR: languages anchor not matched. Aborting"
+            " processing"
+        )
+        # None signals failure; process_file_contents exits when it gets it
+        return None
+    # Only the first occurrence of the matched text is rewritten
+    new_content = content.replace(matches.group(), 'id="languages"', 1)
+    if args.debug:
+        diff_changes(filename, content, new_content)
+    return new_content
+
+
+def normalize_line_endings(args, filename, content):
+    """Normalize line endings to unix (\\n): both mac (CR) and windows (CRLF)
+    newlines are converted. Content with neither is returned unchanged.
+    """
+    crlf_count = content.count("\r\n")
+    # A CR that is not the first half of a CRLF pair is a mac newline
+    cr_count = content.count("\r") - crlf_count
+    found = []
+    if cr_count:
+        found.append(f" {cr_count} mac newlines (CR)")
+    if crlf_count:
+        found.append(f" {crlf_count} windows newlines (CRLF)")
+    if not found:
+        print(f"{filename}:     Skipping unneeded newline conversion")
+        return content
+    summary = " and".join(found)
+    print(f"{filename}: Converting{summary} to unix newlines (LF)")
+    # Replace CRLF first so that its CR is not converted a second time;
+    # the remaining lone CRs are then converted too, as the message reports
+    return content.replace("\r\n", "\n").replace("\r", "\n")
+
+
 def process_file_contents(args, file_list, lang_tags):
     """Process each of the CC0 legalcode files and update them, as necessary.
     """
     for filename in file_list:
-        with open(filename, "r", encoding="utf-8") as file_in:
+        with open(filename, "r", encoding="utf-8", newline="") as file_in:
             content = file_in.read()
-        new_content = normalize_faq_translation_link(args, filename, content)
+        new_content = content
+        new_content = normalize_line_endings(args, filename, new_content)
+        new_content = normalize_languages_anchor(args, filename, new_content)
+        if new_content is None:
+            sys.exit(1)
+        new_content = normalize_faq_translation_link(
+            args, filename, new_content
+        )
         if new_content is None:
             sys.exit(1)
         new_content = insert_missing_lang_footer_comments(
@@ -241,9 +316,11 @@ def process_file_contents(args, file_list, lang_tags):
         if new_content is None:
             sys.exit(1)
         if content == new_content:
-            print(f"{filename}: No changes: skipping writing back to file")
+            print(
+                f"{filename}:     Skipping writing back to file (no changes)"
+            )
         elif args.debug:
-            print(f"{filename}: DEBUG: skipping writing changes to file")
+            print(f"{filename}: DEBUG:     Skipping writing changes to file")
         else:
             print(f"{filename}: Writing changes to file")
             with open(filename, "w", encoding="utf-8") as file_out: