11#!/usr/bin/env python3
22# vim: set fileencoding=utf-8:
33
4- """Add/Update the language list at the bottom of all CC0 legalcode files.
4+ """Normalize file and add/update the language list at the bottom of all CC0
5+ legalcode files.
56"""
67
78# Copyright 2016, 2017 Creative Commons
@@ -81,7 +82,6 @@ def update_lang_footer(args, filename, content, lang_tags):
8182 FOOTER_COMMENTS) with a list of links based on the legalcode files
8283 currently present.
8384 """
84- print (f"{ filename } : inserting language footer links" )
8585 current_language = lang_tags_from_filenames (filename )[0 ]
8686 footer = ""
8787 for lang_tag in lang_tags :
@@ -109,6 +109,13 @@ def update_lang_footer(args, filename, content, lang_tags):
109109 # Use ASCII period
110110 period = "."
111111 replacement = f"{ start } \n { footer } { period } \n { end } "
112+ if target == replacement :
113+ print (
114+ f"{ filename } : Skipping unneeded insertion of language footer"
115+ " links"
116+ )
117+ else :
118+ print (f"{ filename } : Inserting language footer links" )
112119 if args .debug :
113120 new_content = content .replace (target , replacement , 1 )
114121 diff_changes (filename , content , new_content )
@@ -131,7 +138,10 @@ def insert_missing_lang_footer_comments(args, filename, content):
131138 present.
132139 """
133140 if has_footer_comments (content ):
134- print (f"{ filename } : language footer comments present: skipping insert" )
141+ print (
142+ f"{ filename } : Skipping unneeded language footer comments"
143+ "insertion"
144+ )
135145 return content
136146 print (f"{ filename } : inserting language footer HTML comments" )
137147 re_pattern = re .compile (
@@ -172,7 +182,7 @@ def insert_missing_lang_footer_comments(args, filename, content):
172182
173183
174184def has_correct_faq_officialtranslations (content ):
175- """Determine if the link to the tranlsation FAQ is correct.
185+ """Determine if the link to the translation FAQ is correct.
176186 """
177187 if content .find (f'"{ FAQ_TRANSLATION_LINK } "' ) == - 1 :
178188 return False
@@ -185,7 +195,8 @@ def normalize_faq_translation_link(args, filename, content):
185195 """
186196 if has_correct_faq_officialtranslations (content ):
187197 print (
188- f"{ filename } : correct translation FAQ link: skipping normalization"
198+ f"{ filename } : Skipping unneeded translation FAQ link"
199+ " normalization"
189200 )
190201 return content
191202 print (f"{ filename } : normalizing translation FAQ link" )
@@ -221,13 +232,77 @@ def normalize_faq_translation_link(args, filename, content):
221232 return content .replace (target , replacement , 1 )
222233
223234
def has_correct_languages_anchor(content):
    """Determine if the languages anchor already uses the id attribute.

    Returns True when the literal text id="languages" occurs anywhere in
    content, and False otherwise.
    """
    return 'id="languages"' in content
241+
242+
def normalize_languages_anchor(args, filename, content):
    """Replace name with id in the languages anchor (HTML5 compatibility).

    Returns content unchanged when has_correct_languages_anchor() reports
    that the id form is already present. Otherwise the first
    name="languages" attribute (single or double quotes, matched
    case-insensitively) is rewritten to id="languages" and the updated
    text is returned.

    Returns None, after printing an error, when no name="languages"
    attribute can be found.

    When args.debug is set, the change is also passed to diff_changes().
    """
    if has_correct_languages_anchor(content):
        print(
            f"{filename} : Skipping unneeded language anchor normalization"
        )
        return content
    print(f"{filename} : normalizing language anchor id")
    # Only quote characters may delimit the attribute value
    re_pattern = re.compile("name=['\"]languages['\"]", re.IGNORECASE)
    match = re_pattern.search(content)
    if match is None:
        print(
            f"{filename} : ERROR: languages anchor not matched. Aborting"
            " processing"
        )
        return None
    # Replace the exact matched text so its original case/quotes are honored
    new_content = content.replace(match.group(), 'id="languages"', 1)
    if args.debug:
        diff_changes(filename, content, new_content)
    return new_content
268+
269+
def normalize_line_endings(args, filename, content):
    """Normalize line endings to unix LF (\\n).

    Counts windows (CRLF) and mac (lone CR) newlines in content, reports
    what was found, and returns the text with both kinds converted to LF.
    Returns content unchanged when neither kind is present.

    args is not used; it is accepted so the signature matches the other
    normalize_* functions.
    """
    crlf_count = content.count("\r\n")
    # Every CR that is not the first half of a CRLF pair is a lone CR
    cr_count = content.count("\r") - crlf_count
    message = ""
    if cr_count:
        message = f" {cr_count} mac newlines (CR)"
    if crlf_count:
        if message:
            message = f"{message} and"
        message = f"{message} {crlf_count} windows newlines (CRLF)"
    if not message:
        print(f"{filename} : Skipping unneeded newline conversion")
        return content
    print(f"{filename} : Converting{message} to unix newlines (LF)")
    # Convert CRLF first so its CR is not turned into a second newline
    return content.replace("\r\n", "\n").replace("\r", "\n")
290+
291+
224292def process_file_contents (args , file_list , lang_tags ):
225293 """Process each of the CC0 legalcode files and update them, as necessary.
226294 """
227295 for filename in file_list :
228- with open (filename , "r" , encoding = "utf-8" ) as file_in :
296+ with open (filename , "r" , encoding = "utf-8" , newline = "" ) as file_in :
229297 content = file_in .read ()
230- new_content = normalize_faq_translation_link (args , filename , content )
298+ new_content = content
299+ new_content = normalize_line_endings (args , filename , new_content )
300+ new_content = normalize_languages_anchor (args , filename , new_content )
301+ if new_content is None :
302+ sys .exit (1 )
303+ new_content = normalize_faq_translation_link (
304+ args , filename , new_content
305+ )
231306 if new_content is None :
232307 sys .exit (1 )
233308 new_content = insert_missing_lang_footer_comments (
@@ -241,9 +316,11 @@ def process_file_contents(args, file_list, lang_tags):
241316 if new_content is None :
242317 sys .exit (1 )
243318 if content == new_content :
244- print (f"{ filename } : No changes: skipping writing back to file" )
319+ print (
320+ f"{ filename } : Skipping writing back to file (no changes)"
321+ )
245322 elif args .debug :
246- print (f"{ filename } : DEBUG: skipping writing changes to file" )
323+ print (f"{ filename } : DEBUG: Skipping writing changes to file" )
247324 else :
248325 print (f"{ filename } : Writing changes to file" )
249326 with open (filename , "w" , encoding = "utf-8" ) as file_out :
0 commit comments