88from collections import defaultdict
99
1010
11- ##### Load, make structure, save functions for mapping .json
11+ ##### Load, make structure, save functions for improve_mapping .json
1212
13- def load_mapping (mapping_file = 'mapping .json' ):
13+ def load_mapping (mapping_file = 'improve_mapping .json' ):
1414 """
15- Loads an existing mapping .json if available.
15+ Loads an existing improve_mapping .json if available.
1616 Otherwise returns an empty base structure with metadata and samples.
1717 """
1818 if os .path .exists (mapping_file ):
@@ -26,7 +26,7 @@ def load_mapping(mapping_file='mapping.json'):
2626 "samples" : []
2727 }, False
2828
29- def save_mapping (mapping_data , mapping_file = 'mapping .json' ):
29+ def save_mapping (mapping_data , mapping_file = 'improve_mapping .json' ):
3030 """Saves mapping data to disk as JSON."""
3131 with open (mapping_file , 'w' ) as f :
3232 json .dump (mapping_data , f , indent = 2 )
@@ -117,15 +117,15 @@ def generate_new_stable_id(samples_list):
117117 return "1"
118118
119119
120- ##### Assign stable ID based on improve_sample_id, triplet, and previous mapping .json file
120+ ##### Assign stable ID based on improve_sample_id, triplet, and previous improve_mapping .json file
121121
122122
123123def unify_samples (mapping_data , all_samples_rows , current_build_metadata ):
124124 """
125125 Assign stable IDs to samples based on triplet overlaps.
126126
127127 Parameters:
128- mapping_data: existing mapping data loaded from mapping .json
128+ mapping_data: existing mapping data loaded from improve_mapping .json
129129 all_samples_rows: list of row dicts from the new build's samples CSVs
130130 current_build_metadata: dict containing build_date, version, etc.
131131
@@ -403,9 +403,9 @@ def rewrite_other_file(file_path, sample_id_mapping, datasets=None):
403403def main ():
404404 parser = argparse .ArgumentParser (description = """
405405Use triplet overlaps to assign stable IDs across builds.
406- In the first build, generate mapping .json without rewriting files.
406+ In the first build, generate improve_mapping .json without rewriting files.
407407In subsequent builds, match samples via triplet overlaps, assign stable IDs,
408- update mapping .json, and rewrite files by replacing improve_sample_id with stable_id.
408+ update improve_mapping .json, and rewrite files by replacing improve_sample_id with stable_id.
409409""" )
410410 parser .add_argument ('--build_date' , default = None ,
411411 help = 'Build date in YYYY-MM-DD. Default=now.' )
@@ -426,8 +426,8 @@ def main():
426426 # Set build_date
427427 build_date = args .build_date or datetime .utcnow ().strftime ("%Y-%m-%d" )
428428
429- # Load or initialize mapping .json
430- mapping_file = "mapping .json"
429+ # Load or initialize improve_mapping .json
430+ mapping_file = "improve_mapping .json"
431431 mapping_data , had_prior = load_mapping (mapping_file )
432432
433433 # Insert current build metadata
@@ -436,7 +436,7 @@ def main():
436436 if not any (b ["build_date" ] == build_date and b ["version" ] == args .version for b in mapping_data ["metadata" ]["builds" ]):
437437 mapping_data ["metadata" ]["builds" ].append (current_build_metadata )
438438 else :
439- print (f"Build with date { build_date } and version { args .version } already exists in mapping .json." )
439+ print (f"Build with date { build_date } and version { args .version } already exists in improve_mapping .json." )
440440 return
441441
442442 # Prepare dataset and file type lists
@@ -455,7 +455,7 @@ def main():
455455 is_first_build = not had_prior and not mapping_data ["samples" ]
456456
457457 if is_first_build :
458- print ("First build detected. Initializing mapping .json without rewriting files." )
458+ print ("First build detected. Initializing improve_mapping .json without rewriting files." )
459459
460460 # Assign stable_ids as improve_sample_id
461461 # Group samples by improve_sample_id to aggregate triplets
@@ -497,7 +497,7 @@ def main():
497497 key = lambda x : int (x ["stable_id" ]) if x ["stable_id" ].isdigit () else x ["stable_id" ]
498498 )
499499
500- # Save mapping .json
500+ # Save improve_mapping .json
501501 save_mapping (mapping_data , mapping_file )
502502 print (f"mapping.json created with { len (mapping_data ['samples' ])} samples." )
503503 print ("No file rewriting needed for the first build." )
@@ -515,7 +515,7 @@ def main():
515515 key = lambda x : int (x ["stable_id" ]) if x ["stable_id" ].isdigit () else x ["stable_id" ]
516516 )
517517
518- # Save updated mapping .json
518+ # Save updated improve_mapping .json
519519 save_mapping (mapping_data , mapping_file )
520520 print (f"mapping.json updated with { len (mapping_data ['samples' ])} samples." )
521521
@@ -540,7 +540,7 @@ def main():
540540 rewrite_other_file (gz_other_file , sample_id_mapping , datasets = ds_list )
541541
542542 print ("All files have been rewritten with stable IDs." )
543- print ("Stable IDs have been updated in mapping .json." )
543+ print ("Stable IDs have been updated in improve_mapping .json." )
544544
545545if __name__ == "__main__" :
546546 main ()
0 commit comments