1+ import pandas as pd
2+ import re
3+ import yaml
4+ import platform
5+ from pathlib import Path
6+ from argparse import ArgumentParser
7+
8+ from tqdm import tqdm
9+
10+ from .paths import posix_from_win
11+
12+
def extract_suffix(filename: str) -> "int | None":
    """Return the trailing ``_XXX`` suffix of *filename* as an integer.

    The suffix is an underscore followed by exactly three digits, located
    either at the very end of the string or directly before one final
    ``.ext`` extension (``session_003.csv`` -> ``3``).

    Returns:
        The suffix as an ``int``, or ``None`` when no such suffix exists.
    """
    # The annotation is quoted so the ``X | None`` form does not need to be
    # evaluated at definition time on older interpreters.
    match = re.search(r'_(\d{3})(?=\.\w+$|$)', filename)
    if match is None:
        return None
    return int(match.group(1))
17+
18+
def validate_sequence(suffixes: list[int], animal: str, date: str) -> None:
    """Print warnings when one day's suffixes are not a clean ``0..N`` run.

    Three independent conditions are reported on stdout:

    * a gap: some value between the smallest and largest suffix is absent;
    * a duplicate: the same suffix occurs more than once;
    * a late start: the smallest suffix is not 0.

    Nothing is printed for an empty list, and nothing is returned. The
    caller's list is not modified.

    Args:
        suffixes: Integer suffixes found for a single animal and date.
        animal: Animal identifier, used only in the messages.
        date: Date token, used only in the messages.
    """
    if not suffixes:
        return

    ordered = sorted(suffixes)
    first, last = ordered[0], ordered[-1]

    # Decide "gap" from the missing set itself: comparing the sorted list to
    # the expected range also trips on duplicates and would then report an
    # empty set of missing suffixes.
    missing = set(range(first, last + 1)) - set(ordered)
    if missing:
        print(f"![Sequence Gap] {animal} on {date} : Missing suffixes {missing} ")

    # In a sorted list equal values are adjacent, so a pairwise scan finds
    # every repeated suffix.
    repeated = sorted({a for a, b in zip(ordered, ordered[1:]) if a == b})
    if repeated:
        print(f"![Sequence Duplicate] {animal} on {date} : Repeated suffixes {repeated}")

    if first != 0:
        print(f"![Sequence Note] {animal} on {date} : Sequence starts at {first} instead of 000")
33+
34+
def get_animal_topology(animal, tdata_root, match_roots):
    """Build one topology row per suffixed TrialData CSV of *animal*.

    Every ``*.csv`` directly inside ``tdata_root / animal / 'TrialData'``
    that has both a ``_XXX`` suffix (see ``extract_suffix``) and a run of six
    digits in its name (taken as the date token) yields one row. For each
    row, every directory in *match_roots* is searched (non-recursively) for
    exactly one companion file with the same animal, date and suffix.

    Args:
        animal: Animal identifier; used as a directory name under
            *tdata_root* and inside the companion-file glob patterns.
        tdata_root: Path-like directory that contains ``<animal>/TrialData``.
        match_roots: Mapping of output column label -> directory to search.
            Labels containing ``'bin'`` are matched against ``.bin`` files
            named ``<animal>*HitData*`` (label also contains ``'beh'``) or
            ``<animal>*SoundData*``; all other labels use a generic
            ``*<animal>*<date>*_<suffix>*`` pattern.

    Returns:
        A list of dicts with the keys ``name``, ``date``, ``suffix``
        (zero-padded string), ``tdata_file``, ``status`` and one key per
        label in *match_roots*. A label's value is the matching path, or
        ``None`` when there is not exactly one match; in that case
        ``status`` is ``'Partial'`` instead of ``'Complete'``.
    """
    # 1. Collect the core trial-data files in a stable (sorted) order.
    trial_csvs = sorted((tdata_root / animal / 'TrialData').glob('*.csv'))

    # Group (suffix, file) pairs by date so sequences are validated per day.
    date_groups = {}
    for csv_file in trial_csvs:
        suffix = extract_suffix(csv_file.name)
        if suffix is None:
            continue  # ignore files without a _XXX suffix

        date_match = re.search(r'\d{6}', csv_file.name)
        if date_match is None:
            continue  # ignore files without a date token

        date_groups.setdefault(date_match.group(), []).append((suffix, csv_file))

    topology_rows = []
    for date, files in date_groups.items():
        # Report gaps / late starts in this day's suffix sequence.
        validate_sequence([suffix for suffix, _ in files], animal, date)

        for suffix, t_file in files:
            # The literals below must not contain stray whitespace: they are
            # used verbatim in the glob patterns and in the output column.
            suffix_str = f"{suffix:03d}"
            row = {
                'name': animal,
                'date': date,
                'suffix': suffix_str,
                'tdata_file': t_file,
                'status': 'Complete',
            }

            # 2. Match the other data types by date AND suffix.
            for label, root in match_roots.items():
                if 'bin' in label:
                    data_type = 'Hit' if 'beh' in label else 'Sound'
                    pattern = f"{animal}*{data_type}Data*{date}*_{suffix_str}.bin"
                else:
                    pattern = f"*{animal}*{date}*_{suffix_str}*"

                # The glob is deliberately loose; re-check the parsed suffix
                # so that e.g. a longer digit run cannot slip through.
                matches = [
                    candidate
                    for candidate in root.glob(pattern)
                    if extract_suffix(candidate.name) == suffix
                ]

                if len(matches) == 1:
                    row[label] = matches[0]
                else:
                    # Zero matches (missing) and several matches (ambiguous)
                    # are both recorded as an incomplete session.
                    row[label] = None
                    row['status'] = 'Partial'

            topology_rows.append(row)

    return topology_rows
87+
88+
if __name__ == '__main__':
    # Command-line entry point: build the session topology table for a
    # comma-separated list of animals and write it to a CSV file.
    parser = ArgumentParser()
    parser.add_argument('config_file')
    parser.add_argument('animals')
    parser.add_argument('--sess_top_suffix', default='')
    args = parser.parse_args()  # parse once and reuse

    with open(args.config_file, 'r') as file:
        config = yaml.safe_load(file)

    # Config keys are OS-specific, e.g. ``ceph_dir_linux`` / ``ceph_dir_windows``.
    sys_os = platform.system().lower()
    ceph_dir = Path(config[f'ceph_dir_{sys_os}'])
    if not ceph_dir.is_dir():
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        raise NotADirectoryError(f'ceph directory not found: {ceph_dir}')

    home_dir = Path(config[f'home_dir_{sys_os}'])
    tdata_dir = home_dir / 'data'
    if not tdata_dir.is_dir():
        raise NotADirectoryError(f'trial data directory not found: {tdata_dir}')

    # Two-letter animal prefix -> experimenter directory under ``tdata_dir``.
    exp_name_dict = {
        'DO': 'Dammy',
        'RS': 'Ryan',
        'LP': 'Lida',
        'JW': 'JungWoo',
    }

    # NOTE(review): ``posix_from_win`` comes from ``.paths``; presumably it
    # turns the Windows path into something joinable under ``ceph_dir`` —
    # confirm against its definition.
    match_roots = {
        'videos_dir': ceph_dir / posix_from_win(r'X:\Dammy\mouse_pupillometry\mouse_hf'),
        'beh_bin': ceph_dir / posix_from_win(r'X:\Dammy\harpbins'),
        'sound_bin': ceph_dir / posix_from_win(r'X:\Dammy\harpbins'),
    }

    # Tolerate spaces and empty entries in the comma-separated list.
    animals = [name.strip() for name in args.animals.split(',') if name.strip()]
    if not animals:
        parser.error('no animals given')

    # Fail early with a clear message rather than a TypeError from
    # ``tdata_dir / None`` part-way through the run.
    unknown = [name for name in animals if name[:2] not in exp_name_dict]
    if unknown:
        parser.error(
            f'unrecognised animal prefix for {unknown}; '
            f'known prefixes: {sorted(exp_name_dict)}'
        )

    all_data = []
    for animal in tqdm(animals, total=len(animals), desc='Processing animals'):
        exp_name = exp_name_dict[animal[:2]]
        all_data.extend(
            get_animal_topology(animal, tdata_dir / exp_name, match_roots=match_roots)
        )

    projectdir = ceph_dir / posix_from_win(r'X:\Dammy\Xdetection_mouse_hf_test')
    suffix_part = f'_{args.sess_top_suffix}' if args.sess_top_suffix else ''
    csv_path = projectdir / f'session_topology{suffix_part}.csv'

    df = pd.DataFrame(all_data)
    df.to_csv(csv_path, index=False)
0 commit comments