Skip to content

Commit 14d55c5

Browse files
author
Dammy Desktop
committed
fix imports
1 parent c6e5ee1 commit 14d55c5

1 file changed

Lines changed: 128 additions & 0 deletions

File tree

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import pandas as pd
2+
import re
3+
import yaml
4+
import platform
5+
from pathlib import Path
6+
from argparse import ArgumentParser
7+
8+
from tqdm import tqdm
9+
10+
from .paths import posix_from_win
11+
12+
13+
# Compiled once at import time: extract_suffix runs for every candidate file.
# Matches an underscore followed by exactly three digits that sit either at
# the very end of the name or immediately before a single ".ext" extension.
_SUFFIX_RE = re.compile(r'_(\d{3})(?=\.\w+$|$)')


def extract_suffix(filename: str) -> "int | None":
    """Extract the trailing ``_XXX`` three-digit suffix of a file name.

    Args:
        filename: File name to inspect, e.g. ``"foo_230101_002.csv"``.

    Returns:
        The suffix as an integer (``2`` for the example above), or ``None``
        when the name carries no ``_XXX`` suffix right before its extension
        or at its end.
    """
    match = _SUFFIX_RE.search(filename)
    if match is None:
        return None
    return int(match.group(1))
17+
18+
19+
def validate_sequence(suffixes: list[int], animal: str, date: str) -> None:
    """Print warnings about gaps or a non-zero start in one day's suffixes.

    Two independent checks are made on the integer suffixes:

    * every value between the smallest and largest suffix must be present,
      otherwise a ``![Sequence Gap]`` line lists the missing values;
    * the smallest suffix must be 0, otherwise a ``![Sequence Note]`` line
      reports where the sequence starts.

    Repeated suffixes are ignored: they are neither reported nor treated as
    a gap. Nothing is printed for an empty list.

    Args:
        suffixes: Integer suffixes collected for one animal on one date.
        animal: Animal name, used only in the printed messages.
        date: Date string, used only in the printed messages.
    """
    if not suffixes:
        return

    present = set(suffixes)
    first, last = min(present), max(present)

    # Compare as sets so that duplicated suffixes are not mistaken for a gap
    # (a list comparison would fail and report an empty "missing" set).
    missing = sorted(set(range(first, last + 1)) - present)
    if missing:
        # Keep the set-literal look of the message, but in ascending order so
        # the output does not depend on set iteration order.
        missing_repr = "{" + ", ".join(map(str, missing)) + "}"
        print(f"![Sequence Gap] {animal} on {date}: Missing suffixes {missing_repr}")

    if first != 0:
        print(f"![Sequence Note] {animal} on {date}: Sequence starts at {first} instead of 000")
33+
34+
35+
def get_animal_topology(animal, tdata_root, match_roots):
    """Build one topology row per suffixed trial-data CSV of ``animal``.

    CSV files under ``tdata_root / animal / 'TrialData'`` are grouped by the
    first six-digit run in their name (used as the date). Files without a
    ``_XXX`` suffix or without such a six-digit run are skipped. Each date's
    suffixes are passed to ``validate_sequence``, then every file yields a
    row that is completed with the matching file from each match root.

    Args:
        animal: Animal name; also the sub-directory name under ``tdata_root``
            and part of every glob pattern.
        tdata_root: Path-like root supporting ``/`` and ``.glob``.
        match_roots: Mapping of column label to a directory to search.
            Labels containing ``'bin'`` are searched with a ``.bin`` pattern
            (``Hit`` data when the label contains ``'beh'``, ``Sound`` data
            otherwise); any other label uses a generic pattern.

    Returns:
        A list of dicts with keys ``name``, ``date``, ``suffix``,
        ``tdata_file``, ``status`` and one key per label. A label maps to
        the matched path only when exactly one candidate is found; otherwise
        it is ``None`` and ``status`` becomes ``'Partial'``.
    """
    trial_dir = tdata_root / animal / 'TrialData'

    # Bucket (suffix, path) pairs per date, visiting files in sorted order.
    sessions_by_date = {}
    for csv_path in sorted(trial_dir.glob('*.csv')):
        suffix = extract_suffix(csv_path.name)
        date_match = re.search(r'\d{6}', csv_path.name)
        if suffix is None or not date_match:
            continue  # not a suffixed, dated trial file
        sessions_by_date.setdefault(date_match.group(), []).append((suffix, csv_path))

    rows = []
    for date, sessions in sessions_by_date.items():
        # Report gaps / non-zero starts for this day's suffixes.
        validate_sequence([sfx for sfx, _ in sessions], animal, date)

        for suffix, trial_file in sessions:
            suffix_str = f"{suffix:03d}"
            row = {
                'name': animal,
                'date': date,
                'suffix': suffix_str,
                'tdata_file': trial_file,
                'status': 'Complete',
            }

            # Look up the companion file of every other data type by date
            # and suffix.
            for label, root in match_roots.items():
                if 'bin' in label:
                    data_type = 'Hit' if 'beh' in label else 'Sound'
                    pattern = f"{animal}*{data_type}Data*{date}*_{suffix_str}.bin"
                else:
                    pattern = f"*{animal}*{date}*_{suffix_str}*"

                # The glob is permissive; keep only names whose parsed suffix
                # really equals this session's suffix.
                candidates = [
                    found for found in root.glob(pattern)
                    if extract_suffix(found.name) == suffix
                ]

                is_unique = len(candidates) == 1
                row[label] = candidates[0] if is_unique else None
                if not is_unique:
                    row['status'] = 'Partial'  # missing or ambiguous match

            rows.append(row)

    return rows
87+
88+
89+
if __name__ == '__main__':
    # Script entry point: read the YAML config, build the session topology
    # for every requested animal and write it to a single CSV file.
    parser = ArgumentParser()
    parser.add_argument(
        'config_file',
        help='YAML file providing ceph_dir_<os> and home_dir_<os> entries')
    parser.add_argument('animals', help='comma-separated list of animal names')
    parser.add_argument(
        '--sess_top_suffix', default='',
        help='optional suffix appended to the output CSV file name')
    # Parse once and reuse the namespace everywhere below.
    args = parser.parse_args()

    with open(args.config_file, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    # Config keys are suffixed with the lower-cased platform name.
    sys_os = platform.system().lower()

    # Explicit checks instead of ``assert`` so they still run under ``-O``.
    ceph_dir = Path(config[f'ceph_dir_{sys_os}'])
    if not ceph_dir.is_dir():
        raise NotADirectoryError(f'ceph_dir_{sys_os} is not a directory: {ceph_dir}')

    home_dir = Path(config[f'home_dir_{sys_os}'])
    tdata_dir = home_dir / 'data'
    if not tdata_dir.is_dir():
        raise NotADirectoryError(f'Trial data directory not found: {tdata_dir}')

    # Two-letter animal-name prefix -> sub-directory of ``tdata_dir``.
    exp_name_dict = {
        'DO': 'Dammy',
        'RS': 'Ryan',
        'LP': 'Lida',
        'JW': 'JungWoo'
    }

    match_roots = {'videos_dir': ceph_dir / posix_from_win(r'X:\Dammy\mouse_pupillometry\mouse_hf'),
                   'beh_bin': ceph_dir / posix_from_win(r'X:\Dammy\harpbins'),
                   'sound_bin': ceph_dir / posix_from_win(r'X:\Dammy\harpbins')}

    all_data = []
    # Tolerate spaces around the commas and stray empty entries ("A, B,").
    animals = [name.strip() for name in args.animals.split(',') if name.strip()]
    for animal in tqdm(animals, desc='Processing animals'):
        exp_name = exp_name_dict.get(animal[:2])
        if exp_name is None:
            # Without this check the path join below fails with an opaque
            # TypeError (Path / None).
            raise ValueError(
                f'No entry in exp_name_dict for prefix {animal[:2]!r} of animal '
                f'{animal!r}; known prefixes: {sorted(exp_name_dict)}')

        all_data.extend(get_animal_topology(animal, tdata_dir / exp_name, match_roots=match_roots))

    projectdir = ceph_dir / posix_from_win(r'X:\Dammy\Xdetection_mouse_hf_test')
    csv_path = projectdir / f'session_topology{f"_{args.sess_top_suffix}" if args.sess_top_suffix else ""}.csv'

    df = pd.DataFrame(all_data)
    df.to_csv(csv_path, index=False)

0 commit comments

Comments
 (0)