Skip to content

Commit 797f37c

Browse files
committed
fixes liverpdo experiments
1 parent 533f66b commit 797f37c

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

build/liverpdo/04-experiments-liverpdo.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ def download_experiments_data(synID:str , save_path:str = None, synToken:str = N
6060
### Parse Data Function
6161
def parse_experiments_excel_sheets(first_file_path, second_file_path):
6262
# read in the excel files
63-
first_exp_excel = pd.ExcelFile(open(first_experiments_path, 'rb'))
63+
first_exp_excel = pd.ExcelFile(open(first_file_path, 'rb'))
6464
first_experiments_dict = pd.read_excel(first_exp_excel, sheet_name=None, header=None)
65-
rest_exp_excel = pd.ExcelFile(open(rest_experiments_path, 'rb'))
65+
rest_exp_excel = pd.ExcelFile(open(second_file_path, 'rb'))
6666
rest_experiments_dict = pd.read_excel(rest_exp_excel, sheet_name=None, header=None)
6767
list_of_exp_excels = [first_experiments_dict,rest_experiments_dict]
6868
full_df_list = []
@@ -128,7 +128,20 @@ def merge_improve_samples_drugs(experiment_data:pd.DataFrame, samples_data_path:
128128
all_merged['source'] = "synapse"
129129
all_merged = all_merged.drop(columns={'drug_id','count', 'sample_name','Catalogue','chem_name','other_id','Drug'})
130130
all_merged = all_merged.rename(columns={'improve_drug_id':'Drug'})
131-
all_merged = all_merged.astype({'improve_sample_id':'int'})
131+
132+
# coerce improve_sample_id to numeric (unparseable values become NaN), then flag rows that are NaN or infinite
133+
all_merged['improve_sample_id'] = pd.to_numeric(all_merged['improve_sample_id'], errors='coerce')
134+
bad_mask = all_merged['improve_sample_id'].isna() | np.isinf(all_merged['improve_sample_id'])
135+
136+
print(f"Rows before dropping bad improve_sample_id: {len(all_merged)}")
137+
if bad_mask.any():
138+
print(f"{bad_mask.sum()} rows with missing/non-finite improve_sample_id will be dropped")
139+
# drop the flagged rows, then report the remaining row count
140+
all_merged = all_merged.loc[~bad_mask].copy()
141+
print(f"Rows after dropping: {len(all_merged)}")
142+
143+
# every remaining improve_sample_id is finite, so the cast to int no longer raises on NaN/inf
144+
all_merged['improve_sample_id'] = all_merged['improve_sample_id'].astype(int)
132145
all_merged = all_merged[['study','time','DOSE','GROWTH','Drug','improve_sample_id','time_unit','source']]
133146
all_merged = all_merged.dropna() # drop rows with NaN in any remaining column, because they would also cause issues in curve fitting
134147

0 commit comments

Comments
 (0)