@@ -50,6 +50,27 @@ def download_parse_drug_data(synID:str , save_path:str = None, synToken:str = No
5050 # Parse the downloaded excel file
5151 drugs_excel = pd .ExcelFile (open (drugs_filepath , 'rb' ))
5252 drugs_data = pd .read_excel (drugs_excel )
53+ drugs_data .to_csv ("/tmp/raw_druginfo.csv" )
54+
55+ return (drugs_data )
5356
5457
55- return (drugs_data )
def create_liverpdo_drug_data(drug_info_path: str, prevDrugFilepath: str, output_drug_data_path: str):
    """Collect drug names not present in a previous drug file and process them.

    Reads the CSV at ``drug_info_path``, takes the unique values of its
    ``Drug`` column as chemical names, drops every name already listed in the
    ``chem_name`` column of the previous drug file (when one is supplied), and
    passes the remaining names to ``update_dataframe_and_write_tsv`` together
    with ``output_drug_data_path``.

    Args:
        drug_info_path: Path to a CSV file containing a ``Drug`` column.
        prevDrugFilepath: Path to a previous drug file containing a
            ``chem_name`` column. An empty string (or ``None``) means there
            is no previous file, so every drug is treated as new. A path
            containing ``".tsv"`` is read as tab-separated; any other path
            is read as comma-separated.
        output_drug_data_path: Forwarded unchanged to
            ``update_dataframe_and_write_tsv``.

    Returns:
        None. The result is whatever ``update_dataframe_and_write_tsv`` does
        with the new names and the output path.
    """
    # Unique drug names from the "Drug" column become the candidate list.
    drug_info_df = pd.read_csv(drug_info_path)
    liverpdo_drugs_df = pd.DataFrame({"chem_name": drug_info_df["Drug"].unique()})

    if prevDrugFilepath:
        # Substring test (not endswith) so paths such as "x.tsv.gz" are still
        # read as tab-separated, exactly as before.
        separator = "\t" if ".tsv" in prevDrugFilepath else ","
        prev_drug_df = pd.read_csv(prevDrugFilepath, sep=separator)
        # Keep only the drugs that are absent from the previous file.
        already_known = liverpdo_drugs_df["chem_name"].isin(prev_drug_df["chem_name"])
        new_drugs_df = liverpdo_drugs_df[~already_known]
    else:
        # No previous drug file: every drug is new.
        new_drugs_df = liverpdo_drugs_df

    new_drug_names = new_drugs_df["chem_name"].unique()
    # Per the original comment, this helper gets info for the new drugs; it is
    # defined outside this block.
    update_dataframe_and_write_tsv(new_drug_names, output_drug_data_path)
0 commit comments