Skip to content

Commit 8301d82

Browse files
added creating drug data to python script
1 parent a0c397c commit 8301d82

1 file changed

Lines changed: 22 additions & 1 deletion

File tree

build/liverpdo/03-drug-liverpdo.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,27 @@ def download_parse_drug_data(synID:str , save_path:str = None, synToken:str = No
5050
# Parse the downloaded excel file
5151
drugs_excel = pd.ExcelFile(open(drugs_filepath, 'rb'))
5252
drugs_data = pd.read_excel(drugs_excel)
53+
drugs_data.to_csv("/tmp/raw_druginfo.csv")
54+
55+
return(drugs_data)
5356

5457

55-
return(drugs_data)
58+
def create_liverpdo_drug_data(drug_info_path:str, prevDrugFilepath:str, output_drug_data_path:str):
    """Find the drugs not yet present in a previous drug file and hand them off for lookup.

    Reads the CSV at ``drug_info_path``, takes the unique values of its
    ``Drug`` column as candidate ``chem_name`` values, removes every name that
    already appears in the ``chem_name`` column of the previous drug file (if
    one is given), and passes the remaining names plus
    ``output_drug_data_path`` to ``update_dataframe_and_write_tsv``.

    Parameters
    ----------
    drug_info_path : str
        Path to a CSV file that has a ``Drug`` column.
    prevDrugFilepath : str
        Path to a previously generated drug file with a ``chem_name`` column.
        It is read as tab-separated when the path contains ``".tsv"`` and as
        comma-separated otherwise. An empty string (or ``None``) means there
        is no previous file, so every drug is treated as new.
    output_drug_data_path : str
        Forwarded unchanged to ``update_dataframe_and_write_tsv``.

    Returns
    -------
    None
    """
    # Import the drug info and collect the distinct names from the 'Drug' column.
    drug_info_df = pd.read_csv(drug_info_path)
    liverpdo_drugs_df = pd.DataFrame({"chem_name": drug_info_df['Drug'].unique()})

    # Truthiness check: both "" and None mean "no previous drug file".
    if prevDrugFilepath:
        # Substring match (not endswith) so names such as "drugs.tsv.gz" are
        # still parsed as tab-separated.
        if ".tsv" in prevDrugFilepath:
            prev_drug_df = pd.read_csv(prevDrugFilepath, sep='\t')
        else:
            prev_drug_df = pd.read_csv(prevDrugFilepath)
        # Keep only the drugs in liverpdo_drugs_df that are absent from the
        # previous file (i.e. the new drugs).
        is_known = liverpdo_drugs_df.chem_name.isin(prev_drug_df.chem_name)
        new_drugs_df = liverpdo_drugs_df[~is_known]
    else:
        # No previous drug file, so all drugs are new.
        new_drugs_df = liverpdo_drugs_df

    new_drug_names = new_drugs_df['chem_name'].unique()

    # update_dataframe_and_write_tsv is defined elsewhere in this file;
    # presumably it retrieves info for these names and writes the TSV at
    # output_drug_data_path -- TODO confirm against its definition.
    update_dataframe_and_write_tsv(new_drug_names, output_drug_data_path)

0 commit comments

Comments
 (0)