44import argparse
55import os
66
7- def get_novartis_pdx_experiments_file :
7+
8+ # add improve IDs - for sample and drug
9+ def get_novartis_pdx_experiments_file (synObject , samples_df ):
810 # input for the calc_pdx_metrics script
911
1012 file1 = synObject .get ('syn66276102' )
@@ -29,31 +31,38 @@ def get_novartis_pdx_experiments_file:
2931 todiscard = unique_vals_tally [unique_vals_tally ['treatment' ]== 1 ].index
3032 novartispdx_curvefile = novartispdx_curvefile [~ novartispdx_curvefile ['experiment' ].isin (todiscard )]
3133 # remove groups with no 'control' treatment
32- groupeddf = test .groupby ('experiment' )
34+ groupeddf = novartispdx_curvefile .groupby ('experiment' )
3335 no_control = groupeddf ['treatment' ].apply (lambda x : x .str .contains ('control' ).any ())
3436
3537 missingcontrols = no_control .reset_index ()[no_control .reset_index ()['treatment' ] == False ]['experiment' ]
36- finaldf = test [~ test ['experiment' ].isin (missingcontrols )]
37-
38- finalcurvefile = finaldf
38+ nomissingcontrols = novartispdx_curvefile [~ novartispdx_curvefile ['experiment' ].isin (missingcontrols )]
39+ #merge on drug names done in calc_pdx_metrics.py
40+ #final_w_drugIDs = finaldf.merge(drug_df, how='left',right_on='chem_name', left_on="treatment")
41+ final_allIDs = nomissingcontrols .merge (samples_df , how = 'left' , right_on = 'common_name' , left_on = 'model_id' )
42+ print (final_allIDs .head )
43+ final_allIDs = final_allIDs .drop ('model_id' , axis = 1 )
44+ finalDF = final_allIDs .rename ({'improve_sample_id' :'model_id' }, axis = 1 )
45+ print (finalDF .head )
46+ finalcurvefile = finalDF [['model_id' , 'time' , 'volume' , 'treatment' , 'experiment' , 'dose' ]]
47+ print (finalcurvefile .head )
3948 return finalcurvefile
40- #finalcurvefile.to_csv('/tmp/novartispdx_doserep.tsv', sep="\t")
4149
4250
if __name__ == "__main__":
    # Command-line entry point: log into Synapse, read the sample mapping
    # file, build the experiments table and write it out as a TSV.
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--token', help='Synapse authentication token')
    parser.add_argument('-s', '--curSampleFile', default='/tmp/novartispdx_samples.csv', help='Sample mapping file for bladder pdo samples')
    parser.add_argument('-d', '--drugfile', default='/tmp/novartispdx_drugs.tsv', help='Drug mapping file for bladder pdo samples')
    # --output used to be parsed and then ignored because the write path was
    # hard-coded. It is honoured now; its default is the path that was
    # hard-coded before, so a run without -o writes to the same location.
    parser.add_argument('-o', '--output', default='/tmp/novartispdx_curvedata.tsv', help='Output experiments file ')

    args = parser.parse_args()
    print("Logging into Synapse")
    PAT = args.token
    synObject = synapseclient.login(authToken=PAT)
    # The drug mapping file is intentionally not read here: drug names are
    # merged downstream (see the note in get_novartis_pdx_experiments_file).
    samples_df = pd.read_csv(args.curSampleFile)

    doseresponse_data = get_novartis_pdx_experiments_file(synObject, samples_df)
    # head must be *called*; printing the bare attribute shows the bound method.
    print(doseresponse_data.head())
    # Use an ordered list, not a set: set iteration order is arbitrary, which
    # made the column order of the written TSV vary between runs.
    output_columns = ['model_id', 'time', 'volume', 'treatment', 'experiment', 'dose']
    doseresponse_data.to_csv(args.output, columns=output_columns, sep='\t')
5968
0 commit comments