55import argparse
66import synapseclient as sc
77import math
8-
8+ import re
99
1010def main ():
1111 ##current AUC values are here: https://aacr.figshare.com/ndownloader/files/39996295 tabs 2 and 3
@@ -17,14 +17,17 @@ def main():
1717
1818 args = parser .parse_args ()
1919 newdata = get_data (args .pat )
20- newdata = newdata .rename (columns = {'Organoid' :'other_id' ,'Drug' :'chem_name' ,'Dose' :'DOSE' ,'FracResponse' :'GROWTH' ,'Passage' :'time' })
20+ newdata = newdata .rename (columns = {'Organoid' :'other_id' ,'Drug' :'chem_name' ,'Dose' :'DOSE' ,'PercResponse' :'GROWTH' ,'Passage' :'time' })
21+ # print(newdata)
2122 newdata = newdata [['other_id' ,'chem_name' ,'DOSE' ,'GROWTH' ]]
2223 newdata [['time' ]]= '120'
2324 newdata [['time_unit' ]]= 'hours'
2425 newdata [['study' ]]= 'pancpdo'
2526 newdata [['source' ]]= 'TiriacEtAl2018'
27+ print ('collected doses and response for ' + str (len (set (newdata .chem_name )))+ ' drugs and ' + str (len (set (newdata .other_id )))+ ' samples' )
2628# 'source', 'improve_sample_id', 'Drug', 'study','time','time_unit'
27- mappedresponse = map_drugs_to_samps (newddata ,args .drugs ,args ,samples )
29+ mappedresponse = map_to_drugs_samps (newdata ,args .drugs ,args .samples )
30+ print ('mapped doses and response for ' + str (len (set (mappedresponse .Drug )))+ ' drugs and ' + str (len (set (mappedresponse .improve_sample_id )))+ ' samples' )
2831 mappedresponse .to_csv (args .output , sep = '\t ' , index = False )
2932
3033def map_to_drugs_samps (dose_rep ,drugfile ,sampfile ):
@@ -35,8 +38,10 @@ def map_to_drugs_samps(dose_rep,drugfile,sampfile):
3538 samps = pd .read_csv (sampfile )
3639
3740 merged = dose_rep .merge (drugs ).merge (samps )
38- merged = merged [['improve_sample_id' ,'improve_drug_id' ,'DOSE' ,'GROWTH' ,'time' ,'time_unit' ,'study' ,'source' ]]
39- merged = merged .rename (columns = {'improve_drug_id' :'Drug' })
41+
42+ merged = merged .rename (columns = {'improve_drug_id' :'Drug' })
43+ merged = merged [['improve_sample_id' ,'Drug' ,'DOSE' ,'GROWTH' ,'time' ,'time_unit' ,'study' ,'source' ]].drop_duplicates ()
44+ print (merged )
4045 return merged
4146
4247def get_data (token ):
@@ -72,10 +77,13 @@ def get_data(token):
7277
7378
7479 ##now melt the data into single columns
75- rtab = responses .melt (id_vars = responses .columns [0 :4 ],value_vars = responses .columns [4 :10 ], var_name = 'Drug' ,value_name = 'Response' )
76-
80+ rtab = responses .melt (id_vars = responses .columns [0 :4 ],value_vars = responses .columns [4 :20 ], var_name = 'Drug' ,value_name = 'Response' )
81+ print ('Collected results from ' + str (len (set (rtab .Drug )))+ ' drugs and ' + str (len (set (rtab .Organoid )))+ ' organoids' )
82+ #print(set(rtab.Drug))
7783 ##rename the drugs
7884 rtab [['Drug' ,'Rep' ]]= rtab ['Drug' ].str .lower ().str .split ('.' ,expand = True )
85+ rtab .Drug = [re .sub ('-' ,'' ,a ) for a in rtab .Drug ]
86+ #print(set(rtab.Drug))
7987 newrep = []
8088 for r in rtab .Rep :
8189 if r is None :
@@ -94,12 +102,16 @@ def get_data(token):
94102 ##dosenum is a dummy value to use for merging since we need to repeat the concentrations over and over
95103 dosenum = [a for a in range (15 )]
96104 rtab ['Dosenum' ]= dosenum * int (rtab .shape [0 ]/ 15 )
97-
105+
106+ #print(set(rtab.Drug))
98107 ##merge the concentrations
99108 concs = concs .dropna ().melt (value_vars = concs .columns ,var_name = 'Drug' ,value_name = 'Dose' )
109+ print (concs )
110+ concs .Dose = [d * 10.0 ** 6.0 for d in concs .Dose ] ## convert M to uM here
111+
100112 concs .Drug = concs .Drug .str .lower ()
101113 concs ['Dosenum' ] = dosenum * int (concs .shape [0 ]/ 15 )##creating dosenum here to merge
102-
114+ #print(set(concs.Drug))
103115
104116 return rtab .merge (concs )
105117
0 commit comments