Skip to content

Commit 9d36112

Browse files
committed
progress on experiments data
Adding in improve sample and drug IDs, added build_experiments.sh, testing single experiments with linkml. More work to do with combo experiments.
1 parent 08c2f2f commit 9d36112

4 files changed

Lines changed: 39 additions & 23 deletions

File tree

build/novartispdx/03-drugs-novartispdx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def create_novartis_pdx_drugs_file(synObject, prevDrugFilepath, outputPath):
1515
rawDrugData = pd.read_csv(file.path)
1616
# split on + operator - there are 2- and one 3- way drug combos in this dataset
1717
sepDrugNames = pd.Series(rawDrugData['Treatment'].unique()).str.split("+", expand=True)
18-
### NEED TO ALSO remove drug names with different dose info
18+
1919

2020

2121
# taking the drug names from the first and second column from the split - there is only one

build/novartispdx/04-experiments-novartispdx.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import argparse
55
import os
66

7-
def get_novartis_pdx_experiments_file:
7+
8+
# add improve IDs - for sample and drug
9+
def get_novartis_pdx_experiments_file(synObject, samples_df):
810
# input for the calc_pdx_metrics script
911

1012
file1 = synObject.get('syn66276102')
@@ -29,31 +31,38 @@ def get_novartis_pdx_experiments_file:
2931
todiscard = unique_vals_tally[unique_vals_tally['treatment']==1].index
3032
novartispdx_curvefile = novartispdx_curvefile[~novartispdx_curvefile['experiment'].isin(todiscard)]
3133
# remove groups with no 'control' treatment
32-
groupeddf = test.groupby('experiment')
34+
groupeddf = novartispdx_curvefile.groupby('experiment')
3335
no_control = groupeddf['treatment'].apply(lambda x: x.str.contains('control').any())
3436

3537
missingcontrols = no_control.reset_index()[no_control.reset_index()['treatment'] ==False]['experiment']
36-
finaldf=test[~test['experiment'].isin(missingcontrols)]
37-
38-
finalcurvefile = finaldf
38+
nomissingcontrols=novartispdx_curvefile[~novartispdx_curvefile['experiment'].isin(missingcontrols)]
39+
#merge on drug names done in calc_pdx_metrics.py
40+
#final_w_drugIDs = finaldf.merge(drug_df, how='left',right_on='chem_name', left_on="treatment")
41+
final_allIDs = nomissingcontrols.merge(samples_df, how='left', right_on='common_name', left_on='model_id')
42+
print(final_allIDs.head)
43+
final_allIDs = final_allIDs.drop('model_id', axis=1)
44+
finalDF = final_allIDs.rename({'improve_sample_id':'model_id'}, axis=1)
45+
print(finalDF.head)
46+
finalcurvefile = finalDF[['model_id', 'time', 'volume', 'treatment', 'experiment', 'dose']]
47+
print(finalcurvefile.head)
3948
return finalcurvefile
40-
#finalcurvefile.to_csv('/tmp/novartispdx_doserep.tsv', sep="\t")
4149

4250

4351
if __name__ == "__main__":
4452
parser = argparse.ArgumentParser()
4553
parser.add_argument('-t', '--token', help='Synapse authentication token')
46-
parser.add_argument('-s', '--curSampleFile', help='Sample mapping file for bladder pdo samples')
47-
parser.add_argument('-d', '--drugfile', help='Drug mapping file for bladder pdo samples')
48-
parser.add_argument('-o', '--output', default = '/tmp/novartispdx_doserep.tsv',help='Output file to be read into curve fitting code')
54+
parser.add_argument('-s', '--curSampleFile', default='/tmp/novartispdx_samples.csv', help='Sample mapping file for bladder pdo samples')
55+
parser.add_argument('-d', '--drugfile', default='/tmp/novartispdx_drugs.tsv', help='Drug mapping file for bladder pdo samples')
56+
parser.add_argument('-o', '--output', default = '/tmp/novartispdx_experiments.tsv',help='Output experiments file')
4957

5058
args = parser.parse_args()
5159
print("Logging into Synapse")
5260
PAT = args.token
5361
synObject = synapseclient.login(authToken=PAT)
54-
drug_df = pd.read_csv(args.drugfile, sep='\t')
62+
#drug_df = pd.read_csv(args.drugfile, sep='\t')
5563
samples_df = pd.read_csv(args.curSampleFile)
56-
57-
doseresponse_data = get_novartis_pdx_experiments_file(synObject, samples_df, drug_df)
58-
doseresponse_data.to_csv(args.output, sep='\t')
64+
65+
doseresponse_data = get_novartis_pdx_experiments_file(synObject, samples_df)
66+
print(doseresponse_data.head)
67+
doseresponse_data.to_csv('/tmp/novartispdx_curvedata.tsv', columns=list({'model_id', 'time', 'volume', 'treatment','experiment', 'dose'}), sep='\t')
5968

build/novartispdx/build_experiments.sh

100644 → 100755
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
#python3 04-experiments-novartispdx.py --token $SYNAPSE_AUTH_TOKEN
3+
4+
python3 -m novartispdx.04-experiments-novartispdx --token $SYNAPSE_AUTH_TOKEN -o ~/Projects/CoderData/dev-environment/novartispdx/novartispdx_curvedata.tsv
5+
python3 utils/calc_pdx_metrics.py /tmp/novartispdx_curvedata.tsv --drugfile=/tmp/novartispdx_drugs.tsv --outprefix=/tmp/novartispdx

build/utils/calc_pdx_metrics.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -275,11 +275,11 @@ def lmm(time, volume, treatment, drug_name):
275275
raise ValueError("These columns must be present: 'model_id', 'volume', 'time', 'exp_type'")
276276

277277
data['log_volume'] = np.log(data['volume'])
278-
print('drug name is ' + drug_name)
278+
#print('drug name is ' + drug_name)
279279
data['exp_type'] = data['exp_type'].astype('category')
280280
data['exp_type']=pd.Categorical(data['exp_type'],categories = ['control',drug_name], ordered=True)
281-
print(data)
282-
print(data['exp_type'].cat.categories)
281+
#print(data)
282+
#print(data['exp_type'].cat.categories)
283283
# Define the formula for mixed linear model
284284
formula = 'log_volume ~ time*exp_type'
285285

@@ -327,11 +327,13 @@ def main():
327327
# source improve_sample_id improve_drug_id study time time_unit dose_response_metric dose_response_value
328328

329329
combos[['drug1','drug2']]=combos.drug.str.split('+',expand=True)
330+
print('COMBOS ARE: ')
331+
print(combos[['drug1', 'drug2']])
330332
combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
331333

332334
expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
333335
expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
334-
336+
print(expcomb[['improve_drug_1', 'improve_drug_2']])
335337
expcomb[['source']]='Synapse'
336338
expcomb[['study']]='MPNST PDX in vivo'
337339

@@ -352,9 +354,9 @@ def get_drug_stats(df, control='control'):
352354
for name, group in tqdm(groups):
353355
# Each group contains multiple treatments and a control
354356
drugs = set(group.treatment) - set([control])
355-
print('line 355')
356-
print(name[0])
357-
print(drugs)
357+
#print('line 355')
358+
#print(name[0])
359+
#print(drugs)
358360
mod = list(set(group.model_id))[0]
359361

360362
ctl_data = group[group.treatment == control]
@@ -364,8 +366,8 @@ def get_drug_stats(df, control='control'):
364366
continue
365367
ctl_auc = AUC(ctl_time, ctl_volume)
366368
for d in drugs:
367-
print('is our drug a string or dict?')
368-
print(str(d))
369+
#print('is our drug a string or dict?')
370+
#print(str(d))
369371
d_data = group[group.treatment == str(d)]
370372
treat_time = np.array(d_data.time)
371373
treat_volume = np.array(d_data.volume)

0 commit comments

Comments
 (0)