Skip to content

Commit ea9be4a

Browse files
committed
added docker and code for PDX data build
1 parent 45842dd commit ea9be4a

6 files changed

Lines changed: 68 additions & 23 deletions

File tree

build/docker/Dockerfile.mpnstPDX

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
FROM r-base:4.3.2
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update --allow-insecure-repositories
4+
#RUN apt-get install -y --allow-unauthenticated build-essential --fix-missing libpq-dev python3-pip python3-setuptools python3-dev python3-venv libcurl4-openssl-dev libxml2-dev
5+
RUN apt-get install -y --allow-unauthenticated build-essential --fix-missing python3-pip python3-setuptools python3-dev python3-venv libcurl4-openssl-dev libxml2-dev
6+
7+
RUN python3 -m venv /opt/venv
8+
RUN /opt/venv/bin/pip3 install --upgrade pip
9+
10+
11+
ENV PYTHONPATH "${PYTHONPATH}:/app"
12+
WORKDIR /app
13+
14+
ADD build/mpnstPDX/requirements.txt .
15+
ADD build/mpnstPDX/requirements.r .
16+
ADD build/mpnstPDX/* ./
17+
ADD build/utils/* ./
18+
19+
# installing python libraries
20+
RUN /opt/venv/bin/pip3 install -r requirements.txt
21+
22+
# installing r libraries
23+
RUN Rscript requirements.r
24+
25+
26+
VOLUME ["/tmp"]

build/mpnstPDX/02_get_drug_data.R

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ ignore_file_path <- '/tmp/mpnstpdx_ignore_chems.txt'
101101
use_python("/opt/venv/bin/python3", required = TRUE)
102102
source_python("pubchem_retrieval.py")
103103

104-
update_dataframe_and_write_tsv(unique_names=alldrugs,output_filename=output_file_path,ignore_chems=ignore_file_path)
104+
update_dataframe_and_write_tsv(unique_names=drugs,output_filename=output_file_path,ignore_chems=ignore_file_path)
105105

106106

107107
tab<-read.table(newdrugfile,sep='\t',header=T,quote="",comment.char="")

build/mpnstPDX/03_get_drug_response_data.R

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ pdx_meta$parentId=unlist(lapply(pdx_meta$id,function(x) synGet(x)$parentId))
9797
oldfolders=c('syn22018363','syn22024460','syn22024428','syn22024429','syn22024437','syn22024438')
9898

9999
old_meta<-subset(pdx_meta,parentId%in%oldfolders)
100+
100101
old_data<-do.call(rbind,lapply(unique(old_meta$parentId),function(x){
101102
ids<-subset(old_meta,parentId==x)|>
102103
subset(!is.na(id))
@@ -121,7 +122,7 @@ old_data<-do.call(rbind,lapply(unique(old_meta$parentId),function(x){
121122
}))
122123
}))|>
123124
left_join(unique(select(old_meta,id=parentId,improve_sample_id)))|>
124-
dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,volume=assay_value)|>distinct()
125+
dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,time_unit=experimental_time_point_unit,volume=assay_value)|>distinct()
125126

126127

127128

@@ -150,7 +151,7 @@ new_data<-do.call(rbind,lapply(unique(new_meta$id), function(x){
150151
#print(head(tab))
151152
return(tab)}))|>
152153
left_join(pdx_meta)|>
153-
dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,volume=assay_value)|>distinct()
154+
dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,time_unit=experimental_time_point_unit,volume=assay_value)|>distinct()
154155

155156
##maybe tweak the data frame a bit depending on curve fitting script
156157
pdx_data<-rbind(old_data,new_data)
@@ -161,7 +162,7 @@ pdx_data<-rbind(old_data,new_data)
161162
fwrite(pdx_data,'/tmp/curve_data.tsv',sep='\t')
162163

163164
##TODO: create new curve fitting script in python
164-
pycmd = '/opt/venv/bin/python fit_pdx_curve.py --input /tmp/curve_data.tsv --output /tmp/mpnstPDX_experiments.tsv'
165+
pycmd = '/opt/venv/bin/python calc_pdx_metrics.py --input /tmp/curve_data.tsv --outprefix /tmp/mpnstPDX'
165166
print('running curve fitting')
166167
#system(pycmd)
167168

build/mpnstPDX/build_exp.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
Rscript 03_get_drug_response_data.R $SYNAPSE_AUTH_TOKEN $1 $2
2-
/opt/venv/bin/python3 compute_metrics.py /tmp/file.tsv --drugfile=/tmp/mpnstpdx_drugs.tsv
2+
/opt/venv/bin/python3 compute_metrics.py /tmp/curve_data.tsv --drugfile=/tmp/mpnstpdx_drugs.tsv --outprefix=/tmp/mpnstpdx

build/utils/calc_pdx_metrics.py

Lines changed: 32 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -187,9 +187,9 @@ def AUC(time, volume, time_normalize=True):
187187
#print(time)
188188
if time_normalize:
189189
auc = auc/np.max(time)
190-
return {"metric": "auc", "value": auc}
190+
return {"metric": "auc", "value": auc, 'time':np.max(time)}
191191

192-
def TGI(contr_volume, treat_volume):
192+
def TGI(contr_volume, treat_volume,time):
193193
"""
194194
Computes the tumor growth inhibition (TGI) between two time-volume curves.
195195
@@ -207,7 +207,8 @@ def TGI(contr_volume, treat_volume):
207207
# Simulated batch response class object
208208
rtx = {
209209
"metric": "TGI",
210-
"value": tgi
210+
"value": tgi,
211+
'time': np.max(time)
211212
}
212213
return rtx
213214

@@ -240,7 +241,7 @@ def ABC(contr_time=None, contr_volume=None, treat_time=None, treat_volume=None):
240241
tre = AUC(treat_time, treat_volume)
241242

242243
abc = con['value'] - tre['value']
243-
return {"metric": "abc", "value": abc}#, "control": con, "treatment": tre}
244+
return {"metric": "abc", "value": abc,'time':np.max(treat_time)}#, "control": con, "treatment": tre}
244245

245246

246247
###LMM CODE
@@ -292,13 +293,14 @@ def lmm(time, volume, treatment, drug_name):
292293
'interaction_coef_value': i_coef_value
293294
}
294295

295-
return {'metric': 'lmm','value': i_coef_value}
296+
return {'metric': 'lmm','value': i_coef_value,'time':np.max(time)}
296297

297298
def main():
298299
parser=ArgumentParser()
299300
###read in file with model id, volume, time, condition
300301
parser.add_argument('curvefile')
301302
parser.add_argument('--drugfile')
303+
parser.add_argument('--outprefix',default='/tmp/')
302304

303305
args = parser.parse_args()
304306

@@ -308,8 +310,24 @@ def main():
308310
singles, combos = get_drug_stats(tab)
309311

310312
##join with drug ids
313+
expsing = singles.rename({'drug':'chem_name','metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).merge(drugs,on='chem_name',how='left')[['improve_drug_id','improve_sample_id','drug_combination_metric','drug_combination_value']]
314+
expsing = expsing.dropna()
315+
316+
combos[['drug1','drug2']]=combos.drug.str.split('+',expand=True)
317+
combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
318+
319+
expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','drug_combination_metric','drug_combination_value']]
320+
expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','drug_combination_metric','drug_combination_value']]
321+
322+
expcomb[['source']]='Synapse'
323+
expcomb[['study']]='MPNST PDX in vivo'
324+
325+
expsing[['source']]='Synapse'
326+
expsing[['study']]='MPNST PDX in vivo'
327+
expsing.to_csv(args.outprefix+'_experiments.csv',index=False)
328+
expcomb.to_csv(args.outprefix+'_combinations.csv',index=False)
311329

312-
##stats.to_csv('pdx_exp.tsv',sep='\t',index=False)
330+
313331

314332
def get_drug_stats(df,control='control'):
315333
##for each experiment, call group
@@ -341,24 +359,24 @@ def get_drug_stats(df,control='control'):
341359
treat_abc = ABC(ctl_time,ctl_volume,treat_time,treat_volume)
342360
#print(f"AUC: {treat_auc}")
343361
#print(f"ABC: {treat_abc}")
344-
treat_abc.update({'sample':mod,'drug':d})
362+
treat_abc.update({'sample':mod,'drug':d,'time_unit':'days'})
345363
if '+' in d:
346364
combores.append(treat_abc)
347365
else:
348366
singleres.append(treat_abc)
349367
#lmm
350368
comb = pd.concat([ctl_data,d_data])
351369
lmm_res = lmm(comb.time, comb.volume, comb.treatment,d)
352-
lmm_res.update({'sample':mod,'drug':d})
370+
lmm_res.update({'sample':mod,'drug':d,'time_unit':'days'})
353371
#print(f"LMM: {lmm_res}")
354372
if '+' in d:
355-
combores.append(lm_res)
373+
combores.append(lmm_res)
356374
else:
357-
singleres.append(lm_res)
375+
singleres.append(lmm_res)
358376

359377
#get tgi for group
360-
tg = TGI(ctl_volume,treat_volume)
361-
tg.update({'sample':mod,'drug':d})
378+
tg = TGI(ctl_volume,treat_volume,treat_time)
379+
tg.update({'sample':mod,'drug':d,'time_unit':'days'})
362380
#print(tg)
363381
if '+' in d:
364382
combores.append(tg)
@@ -368,13 +386,13 @@ def get_drug_stats(df,control='control'):
368386

369387
#get mRECIST for group
370388
mr = mrecist(treat_time,treat_volume)
371-
mr.update({'sample':mod,'drug':d})
389+
mr.update({'sample':mod,'drug':d,'time_unit':'days'})
372390
if '+' in d:
373391
combores.append(mr)
374392
else:
375393
singleres.append(mr)
376394

377-
sing = pd.DataFrame.from_records(singlres)
395+
sing = pd.DataFrame.from_records(singleres)
378396
comb = pd.DataFrame.from_records(combores)
379397
return sing,comb
380398

schema/coderdata.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -187,13 +187,13 @@ classes:
187187
- source
188188
- study
189189
attributes:
190-
drug_one_id:
190+
drug_drug_2:
191191
description: improve_drug_id of first drug
192-
drug_two_id:
192+
improve_drug_2:
193193
description: improve_drug_id of second drug
194-
combination_metric:
194+
drug_combination_metric:
195195
description: metric calculated for synergy, or other metric of two drugs
196-
combination_value:
196+
drug_combination_value:
197197
description: value of metric for synergy or combination
198198
enums:
199199
ResponseMetric:

0 commit comments

Comments
 (0)