Skip to content

Commit cbc80a9

Browse files
committed
updated schema
allowed for `published_auc` as another dose response value
1 parent 1c4969e commit cbc80a9

4 files changed

Lines changed: 57 additions & 2 deletions

File tree

build/pancpdo/04-getPancPDOExperiments.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def map_to_drugs_samps(dose_rep,drugfile,sampfile):
4444
print(merged)
4545
return merged
4646

47+
4748
def get_data(token):
4849
synid = 'syn64333325'
4950

build/pancpdo/05-addPrecalcAUC.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
2+
3+
4+
import os
5+
import pandas as pd
6+
import wget
7+
import argparse
8+
import synapseclient as sc
9+
import math
10+
import re
11+
12+
13+
def get_precalc_auc(tablink=None, loader=None):
    '''
    Get the pre-calculated (published) AUC values from the supplementary data.

    Sheets 1 and 2 of the workbook (zero-based positions) are read, skipping
    the first row of each. Column names are lower-cased and both tables are
    reshaped from wide (one column per drug) to long format.

    Parameters
    ----------
    tablink : str or path-like, optional
        Location of the supplementary workbook. Defaults to the publisher
        CDN link used by the original script.
        NOTE(review): that link is a signed URL carrying an ``Expires``
        query parameter, so it presumably stops working after that time;
        pass a local copy of the workbook here when it does.
    loader : callable, optional
        Function called as ``loader(tablink, sheet_name=[1, 2], skiprows=1)``
        that returns a mapping from sheet position to DataFrame. Defaults
        to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        Long-format table with columns ``other_id`` (from ``sample id``),
        ``chem_name`` (the drug column name) and ``published_auc``.
    '''
    if tablink is None:
        tablink = 'https://aacr.silverchair-cdn.com/aacr/content_public/journal/cancerdiscovery/8/9/10.1158_2159-8290.cd-18-0349/5/21598290cd180349-sup-199398_2_supp_4775187_p95dln.xlsx?Expires=1738004990&Signature=av8XadTm9AmI20O2Y7J7aHDtPbpluKJIfI5ubsoiYJ15D0zh5p1ltF4a7-DCSWTSMs-qX5TD09shxHeqkQ2NkLWHZsXoCD5KyREGhEgcDAvWZ1V9kwXDm0bjpINipAPPtC20oeuw6c~hPooF3Mtgzp4MzMCCjcVwfn05u27a0kS0yifBi11wQj3nmHlR3ym-2fYkFuqQtnNPCzH8-yIw21y0kTvXrNodAzC5pGA8qUK4PLxBt52xUIvTEPsPiPjXwBnDCfVsLGGdDYIY25lEPKiA403q6kFYvrSQ3bsTvM4kuvltb7yS4AXjK0-tthMOKbqq8~uREmJCcueADUF91g__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA'
    if loader is None:
        loader = pd.read_excel

    # Request both sheets in one call so the workbook is fetched only once.
    sheets = loader(tablink, sheet_name=[1, 2], skiprows=1)

    def lower_case(name):
        return str(name).lower()

    # rename() returns new frames, so the loader's objects are not mutated.
    chemo = sheets[1].rename(columns=lower_case)
    targeted = sheets[2].rename(columns=lower_case)

    id_col = 'sample id'
    chemo_drugs = ['gemcitabine', 'paclitaxel', 'sn-38', '5-fu', 'oxaliplatin']
    # Every non-id column of the targeted sheet is treated as a drug.
    # (set('sample id') would be a set of *characters*, which fails to
    # exclude the id column and drops single-letter column names instead.)
    targeted_drugs = [col for col in targeted.columns if col != id_col]

    cdat = chemo.melt(id_vars=id_col, value_vars=chemo_drugs,
                      var_name='drug', value_name='published_auc')
    tdat = targeted.melt(id_vars=id_col, value_vars=targeted_drugs,
                         var_name='drug', value_name='published_auc')

    combined = pd.concat([cdat, tdat], ignore_index=True)
    return combined.rename(columns={id_col: 'other_id', 'drug': 'chem_name'})
30+
31+
def main():
    '''
    Append the published AUC values to an existing experiments file.

    Reads the sample mapping (comma-separated) and drug mapping
    (tab-separated) files, joins them to the published AUC table returned by
    get_precalc_auc(), reshapes the result into dose_response_metric /
    dose_response_value rows, and writes the existing experiments table plus
    the new rows back to --expfile as a tab-separated file.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples', required=True,
                        help='Sample mapping file for panc pdo samples')
    parser.add_argument('-d', '--drugs', required=True,
                        help='Drug mapping file for panc pdo samples')
    parser.add_argument('-e', '--expfile', default='/tmp/pancpdo_experiments.tsv',
                        help='Output file to be read into curve fitting code')
    args = parser.parse_args()

    samples = pd.read_csv(args.samples, sep=',')
    # pandas has no read_tsv(); tab-separated files go through read_csv.
    drugs = pd.read_csv(args.drugs, sep='\t')

    # Both merges join on whatever column names the frames share.
    # NOTE(review): presumably 'other_id' for samples and 'chem_name' for
    # drugs -- confirm against the mapping files.
    newdat = get_precalc_auc().merge(samples).merge(drugs)
    newdat = newdat[['improve_sample_id', 'improve_drug_id', 'published_auc']].drop_duplicates()
    newdat = newdat.melt(id_vars=['improve_sample_id', 'improve_drug_id'],
                         value_vars='published_auc',
                         var_name='dose_response_metric',
                         value_name='dose_response_value')

    # Constant annotation columns attached to every published-AUC row.
    newdat['source'] = 'TiriacEtAl2018'
    newdat['time'] = 120
    newdat['time_unit'] = 'hours'
    newdat['study'] = 'pancpdo'

    olddat = pd.read_csv(args.expfile, sep='\t')
    res = pd.concat([olddat, newdat], ignore_index=True)
    # Write tab-separated without the index so the file keeps the format
    # it was read in.
    res.to_csv(args.expfile, sep='\t', index=False)


if __name__ == '__main__':
    # build_exp.sh runs this file as a script, so main() must be invoked.
    main()

build/pancpdo/build_exp.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit
55

66
echo "Running 04-drug_dosage_and_curves.py with drugfile $2 and curSampleFile $1"
77
python 04-getPancPDOExperiments.py --pat $SYNAPSE_AUTH_TOKEN --drugs $2 --samples $1 --output /tmp/pancpdo_doserep.tsv
8-
python fit_curve.py --input /tmp/pancpdo_doserep.tsv
8+
python fit_curve.py --input /tmp/pancpdo_doserep.tsv --output /tmp/pancpdo_doserep.tsv
99

1010
##now move file and gzip
11-
mv /tmp/pancpdo_doserep.tsv /tmp/pancpdo_experiments.tsv
11+
mv /tmp/pancpdo_doserep.tsv.0 /tmp/pancpdo_experiments.tsv
12+
13+
python 05-addPrecalcAUC.py --samples $1 --drugs $2 --expfile /tmp/pancpdo_experiments.tsv
1214
gzip /tmp/pancpdo_experiments.tsv

schema/coderdata.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ enums:
221221
description: I believe this is the drug sensitivity score
222222
mrecist:
223223
description: For PDX data this value should be either Progressive Disease, Stable Disease, Partial Response or Complete Response.
224+
published_auc:
225+
description: AUC that has been published with the study. Currently used in organoid data.
224226
gc_auc:
225227
description: Area under tumor growth curve
226228
efs:

0 commit comments

Comments
 (0)