|
| 1 | + |
| 2 | + |
| 3 | + |
| 4 | +import os |
| 5 | +import pandas as pd |
| 6 | +import wget |
| 7 | +import argparse |
| 8 | +import synapseclient as sc |
| 9 | +import math |
| 10 | +import re |
| 11 | + |
| 12 | + |
def get_precalc_auc(tablink=None):
    '''
    Get pre-calculated AUC values from the publication's supplementary data.

    Reads the sheets at index 1 and index 2 of the supplementary workbook
    (skipping the first row of each), lower-cases the column names, reshapes
    both sheets to long format and stacks them.

    Parameters
    ----------
    tablink : str, optional
        URL or path of the supplementary workbook. Defaults to the signed
        link below. NOTE(review): that link carries an `Expires=` query
        parameter, so it presumably stops working after that time -- pass a
        fresh link or a local copy if the download fails.

    Returns
    -------
    pandas.DataFrame
        Long-format table with columns `other_id`, `chem_name` and
        `published_auc`.
    '''
    if tablink is None:
        tablink = 'https://aacr.silverchair-cdn.com/aacr/content_public/journal/cancerdiscovery/8/9/10.1158_2159-8290.cd-18-0349/5/21598290cd180349-sup-199398_2_supp_4775187_p95dln.xlsx?Expires=1738004990&Signature=av8XadTm9AmI20O2Y7J7aHDtPbpluKJIfI5ubsoiYJ15D0zh5p1ltF4a7-DCSWTSMs-qX5TD09shxHeqkQ2NkLWHZsXoCD5KyREGhEgcDAvWZ1V9kwXDm0bjpINipAPPtC20oeuw6c~hPooF3Mtgzp4MzMCCjcVwfn05u27a0kS0yifBi11wQj3nmHlR3ym-2fYkFuqQtnNPCzH8-yIw21y0kTvXrNodAzC5pGA8qUK4PLxBt52xUIvTEPsPiPjXwBnDCfVsLGGdDYIY25lEPKiA403q6kFYvrSQ3bsTvM4kuvltb7yS4AXjK0-tthMOKbqq8~uREmJCcueADUF91g__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA'

    chemo = pd.read_excel(tablink, sheet_name=1, skiprows=1)
    chemo.columns = [str(c).lower() for c in chemo.columns]
    targeted = pd.read_excel(tablink, sheet_name=2, skiprows=1)
    targeted.columns = [str(c).lower() for c in targeted.columns]

    cdat = chemo.melt(
        id_vars='sample id',
        value_vars=['gemcitabine', 'paclitaxel', 'sn-38', '5-fu', 'oxaliplatin'],
        var_name='drug',
        value_name='published_auc',
    )

    # Every column except the identifier is melted. The identifier must be
    # excluded by name: set('sample id') would be a set of single characters
    # and would leave 'sample id' among the value columns. A list (rather
    # than a set) keeps the sheet's column order deterministic.
    targeted_drugs = [c for c in targeted.columns if c != 'sample id']
    tdat = targeted.melt(
        id_vars='sample id',
        value_vars=targeted_drugs,
        var_name='drug',
        value_name='published_auc',
    )

    combined = pd.concat([cdat, tdat])
    combined = combined.rename(columns={'sample id': 'other_id', 'drug': 'chem_name'})

    return combined
| 30 | + |
def main():
    '''
    Append the published AUC values to the experiments file.

    Parses the command line, loads the sample (comma-separated) and drug
    (tab-separated) mapping files, merges them with the table returned by
    `get_precalc_auc`, reshapes the result into
    `dose_response_metric` / `dose_response_value` rows, tags it with fixed
    source/time/study values, and rewrites `--expfile` with the existing rows
    followed by the new ones.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples', required=True, help='Sample mapping file for panc pdo samples')
    parser.add_argument('-d', '--drugs', required=True, help='Drug mapping file for panc pdo samples')
    parser.add_argument('-e', '--expfile', default='/tmp/pancpdo_experiments.tsv', help='Output file to be read into curve fitting code')

    args = parser.parse_args()
    samples = pd.read_csv(args.samples, sep=',')
    # pandas has no read_tsv; tab-separated files are read with read_csv.
    drugs = pd.read_csv(args.drugs, sep='\t')

    # Both merges join on whatever column names the frames share
    # (presumably other_id and chem_name -- verify against the mapping files).
    newdat = get_precalc_auc().merge(samples).merge(drugs)
    newdat = newdat[['improve_sample_id', 'improve_drug_id', 'published_auc']].drop_duplicates()
    newdat = newdat.melt(
        id_vars=['improve_sample_id', 'improve_drug_id'],
        value_vars='published_auc',
        var_name='dose_response_metric',
        value_name='dose_response_value',
    )
    newdat['source'] = 'TiriacEtAl2018'
    newdat['time'] = 120
    newdat['time_unit'] = 'hours'
    newdat['study'] = 'pancpdo'

    olddat = pd.read_csv(args.expfile, sep='\t')
    res = pd.concat([olddat, newdat])
    # Write back in the same tab-separated, index-free layout the file is
    # read in, so the output can be re-read by the line above.
    res.to_csv(args.expfile, sep='\t', index=False)
0 commit comments