Skip to content

Commit 3798bfb

Browse files
committed
add novartispdx sample file
Sample file for 386 solid tumor PDX from the Novartis project.
1 parent c35ec76 commit 3798bfb

2 files changed

Lines changed: 66 additions & 0 deletions

File tree

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import pandas as pd
2+
import synapseclient
3+
import numpy as np
4+
import argparse
5+
import os
6+
7+
def get_complete_novartispdx_sample_sheet(synObject):
    """Build the combined Novartis PDX sample sheet from Synapse.

    Every file under the Synapse folder ``syn66275995`` is downloaded and
    read as CSV, except for the three files that hold drug information.
    The tables are stacked and reduced to the sample-sheet columns.

    Args:
        synObject: Logged-in Synapse client; must provide ``getChildren``
            and ``get`` (the returned entity exposes a local ``path``).

    Returns:
        A new ``pandas.DataFrame`` with the columns ``other_id``,
        ``common_name``, ``other_id_source``, ``other_names``,
        ``cancer_type``, ``species`` and ``model_type``.

    Raises:
        ValueError: If the folder contains no sample files to read.
        KeyError: If a required source column (``Sample ID``,
            ``MODEL_ORIGINATOR_ID``, ``CANCER_HISTOLOGY``) is missing.
    """
    # leave off synIDs for drug info; filtering (instead of list.remove)
    # does not fail if one of these files is absent from the folder
    drug_info_ids = {'syn66276102', 'syn66276098', 'syn66477971'}
    children = synObject.getChildren(parent='syn66275995', includeTypes=['file'])
    syn_ids = [item['id'] for item in children if item['id'] not in drug_info_ids]

    # read every file first and concatenate once, rather than growing a
    # dataframe inside the loop
    frames = [pd.read_csv(synObject.get(syn_id).path) for syn_id in syn_ids]
    if not frames:
        raise ValueError("No sample files found under Synapse folder syn66275995")
    combined = pd.concat(frames, ignore_index=True)

    # cancer type is the lower-cased CANCER_HISTOLOGY with its first
    # whitespace-delimited word dropped; values without any whitespace
    # yield NaN. (?s) lets '.' span newlines so the whole remainder is kept.
    histology = combined['CANCER_HISTOLOGY'].str.lower()
    cancer_type = histology.str.extract(r"(?s)^\S*\s(.*)", expand=False)

    # build a fresh frame (not a slice of `combined`) so callers can add
    # columns to the result without a SettingWithCopyWarning
    return pd.DataFrame({
        'other_id': combined['Sample ID'],
        'common_name': combined['MODEL_ORIGINATOR_ID'],
        'other_id_source': 'Synapse',
        'other_names': '',
        'cancer_type': cancer_type,
        'species': "Homo Sapiens(human)",
        'model_type': 'patient derived xenograft',
    })
33+
34+
if __name__ == "__main__":
    # Command-line entry point: log into Synapse, build the Novartis PDX
    # sample sheet, number the samples after any previously issued
    # improve_sample_id values, and write the result to /tmp.

    parser = argparse.ArgumentParser(description="This script handles downloading, processing and formatting of sample files for the Novartis PDX data into a single samplesheet")

    parser.add_argument('-t', '--token', type=str, help='Synapse Token')

    parser.add_argument("-p", '--prevSamples', nargs="?", type=str, default="", const="", help="Use this to provide previous sample file, will run sample file generation")

    args = parser.parse_args()

    print("Logging into Synapse")
    PAT = args.token
    synObject = synapseclient.login(authToken=PAT)

    samplesheet = get_complete_novartispdx_sample_sheet(synObject)

    # Continue numbering after the largest id in the previous sample file;
    # start from 1 when no previous file is given or it holds no ids.
    prev_max_improve_id = 0
    if args.prevSamples:
        prev_ids = pd.read_csv(args.prevSamples)['improve_sample_id']
        # Series.max() skips NaN, whereas the builtin max() is unreliable
        # with NaN and raises on an empty column.
        if prev_ids.notna().any():
            prev_max_improve_id = int(prev_ids.max())

    first_new_id = prev_max_improve_id + 1
    # assign() returns a new frame, so this is safe even if the sample
    # sheet is a slice of a larger dataframe.
    samplesheet = samplesheet.assign(
        improve_sample_id=range(first_new_id, first_new_id + samplesheet.shape[0])
    )

    samplesheet.to_csv('/tmp/novartispdx_samples.csv', index=False)
58+
59+

build/novartispdx/build_samples.sh

100644100755
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Build the Novartis PDX sample sheet.
# Usage: build_samples.sh [previous_sample_file]
# Requires SYNAPSE_AUTH_TOKEN to be set in the environment.
set -euo pipefail

trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR

# The previous sample file is optional for the Python script, so default to
# an empty string instead of tripping `set -u` when no argument is given.
prev_samples="${1:-}"

echo "Running 01-samples-novartispdx.py with token and previous sample file ${prev_samples}"
# Quote both expansions so paths or tokens are never word-split or globbed.
python3 01-samples-novartispdx.py --token "$SYNAPSE_AUTH_TOKEN" -p "$prev_samples"

0 commit comments

Comments
 (0)