Skip to content

Commit 04821c1

Browse files
changes for docker to work
1 parent 837db18 commit 04821c1

8 files changed

Lines changed: 36 additions & 32 deletions

File tree

build/crc_organoids/01-samples-crc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def generate_sample_file(sequencing_data_path:str = None, prev_samples_path:str
171171
# Download RNA seq data
172172
download_rnaseq(save_path = "/tmp/GSE65253_col_tum_org_merge.csv.gz")
173173
# Download sequencing data
174-
sequencing_download_path = download_sequencing_data(synID = args.synapseID, synToken = args.token, save_path = "/tmp/mmc2.xlsx")
174+
sequencing_download_path = download_sequencing_data(synID = args.synapseID, synToken = args.token, save_path = "/tmp")
175175

176176
if args.samples:
177177
if args.prevSamples is None or args.prevSamples=='':
@@ -180,6 +180,6 @@ def generate_sample_file(sequencing_data_path:str = None, prev_samples_path:str
180180
else:
181181
print("Previous sample sheet {} detected. Running sample file generation and checking for duplicate IDs.".format(args.prevSamples))
182182
sample_sheet = generate_sample_file(sequencing_data_path = sequencing_download_path, prev_samples_path= args.prevSamples)
183-
sample_sheet.to_csv("/tmp/crc_samples.csv", index=False)
183+
sample_sheet.to_csv("/tmp/crc_organoids_samples.csv", index=False)
184184

185185

build/crc_organoids/02-omics-crc.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,13 @@ def map_transcriptomics(transciptomics_data, improve_id_data, entrez_data):
103103
# move row names to a column called "stable_id" and format gene names to remove the chromosome num
104104
transciptomics_data['stable_id'] = transciptomics_data.index
105105
transciptomics_data['stable_id'] = transciptomics_data['stable_id'].str.split('__',n = 1,expand=True).iloc[:,0]
106-
transciptomics_data.to_csv("counts_for_tpm_conversion.csv")
106+
transciptomics_data.to_csv("/tmp/counts_for_tpm_conversion.csv")
107107

108108
# run tpmFromCounts.py to convert counts to tpm
109-
os.system("python tpmFromCounts.py --counts counts_for_tpm_conversion.csv --genome_build https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.gtf.gz --gene_col stable_id --exclude_col stable_id --out_file transcriptomics_tpm.tsv")
109+
os.system("python3 tpmFromCounts.py --counts /tmp/counts_for_tpm_conversion.csv --genome_build https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.gtf.gz --gene_col stable_id --exclude_col stable_id --out_file /tmp/transcriptomics_tpm.tsv")
110110

111111
# get output from script (in tsv format) and average across organoids from each patient
112-
tpm_transciptomics_data = pd.read_csv("transcriptomics_tpm.tsv", sep="\t")
112+
tpm_transciptomics_data = pd.read_csv("/tmp/transcriptomics_tpm.tsv", sep="\t")
113113
tpm_transciptomics_data.index = tpm_transciptomics_data['stable_id']
114114
tpm_transciptomics_data = tpm_transciptomics_data.drop(columns=['stable_id'])
115115
transpose_transcriptomics = tpm_transciptomics_data.T
@@ -226,7 +226,7 @@ def map_copy_number(copy_number_data, improve_id_data, entrez_data):
226226
if args.parse:
227227
print("Parsing excel file.")
228228
# Parse the downloaded excel file to get the mutation data and the copy number data
229-
mutation_df, copy_num_df = parse_mmc2("/tmp/mmc2.xlsx")
229+
mutation_df, copy_num_df = parse_mmc2("/tmp/mmc2.xlsx/mmc2.xlsx")
230230
# Save mutation and copy number data into csv format
231231
mutation_df.to_csv("/tmp/mutation_data.csv")
232232
copy_num_df.to_csv("/tmp/copy_num_data.csv")
@@ -241,7 +241,7 @@ def map_copy_number(copy_number_data, improve_id_data, entrez_data):
241241
exit()
242242
else:
243243
print("Starting transcriptomics data.")
244-
transcriptomics_df = map_transcriptomics(transciptomics_data = "/tmp/GSE65253_col_tum_org_merge.csv.gz", improve_id_data = "/tmp/crc_samples.csv", entrez_data = "/tmp/genes.csv")
244+
transcriptomics_df = map_transcriptomics(transciptomics_data = "/tmp/GSE65253_col_tum_org_merge.csv.gz", improve_id_data = "/tmp/crc_organoids_samples.csv", entrez_data = "/tmp/genes.csv")
245245
transcriptomics_df.to_csv("/tmp/crc_organoids_transcriptomics.csv", index=False)
246246

247247
if args.mutations:
@@ -253,7 +253,7 @@ def map_copy_number(copy_number_data, improve_id_data, entrez_data):
253253
exit()
254254
else:
255255
print("Starting mutations data.")
256-
mutation_df = map_mutations(mutation_data = "/tmp/mutation_data.csv", improve_id_data = "/tmp/crc_samples.csv", entrez_data = "/tmp/genes.csv")
256+
mutation_df = map_mutations(mutation_data = "/tmp/mutation_data.csv", improve_id_data = "/tmp/crc_organoids_samples.csv", entrez_data = "/tmp/genes.csv")
257257
mutation_df.to_csv("/tmp/crc_organoids_mutations.csv", index=False)
258258

259259
if args.copy_number:
@@ -265,6 +265,6 @@ def map_copy_number(copy_number_data, improve_id_data, entrez_data):
265265
exit()
266266
else:
267267
print("Starting copy number data.")
268-
mutation_df = map_copy_number(copy_number_data = "/tmp/copy_num_data.csv", improve_id_data = "/tmp/crc_samples.csv", entrez_data = "/tmp/genes.csv")
268+
mutation_df = map_copy_number(copy_number_data = "/tmp/copy_num_data.csv", improve_id_data = "/tmp/crc_organoids_samples.csv", entrez_data = "/tmp/genes.csv")
269269
mutation_df.to_csv("/tmp/crc_organoids_copynumber.csv", index=False)
270270

build/crc_organoids/03-drug-crc.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import argparse
66
import synapseclient
77
from pubchem_retrieval import update_dataframe_and_write_tsv
8+
import warnings
9+
warnings.filterwarnings("ignore")
810

911
### get drug data
1012
def download_synapse_data(synID:str, save_path:str = None, synToken:str = None):
@@ -44,17 +46,17 @@ def download_synapse_data(synID:str, save_path:str = None, synToken:str = None):
4446
def create_crc_drug_data(fitted_drug_data_path:str, prevDrugFilepath:str, output_drug_data_path:str):
4547
# import fitted drug data and get drug names from DRUG_NAME column
4648
fitted_drug_df = pd.read_csv(fitted_drug_data_path)
47-
crc_drugs_df = pd.DataFrame(fitted_drug_df['DRUG_NAME'].unique())
49+
crc_drugs_df = pd.DataFrame(columns={"DRUG_NAME":fitted_drug_df['DRUG_NAME'].unique()})
4850
# if there is a prev drug file, check for new drugs
49-
if prevDrugFilepath is not None and prevDrugFilepath is not "":
51+
if prevDrugFilepath != None and prevDrugFilepath != "":
5052
prev_drug_df = pd.read_csv(prevDrugFilepath)
5153
# get drugs that are only in the crc_drugs_df (aka new drugs only)
5254
new_drugs_df = crc_drugs_df[~crc_drugs_df.chem_name.isin(prev_drug_df.chem_name)]
5355
else:
5456
# if there's no prev drugs, then all drugs are new
5557
new_drugs_df = crc_drugs_df
5658
# get new drug names
57-
new_drug_names = new_drugs_df['chem_name'].unique()
59+
new_drug_names = new_drugs_df['DRUG_NAME'].unique()
5860
# call function that gets info for these drugs
5961
update_dataframe_and_write_tsv(new_drug_names,output_drug_data_path)
6062

@@ -69,7 +71,7 @@ def create_crc_drug_data(fitted_drug_data_path:str, prevDrugFilepath:str, output
6971
parser.add_argument('-d', '--Download', action = 'store_true', default=False, help='Download drug data.')
7072
parser.add_argument('-t', '--Token', type=str, default=None, help='Synapse Token')
7173
parser.add_argument('-D', '--Drug', action = 'store_true', default=False, help='Generate drug data.')
72-
parser.add_argument('-p', '--PrevDrugs', type=str, default=None, help='Synapse Token')
74+
parser.add_argument('-p', '--PrevDrugs', nargs='?', type=str, default='', const='', help='Previous drug file')
7375

7476
args = parser.parse_args()
7577

@@ -87,8 +89,8 @@ def create_crc_drug_data(fitted_drug_data_path:str, prevDrugFilepath:str, output
8789
if args.Drug:
8890
if args.PrevDrugs is None or args.PrevDrugs=='':
8991
print("No previous drugs file provided. Starting improve_drug_id from SMI_1. Running drug file generation")
90-
create_crc_drug_data(fitted_drug_data_path = "/tmp/fitted_data_GDSC_Org_restricted_11Mar25.csv", output_drug_data_path = "/tmp/crc_drugs.tsv")
92+
create_crc_drug_data(fitted_drug_data_path = "/tmp/fitted_data_GDSC_Org_restricted_11Mar25.csv", output_drug_data_path = "/tmp/crc_organoids_drugs.tsv", prevDrugFilepath = "")
9193
else:
9294
print("Previous drugs file {} detected. Running drugs file generation and checking for duplicate IDs.".format(args.PrevDrugs))
93-
create_crc_drug_data(fitted_drug_data_path = "/tmp/fitted_data_GDSC_Org_restricted_11Mar25.csv", prevDrugFilepath = args.PrevDrugs, output_drug_data_path = "/tmp/crc_drugs.tsv")
95+
create_crc_drug_data(fitted_drug_data_path = "/tmp/fitted_data_GDSC_Org_restricted_11Mar25.csv", prevDrugFilepath = args.PrevDrugs, output_drug_data_path = "/tmp/crc_organoids_drugs.tsv")
9496

build/crc_organoids/build_drugs.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
#!/bin/bash
22
set -euo pipefail
3+
echo "the variable is $1"
34

45
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
56

67
# running the drug python script
7-
echo "Running 03-drug-crc.py with token and prevSamples $1."
8-
python 03-drug-crc.py --Download --Drugs --token $SYNAPSE_AUTH_TOKEN --prevSamples $1
8+
echo "Running 03-drug-crc.py with token and PrevDrugs $1."
9+
python3 03-drug-crc.py --Download --Drug --Token $SYNAPSE_AUTH_TOKEN --PrevDrugs $1
910

1011
# running the drug descriptor python script
11-
python build_drug_desc.py --drugtable /tmp/crc_drugs.csv --desctable /tmp/crc_drug_descriptors.csv.gz
12+
python3 build_drug_desc.py --drugtable /tmp/crc_organoids_drugs.tsv --desctable /tmp/crc_drug_descriptors.csv.gz

build/crc_organoids/build_exp.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit
55

66
# running the experiments python script
77
echo "Running 04-experiments-crc.py with token, samples file $1 and drugs file $2."
8-
python 04-experiments-crc.py --Download --Experiment --token $SYNAPSE_AUTH_TOKEN --Samples $1 --Drugs $2
8+
python3 04-experiments-crc.py --Download --Experiment --Token $SYNAPSE_AUTH_TOKEN --Samples $1 --Drugs $2
99

1010
# running the curve fitting python script
11-
python fit_curve.py --input /tmp/crc_experiments_for_curve_fitting.tsv --output /tmp/crc_experiment.tsv
11+
python3 fit_curve.py --input /tmp/crc_experiments_for_curve_fitting.tsv --output /tmp/crc_experiment.tsv
1212

1313
# for some reason, the fit_curve.py script always outputs with .0 at the end, so remove that
1414
mv /tmp/crc_doserep.tsv.0 crc_doserep.tsv

build/crc_organoids/build_omics.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ set -euo pipefail
33

44
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
55

6-
echo "Running 02-omics-cdc.py with token, curSamples $2, and genes $1."
7-
python 02-omics-cdc.py --parse --transcriptomics --mutations --copy_number --omics --ids $2 --genes $1
6+
echo "Running 02-omics-crc.py with token, curSamples $2, and genes $1."
7+
python3 02-omics-crc.py --parse --transcriptomics --mutations --copy_number --ids $2 --genes $1

build/crc_organoids/build_samples.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit
55

66
echo "Running 01-createSamples-crc.py with token and prevSamples $1."
77
# download the data and then create sample sheet
8-
python 01-samples-crc.py --download --samples --token $SYNAPSE_AUTH_TOKEN --prevSamples $1
8+
python3 01-samples-crc.py --download --samples --token $SYNAPSE_AUTH_TOKEN --prevSamples $1
Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
pandas
22
numpy==1.23
3-
os
4-
gzip
3+
#os
4+
#gzip
55
requests
66
argparse
77
synapseclient
8-
math
9-
time
10-
threading
11-
signal
8+
#math
9+
#time
10+
#threading
11+
#signal
1212
rdkit
1313
mordred
14-
multiprocessing
14+
#multiprocessing
1515
tqdm
16-
itertools
17-
scikit-learn
16+
#itertools
17+
scikit-learn
18+
openpyxl

0 commit comments

Comments
 (0)