Skip to content

Commit 0c48089

Browse files
corrections for build
1 parent b7dcd6b commit 0c48089

5 files changed

Lines changed: 15 additions & 22 deletions

File tree

build/docker/Dockerfile.liverpdo

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ ENV MPLCONFIGDIR=/app/tmp/matplotlib
3838
RUN mkdir -p /app/tmp/matplotlib
3939

4040

41-
ADD build/liverpdo/requirements.R .
42-
# installing r libraries
43-
RUN Rscript requirements.R
44-
4541

4642

4743
# installing python libraries
@@ -50,11 +46,9 @@ ADD build/liverpdo/requirements.txt .
5046
RUN pip3 install -r requirements.txt
5147

5248
RUN python3 --version
53-
RUN which Rscript
5449

5550
#ENV PATH="/opt/venv/bin:$PATH"
5651

57-
ADD build/liverpdo/CNV-segfile-annotation.R ./
5852
ADD build/liverpdo/*py ./
5953
ADD build/liverpdo/*sh ./
6054

build/liverpdo/01-samples-liverpdo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def generate_sample_file(samples_data_path:str = None, prev_samples_path:str = "
107107

108108
parser.add_argument('-D', '--download',action='store_true', default=False, help='Download RNA seq and sequencing data from GEO and supplemental materials from https://www.cell.com/cell/fulltext/S0092-8674(15)00373-6#mmc2')
109109
parser.add_argument('-t', '--token', type=str, default=None, help='Synapse Token')
110-
parser.add_argument('-i', '--synapseID', type=str, default="syn64961953", help='SynapseID of data to download')
110+
parser.add_argument('-i', '--synapseID', type=str, default="syn66593307", help='SynapseID of data to download')
111111

112112
parser.add_argument('-s', '--samples', action = 'store_true', help='Only generate samples, requires previous samples',default=False)
113113
parser.add_argument('-p', '--prevSamples', nargs='?',type=str, default='', const='', help='Use this to provide previous sample file')
@@ -128,10 +128,10 @@ def generate_sample_file(samples_data_path:str = None, prev_samples_path:str = "
128128
if args.samples:
129129
if args.prevSamples is None or args.prevSamples=='':
130130
print("No previous samples file provided. Starting improve_sample_id from 1. Running sample file generation")
131-
sample_sheet = generate_sample_file(sequencing_data_path = samples_download_path)
131+
sample_sheet = generate_sample_file(samples_data_path = samples_download_path)
132132
else:
133133
print("Previous sample sheet {} detected. Running sample file generation and checking for duplicate IDs.".format(args.prevSamples))
134-
sample_sheet = generate_sample_file(sequencing_data_path = samples_download_path, prev_samples_path= args.prevSamples)
134+
sample_sheet = generate_sample_file(samples_data_path = samples_download_path, prev_samples_path= args.prevSamples)
135135
sample_sheet.to_csv("/tmp/liverpdo_samples.csv", index=False)
136136

137137

build/liverpdo/02-omics-liverpdo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def map_transcriptomics(transciptomics_data, improve_id_data, entrez_data):
315315
exit()
316316
else:
317317
print("Starting transcriptomics data.")
318-
transcriptomics_df = map_transcriptomics(transciptomics_data = "/tmp/raw_rnaseq_data.csv", improve_id_data = "/tmp/crcpdo_samples.csv", entrez_data = "/tmp/genes.csv")
318+
transcriptomics_df = map_transcriptomics(transciptomics_data = "/tmp/raw_rnaseq_data.csv", improve_id_data = "/tmp/liverpdo_samples.csv", entrez_data = "/tmp/genes.csv")
319319
transcriptomics_df.to_csv("/tmp/liverpdo_transcriptomics.csv", index=False)
320320

321321
if args.mutations:
@@ -327,7 +327,7 @@ def map_transcriptomics(transciptomics_data, improve_id_data, entrez_data):
327327
exit()
328328
else:
329329
print("Starting mutations data.")
330-
mutation_df = map_mutations(mutation_data = "/tmp/raw_mutation_data.csv", improve_id_data = "/tmp/crcpdo_samples.csv", entrez_data = "/tmp/genes.csv")
330+
mutation_df = map_mutations(mutation_data = "/tmp/raw_mutation_data.csv", improve_id_data = "/tmp/liverpdo_samples.csv", entrez_data = "/tmp/genes.csv")
331331
mutation_df.to_csv("/tmp/liverpdo_mutations.csv", index=False)
332332

333333
if args.copy_number:
@@ -339,6 +339,6 @@ def map_transcriptomics(transciptomics_data, improve_id_data, entrez_data):
339339
exit()
340340
else:
341341
print("Starting copy number data.")
342-
mutation_df = map_copy_number(copy_number_data = "/tmp/raw_copynum_data.csv", improve_id_data = "/tmp/crcpdo_samples.csv", entrez_data = "/tmp/genes.csv")
342+
mutation_df = map_copy_number(copy_number_data = "/tmp/raw_copynum_data.csv", improve_id_data = "/tmp/liverpdo_samples.csv", entrez_data = "/tmp/genes.csv")
343343
mutation_df.to_csv("/tmp/liverpdo_copy_number.csv", index=False)
344344

build/liverpdo/03-drug-liverpdo.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,8 @@ def download_parse_drug_data(synID:str , save_path:str = None, synToken:str = No
4646

4747
# Get the path to the local copy of the data file
4848
drugs_filepath = downloaded_data.path
49-
50-
# Parse the downloaded excel file
51-
drugs_excel = pd.ExcelFile(open(drugs_filepath, 'rb'))
52-
drugs_data = pd.read_excel(drugs_excel)
53-
drugs_data.to_csv("/tmp/raw_druginfo.csv")
5449

55-
return(drugs_data)
50+
return(drugs_filepath)
5651

5752

5853
def create_liverpdo_drug_data(drug_info_path:str, prevDrugFilepath:str, output_drug_data_path:str):
@@ -94,17 +89,20 @@ def create_liverpdo_drug_data(drug_info_path:str, prevDrugFilepath:str, output_d
9489

9590
if args.Download:
9691
if args.Token is None:
97-
print("No synpase download tocken was provided. Cannot download data.")
92+
print("No synpase download token was provided. Cannot download data.")
9893
exit()
9994
else:
10095
print("Downloading Files from Synapse.")
10196
# download fitted and raw drug data from Synapse
10297
fitted_drug_data_path = download_parse_drug_data(synID = "syn66401300", save_path = "/tmp/", synToken = args.Token)
98+
drug_excel = pd.ExcelFile(open(fitted_drug_data_path, 'rb'))
99+
druginfo_df = pd.read_excel(drug_excel)
100+
druginfo_df.to_csv("/tmp/raw_druginfo.csv")
103101
if args.Drug:
104102
if args.PrevDrugs is None or args.PrevDrugs=='':
105103
print("No previous drugs file provided. Starting improve_drug_id from SMI_1. Running drug file generation")
106-
create_liverpdo_drug_data(fitted_drug_data_path = "/tmp/raw_druginfo.csv", output_drug_data_path = "/tmp/liverpdo_drugs.tsv", prevDrugFilepath = "")
104+
create_liverpdo_drug_data(drug_info_path = "/tmp/raw_druginfo.csv", output_drug_data_path = "/tmp/liverpdo_drugs.tsv", prevDrugFilepath = "")
107105
else:
108106
print("Previous drugs file {} detected. Running drugs file generation and checking for duplicate IDs.".format(args.PrevDrugs))
109-
create_liverpdo_drug_data(fitted_drug_data_path = "/tmp/raw_druginfo.csv", prevDrugFilepath = args.PrevDrugs, output_drug_data_path = "/tmp/liverpdo_drugs.tsv")
107+
create_liverpdo_drug_data(drug_info_path = "/tmp/raw_druginfo.csv", prevDrugFilepath = args.PrevDrugs, output_drug_data_path = "/tmp/liverpdo_drugs.tsv")
110108

build/liverpdo/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ mordred
1616
tqdm
1717
#itertools
1818
scikit-learn
19-
openpyxl
19+
openpyxl
20+
mygene

0 commit comments

Comments
 (0)