Skip to content

Commit 6729ec6

Browse files
adding main to omics
1 parent 08c2f2f commit 6729ec6

2 files changed

Lines changed: 63 additions & 12 deletions

File tree

build/crcpdo/02-omics-crcpdo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def map_copy_number(copy_number_data, improve_id_data, entrez_data):
209209
parser = argparse.ArgumentParser(description='###')
210210

211211
# arguments for file paths
212-
parser.add_argument('-g', '--genes', type=str, default=None, help='Path to transcriptomics genes.csv. Can be obtained using this docker container: https://github.com/PNNL-CompBio/coderdata/blob/0225c52b861dcd6902521228731c54a61768bcd6/build/genes/README.md#L4')
212+
parser.add_argument('-g', '--genes', type=str, default=None, help='Path to genes.csv. Can be obtained using this docker container: https://github.com/PNNL-CompBio/coderdata/blob/0225c52b861dcd6902521228731c54a61768bcd6/build/genes/README.md#L4')
213213
parser.add_argument('-i', '--ids', type=str, default=None, help='Path to sample Ids')
214214

215215
# arguments for what data to process

build/novartispdx/02-omics-novartispdx.py

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ def download_parse_omics_novPDX(synID:str , save_path:str = None, synToken:str =
6464
syn.login(authToken=synToken)
6565

6666
# Obtain a pointer and download the data
67-
syn66364488 = syn.get(entity=synID, downloadLocation = save_path)
67+
all_omics_data = syn.get(entity=synID, downloadLocation = save_path)
6868

6969
# Get the path to the local copy of the data file
70-
sequencing_filepath = syn66364488.path
71-
all_omics_excel = pd.ExcelFile(open(sequencing_filepath, 'rb'))
70+
all_omics_data_path = all_omics_data.path
71+
all_omics_excel = pd.ExcelFile(open(all_omics_data_path, 'rb'))
7272
mutations_data = pd.read_excel(all_omics_excel, 'pdxe_mut_and_cn2') # table with somatic mutation information
7373
copy_number_data = pd.read_excel(all_omics_excel, 'copy number') # table with copy number information
7474
rnaseq_data = pd.read_excel(all_omics_excel, 'RNAseq_fpkm')
@@ -140,7 +140,7 @@ def map_transcriptomics_novPDX(transcriptomics_data, improve_id_data, entrez_dat
140140
141141
Parameters
142142
----------
143-
copy_number_data : pd.Dataframe OR string
143+
transcriptomics_data : pd.Dataframe OR string
144144
Pandas dataframe object with transcriptomics data OR path to csv with transcriptomics data
145145
146146
improve_id_data : pd.Dataframe OR string
@@ -197,16 +197,67 @@ def map_transcriptomics_novPDX(transcriptomics_data, improve_id_data, entrez_dat
197197
if __name__ == "__main__":
198198
print('in main')
199199
parser = argparse.ArgumentParser(description="This script handles downloading, processing and formatting of omics data files for the Bladder PDO project")
200-
parser.add_argument('-s', '--samples', help='Path to sample file',default=None)
201-
parser.add_argument('-g', '--genes', help='Path to genes file', default = None)
202-
parser.add_argument('-c', '--copy', help='Flag to capture copy number data', action='store_true', default=False)
203-
parser.add_argument('-m', '--mutation', help='Flag to capture mutation data', action='store_true', default=False)
204-
parser.add_argument('-e', '--expression', help='Flag to capture transcriptomic data', action='store_true', default=False)
200+
201+
# filepath and token args
202+
parser.add_argument('-s', '--samples', help='Path to improve sample file',default=None)
203+
parser.add_argument('-g', '--genes', help='Path to genes.csv. Can be obtained using this docker container: https://github.com/PNNL-CompBio/coderdata/blob/0225c52b861dcd6902521228731c54a61768bcd6/build/genes/README.md#L4', default = None)
205204
parser.add_argument('-t', '--token', help='Synapse token')
206205

206+
# args for what data to process
207+
parser.add_argument('-D', '--download', action = 'store_true', default=False, help='Download excel files with omics data')
208+
parser.add_argument('-c', '--copy_number', help='Flag to capture copy number data', action='store_true', default=False)
209+
parser.add_argument('-m', '--mutations', help='Flag to capture mutation data', action='store_true', default=False)
210+
parser.add_argument('-e', '--transcriptomics', help='Flag to capture transcriptomic data', action='store_true', default=False)
211+
207212
args = parser.parse_args()
208-
print("Logging into Synapse")
209-
PAT = args.token
213+
214+
###########################
215+
216+
if args.download:
217+
print("Parsing excel file.")
218+
# Download and parse the Excel file to get the mutation, copy number, and RNA-seq data
219+
mutation_df, copy_num_df, rnaseq_df = download_parse_omics_novPDX(synID="syn66477971", save_path="/tmp/", synToken=args.token)
220+
# Save the mutation, copy number, and RNA-seq data in CSV format
221+
mutation_df.to_csv("/tmp/mutation_data.csv")
222+
copy_num_df.to_csv("/tmp/copy_num_data.csv")
223+
rnaseq_df.to_csv("/tmp/rnaseq_data.csv")
224+
225+
if args.transcriptomics:
226+
if args.genes is None or args.genes=='':
227+
print("No genes data provided. Exiting script.")
228+
exit()
229+
if args.ids is None or args.ids=='':
230+
print("No samples data provided. Exiting script.")
231+
exit()
232+
else:
233+
print("Starting transcriptomics data.")
234+
transcriptomics_df = map_transcriptomics_novPDX(transciptomics_data = "/tmp/rnaseq_data.csv", improve_id_data = "/tmp/novartispdx_samples.csv", entrez_data = "/tmp/genes.csv")
235+
transcriptomics_df.to_csv("/tmp/crcpdo_transcriptomics.csv", index=False)
236+
237+
if args.mutations:
238+
if args.genes is None or args.genes=='':
239+
print("No genes data provided. Exiting script.")
240+
exit()
241+
if args.ids is None or args.ids=='':
242+
print("No samples data provided. Exiting script.")
243+
exit()
244+
else:
245+
print("Starting mutations data.")
246+
mutation_df = map_mutations(mutation_data = "/tmp/mutation_data.csv", improve_id_data = "/tmp/novartispdx_samples.csv", entrez_data = "/tmp/genes.csv")
247+
mutation_df.to_csv("/tmp/crcpdo_mutations.csv", index=False)
248+
249+
if args.copy_number:
250+
if args.genes is None or args.genes=='':
251+
print("No genes data provided. Exiting script.")
252+
exit()
253+
if args.ids is None or args.ids=='':
254+
print("No samples data provided. Exiting script.")
255+
exit()
256+
else:
257+
print("Starting copy number data.")
258+
mutation_df = map_copy_number_novPDX(copy_number_data = "/tmp/copy_num_data.csv", improve_id_data = "/tmp/novartispdx_samples.csv", entrez_data = "/tmp/genes.csv")
259+
mutation_df.to_csv("/tmp/crcpdo_copy_number.csv", index=False)
260+
210261

211262
genes=pd.read_csv(args.genes)
212263
samples = pd.read_csv(args.samples)

0 commit comments

Comments
 (0)