Skip to content

Commit c35ec76

Browse files
created get omics data function
1 parent d74ac3c commit c35ec76

1 file changed

Lines changed: 44 additions & 0 deletions

File tree

build/novartispdx/02-omics-novartispdx.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,47 @@
44
import math
55
import argparse
66

7+
def download_parse_omics_novPDX(synID: str, save_path: str = None, synToken: str = None):
    """
    Download the omics Excel workbook from Synapse and parse it into DataFrames.

    The sequencing dataset lives at synapseID syn66364488. Requires a synapse token,
    which requires you to make a Synapse account and create a Personal Access Token.
    More information here: https://help.synapse.org/docs/Managing-Your-Account.2055405596.html#ManagingYourAccount-PersonalAccessTokens
    Omics data is an excel file. The excel file is then parsed for the RNAseq,
    copy number, and mutations data.

    Parameters
    ----------
    synID : string
        SynapseID of dataset to download (e.g. "syn66364488" for the sequencing
        dataset). This argument has no default and must be supplied.

    save_path : string
        Local path where the downloaded file will be saved. Passed to
        ``syn.get`` as ``downloadLocation``.
        NOTE(review): when None the Synapse client picks the location
        (presumably its local cache) -- confirm against synapseclient docs.

    synToken : string
        Synapse Personal Access Token of user, passed to ``syn.login`` as
        ``authToken``. Requires a Synapse account. More information at:
        https://help.synapse.org/docs/Managing-Your-Account.2055405596.html#ManagingYourAccount-PersonalAccessTokens

    Returns
    -------
    rnaseq_data : pd.DataFrame
        A DataFrame containing RNAseq data (sheet 'RNAseq_fpkm').

    copy_number_data : pd.DataFrame
        A DataFrame containing copy number data (sheet 'copy number').

    mutations_data : pd.DataFrame
        A DataFrame containing mutations data (sheet 'pdxe_mut_and_cn2').

    The three DataFrames are returned as a tuple in exactly the order listed
    above: ``(rnaseq_data, copy_number_data, mutations_data)``.
    """
    syn = synapseclient.Synapse()
    syn.login(authToken=synToken)

    # Obtain a pointer to the requested entity and download the data
    omics_entity = syn.get(entity=synID, downloadLocation=save_path)

    # Get the path to the local copy of the data file
    sequencing_filepath = omics_entity.path

    # Open the workbook once and let the context manager close the underlying
    # file handle after all sheets are read (a bare open() would leak it).
    with pd.ExcelFile(sequencing_filepath) as all_omics_excel:
        # table with somatic mutation information
        mutations_data = pd.read_excel(all_omics_excel, sheet_name='pdxe_mut_and_cn2')
        # table with copy number information
        copy_number_data = pd.read_excel(all_omics_excel, sheet_name='copy number')
        # RNAseq table
        rnaseq_data = pd.read_excel(all_omics_excel, sheet_name='RNAseq_fpkm')

    return rnaseq_data, copy_number_data, mutations_data

0 commit comments

Comments
 (0)