Skip to content

Commit 0616d11

Browse files
committed
added new scripts to pull data
1 parent b1effa1 commit 0616d11

4 files changed

Lines changed: 36 additions & 3 deletions

File tree

build/build_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def process_omics(executor, dataset, should_continue):
124124
'broad_sanger': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
125125
'cptac': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
126126
'hcmi': ['mutations', 'transcriptomics'],
127-
'pancpdo': ['mutations', 'transcriptomics'],
127+
'pancpdo': ['transcriptomics'],
128128
'mpnstpdx':['copy_number', 'mutations', 'proteomics', 'transcriptomics']
129129
}
130130

build/pancpdo/02-getPancPDOData.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,9 +576,9 @@ def write_dataframe_to_csv(dataframe, outname):
576576
None
577577
"""
578578
if('gz' in outname):
579-
dataframe.to_pandas().to_csv(outname,compression='gzip',index=False)
579+
dataframe.to_pandas().drop_duplicates().to_csv(outname,compression='gzip',index=False)
580580
else:
581-
dataframe.to_pandas().to_csv(outname,index=False)
581+
dataframe.to_pandas().drop_duplicates().to_csv(outname,index=False)
582582
return
583583

584584
def main():
@@ -682,6 +682,7 @@ def main():
682682
print("Aligning to Schema")
683683
final_data = align_to_schema(combined_data,args.type,7500,args.samples)
684684
gc.collect()
685+
685686
combined_data = None
686687

687688
print(f"final data:\n{final_data}")
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import pandas as pd
2+
import os
3+
import argparse
4+
5+
6+
7+
8+
###figshare link:
9+
10+
filelink='https://aacr.figshare.com/ndownloader/files/39996295'
11+
##get third tab and drugs are listed across top
12+
13+
def main():
14+
parser = argparse.ArgumentParser(description='Download and match pancpdocdrugs')
15+
parser.add_argument('-d', '--prevDrugFile')
16+
parser.add_argument('-o', '--output', default = '/tmp/panpdc_drugs.tsv')
17+
18+
19+
20+
if __name__=='__main__':
21+
main()
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
import os
3+
import pandas as pd
4+
import wget
5+
import argparse
6+
7+
8+
9+
def main():
10+
##current AUC values are here: https://aacr.figshare.com/ndownloader/files/39996295 tabs 2 and 3
11+
parser = argparse.ArgumentParser()

0 commit comments

Comments
 (0)