File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -533,12 +533,30 @@ def load(
533533 setattr (dataset , dataset_type , _load_file (file ))
534534 print ("DONE" , file = sys .stderr )
535535 else :
536+ '''
537+ The genes dataset available in the online repository is
538+ universal and contains information on genes of all
539+ datasets. To that end it needs to be subsetted to only
540+ those genes that are associated with a specific cancer
541+ dataset.
542+ '''
536543 print (
537544 f"Importing 'genes' from { file } ..." ,
538545 end = ' ' ,
539546 file = sys .stderr
540547 )
541548 dataset .genes = _load_file (file )
549+
550+ entrez_ids = set ()
551+ for dataset_type in ('transcriptomics' , 'proteomics' ,
552+ 'mutations' , 'copy_number' ):
553+ if getattr (dataset , dataset_type ) is not None :
554+ entrez_ids .update (list (
555+ getattr (dataset , dataset_type )['entrez_id' ].unique ()
556+ ))
557+ dataset .genes = dataset .genes [
558+ dataset .genes ['entrez_id' ].isin (entrez_ids )
559+ ]
542560 print ("DONE" , file = sys .stderr )
543561
544562 print (f"Importing raw data ... DONE" , file = sys .stderr )
You can’t perform that action at this time.
0 commit comments