@@ -323,6 +323,7 @@ def process_datasets(args):
323323
324324 merged_transcriptomics = merge_master_tables (
325325 args = args ,
326+ data_sets = data_sets ,
326327 data_type = 'transcriptomics'
327328 )
328329
@@ -358,7 +359,7 @@ def process_datasets(args):
358359 # writing the expression datatable to 'data_out/x_data/cancer_gene_expression.tsv'
359360 outfile_path = args .WORKDIR .joinpath (
360361 "data_out" ,
361- "y_data " ,
362+ "x_data " ,
362363 "cancer_gene_expression.tsv"
363364 )
364365 merged_transcriptomics .transpose ().to_csv (
@@ -377,7 +378,7 @@ def process_datasets(args):
377378 # join the "meta data tables" like copynumber etc.
378379
379380
380- def merge_master_tables (args , data_type : str = 'transcriptomics' ):
381+ def merge_master_tables (args , data_sets , data_type : str = 'transcriptomics' ):
381382 """
382383 Helper function to merge several DataTables into one master table
383384
@@ -394,22 +395,14 @@ def merge_master_tables(args, data_type: str='transcriptomics'):
394395 _description_
395396 """
396397
397- local_path = args .WORKDIR .joinpath ('data_in_tmp' )
398-
399- # getting the info which datasets are available
400- data_sets_info = cd .list_datasets (raw = True )
401-
402- # loading all available datasets into a dict where the dataset name
403- # is the key
404- data_sets = {}
405- for data_set in data_sets_info .keys ():
406- data_sets [data_set ] = cd .load (name = data_set , local_path = local_path )
407-
408398 # creating a list that contains all DataFrames to be merged
409399 dfs_to_merge = []
410400 for data_set in data_sets :
411401 if data_sets [data_set ].experiments is not None :
412- if data_type in ['transcriptomics' , 'copy_number' ]:
402+ if (
403+ data_type in ['transcriptomics' , 'copy_number' ] and
404+ getattr (data_sets [data_set ], data_type , None ) is not None
405+ ):
413406 dfs_to_merge .append (
414407 data_sets [data_set ].format (data_type = data_type )
415408 )
0 commit comments