Skip to content

Commit 3c1e18b

Browse files
committed
fixed outfolder & added passing of data_sets to merge_master_tables
1 parent 6ab6a59 commit 3c1e18b

1 file changed

Lines changed: 7 additions & 14 deletions

File tree

scripts/prepare_data_for_improve.py

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -323,6 +323,7 @@ def process_datasets(args):
323323

324324
merged_transcriptomics = merge_master_tables(
325325
args=args,
326+
data_sets=data_sets,
326327
data_type='transcriptomics'
327328
)
328329

@@ -358,7 +359,7 @@ def process_datasets(args):
358359
# writing the expression datatable to '/x_data/*_expression.tsv'
359360
outfile_path = args.WORKDIR.joinpath(
360361
"data_out",
361-
"y_data",
362+
"x_data",
362363
"cancer_gene_expression.tsv"
363364
)
364365
merged_transcriptomics.transpose().to_csv(
@@ -377,7 +378,7 @@ def process_datasets(args):
377378
# join the "meta data tables" like copynumber etc.
378379

379380

380-
def merge_master_tables(args, data_type: str='transcriptomics'):
381+
def merge_master_tables(args, data_sets, data_type: str='transcriptomics'):
381382
"""
382383
Helper function to merge several DataTables into one master table
383384
@@ -394,22 +395,14 @@ def merge_master_tables(args, data_type: str='transcriptomics'):
394395
_description_
395396
"""
396397

397-
local_path = args.WORKDIR.joinpath('data_in_tmp')
398-
399-
# getting the info which datasets are available
400-
data_sets_info = cd.list_datasets(raw=True)
401-
402-
# loading all available datasets into a dict where the dataset name
403-
# is the key
404-
data_sets = {}
405-
for data_set in data_sets_info.keys():
406-
data_sets[data_set] = cd.load(name=data_set, local_path=local_path)
407-
408398
# creating a list that contains all DataFrames to be merged
409399
dfs_to_merge = []
410400
for data_set in data_sets:
411401
if data_sets[data_set].experiments is not None:
412-
if data_type in ['transcriptomics', 'copy_number']:
402+
if (
403+
data_type in ['transcriptomics', 'copy_number'] and
404+
getattr(data_sets[data_set], data_type, None) is not None
405+
):
413406
dfs_to_merge.append(
414407
data_sets[data_set].format(data_type=data_type)
415408
)

0 commit comments

Comments
 (0)