Skip to content

Commit 286ce3a

Browse files
committed
slight refactoring of file handling in Dataset.load() in preparation for gene table fix
1 parent 47e9df8 commit 286ce3a

1 file changed

Lines changed: 45 additions & 15 deletions

File tree

coderdata/dataset/dataset.py

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,21 @@ def load(
470470
_description_
471471
"""
472472

473+
data_types_to_load = (
474+
'transcriptomics',
475+
'proteomics',
476+
'mutations',
477+
'copy_number',
478+
'samples',
479+
'drugs',
480+
'drug_descriptors',
481+
'mirna',
482+
'experiments',
483+
'methylation',
484+
'metabolomics',
485+
'genes',
486+
)
487+
473488
if type(local_path) is not Path:
474489
try:
475490
local_path = Path(local_path)
@@ -487,30 +502,45 @@ def load(
487502
dataset = Dataset(name)
488503
accepted_file_endings = ('.csv', '.tsv', '.csv.gz', '.tsv.gz')
489504
print(f"Importing raw data ...", file=sys.stderr)
490-
for child in local_path.iterdir():
491-
if child.name in ["genes.csv", "genes.csv.gz"]:
505+
506+
# generating the file list that contains all files that need to
507+
# be imported based on the Dataset name
508+
files = {}
509+
for p in local_path.glob(f'{name}_*'):
510+
if p.name.endswith(accepted_file_endings) and p.is_file():
511+
dataset_type = p.name[len(name)+1:].split('.')[0]
512+
files[dataset_type] = p
513+
for p in local_path.glob(f'genes*'):
514+
if p.name.endswith(accepted_file_endings) and p.is_file():
515+
files['genes'] = p
516+
517+
for dataset_type in data_types_to_load:
518+
if dataset_type not in files:
492519
print(
493-
f"Importing 'genes' from {child} ...",
494-
end=' ',
520+
f"'{dataset_type}' not available for {name}",
521+
end='\n',
495522
file=sys.stderr
496523
)
497-
dataset.genes = _load_file(child)
498-
print("DONE", file=sys.stderr)
499-
500-
if (
501-
child.name.startswith(name)
502-
and child.name.endswith(accepted_file_endings)
503-
):
504-
505-
dataset_type = child.name[len(name)+1:].split('.')[0]
524+
continue
525+
file = files[dataset_type]
526+
if dataset_type != 'genes':
506527
print(
507-
f"Importing '{dataset_type}' from {child} ...",
528+
f"Importing '{dataset_type}' from {file} ...",
508529
end=' ',
509530
file=sys.stderr
510531
)
511532
if hasattr(dataset, dataset_type):
512-
setattr(dataset, dataset_type, _load_file(child))
533+
setattr(dataset, dataset_type, _load_file(file))
513534
print("DONE", file=sys.stderr)
535+
else:
536+
print(
537+
f"Importing 'genes' from {file} ...",
538+
end=' ',
539+
file=sys.stderr
540+
)
541+
dataset.genes = _load_file(file)
542+
print("DONE", file=sys.stderr)
543+
514544
print(f"Importing raw data ... DONE", file=sys.stderr)
515545
return dataset
516546

0 commit comments

Comments
 (0)