Skip to content

Commit 1b16b82

Browse files
committed
added helper function to list all available datasets
1 parent 9862154 commit 1b16b82

4 files changed

Lines changed: 55 additions & 3 deletions

File tree

coderdata/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@
1717
except ImportError:
1818
__version_tuple__ = (0, 1, 40)
1919

20-
from .utils.utils import version
20+
from .utils.utils import version
21+
from .utils.utils import list_datasets

coderdata/datasets.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
datasets:
2+
beataml:
3+
description: "Beat acute myeloid leukemia (BeatAML) focuses on acute myeloid leukemia tumor data. Data includes drug response, proteomics, and transcriptomics datasets."
4+
doi: "https://doi.org/10.1016/j.ccell.2022.07.002"
5+
cptac:
6+
description: "The Clinical Proteomic Tumor Analysis Consortium (CPTAC) project is a collaborative network funded by the National Cancer Institute (NCI) focused on improving our understanding of cancer biology through the integration of transcriptomic, proteomic, and genomic data."
7+
hcmi:
8+
description: "Human Cancer Models Initiative (HCMI) encompasses numerous cancer types and includes cell line, organoid, and tumor data. Data includes the transcriptomics, somatic mutation, and copy number datasets."
9+
mpnst:
10+
description: "Malignant Peripheral Nerve Sheath Tumor is a rare, aggressive sarcoma that affects peripheral nerves throughout the body."

coderdata/utils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
from .utils import version
1+
from .utils import version
2+
from .utils import list_datasets

coderdata/utils/utils.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,53 @@
11
"""
22
Collection of small utility and helper functions.
33
"""
4+
5+
from importlib import resources
6+
import yaml
7+
48
from .. import __version__
59
from .. import __version_tuple__
610

7-
def version():
11+
12+
def version() -> dict:
    """
    Report the version of the installed package and of the dataset build.

    Returns
    -------
    dict
        Mapping with two keys: ``'package'`` holds ``__version__`` and
        ``'dataset'`` holds the first two components of
        ``__version_tuple__`` joined by a dot.
    """
    major = __version_tuple__[0]
    minor = __version_tuple__[1]
    dataset_build = f"{major}.{minor}"
    return {'package': __version__, 'dataset': dataset_build}
1226

1327

28+
def list_datasets(raw: bool=False) -> dict | None:
    """
    Helper function that returns a list of available datasets including
    a short description and additional information available.

    Parameters
    ----------
    raw : bool, default=False
        If set to True returns a yaml dictionary containing all
        available datasets including additional information. If set to
        False prints information to stdout and returns None.

    Returns
    -------
    dict | None
        Returns a dict containing the information if ``raw==True``,
        otherwise prints information to stdout and returns `None`.
    """
    with resources.open_text('coderdata', 'datasets.yml') as f:
        # The bundled file is a plain mapping of strings, so the safe
        # loader is enough and avoids constructing arbitrary objects.
        datasets = yaml.safe_load(f)
    if raw:
        return datasets
    for name, info in datasets['datasets'].items():
        # Use double quotes for the inner key: reusing the outer quote
        # character inside an f-string is a SyntaxError before Python 3.12.
        print(f'{name}: "{info["description"]}"')
    return None

0 commit comments

Comments
 (0)