Skip to content

Commit 727e086

Browse files
committed
updated gen_stats.py to work with coderdata>=2.0
1 parent 0333956 commit 727e086

1 file changed

Lines changed: 8 additions & 8 deletions

File tree

scripts/gen_stats.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
import coderdata.load.DatasetLoader as cd
1+
import coderdata as cd
22
import yaml
33

44
class DatasetStatistics:
55
def __init__(self, dataset_type):
6-
self.dataset_loader = cd.DatasetLoader(dataset_type)
6+
self.data = cd.load(dataset_type)
77

88
def count_unique(self, attribute, unique_field):
9-
if hasattr(self.dataset_loader, attribute):
10-
dataset = getattr(self.dataset_loader, attribute)
9+
if getattr(self.data, attribute) is not None:
10+
dataset = getattr(self.data, attribute)
1111
if unique_field in dataset.columns:
1212
return len(dataset[unique_field].unique())
1313
return 0
1414

1515
def count_unique_genes(self):
1616
gene_ids = set()
17-
for data_type in ['transcriptomics', 'proteomics', 'mutations', 'copy_number', 'methylation']:
18-
if hasattr(self.dataset_loader, data_type):
19-
dataset = getattr(self.dataset_loader, data_type)
17+
for data_type in ['transcriptomics', 'proteomics', 'mutations', 'copy_number']:
18+
if getattr(self.data, data_type) is not None:
19+
dataset = getattr(self.data, data_type)
2020
if 'entrez_id' in dataset.columns:
2121
gene_ids.update(dataset.entrez_id.unique().tolist())
2222
return len(gene_ids)
@@ -41,5 +41,5 @@ def calculate_stats_for_datasets(dataset_types):
4141
yaml.dump(stats, file)
4242

4343
# Dataset types
44-
dataset_types = ['broad_sanger', 'cptac', 'beataml', 'hcmi','mpnst']
44+
dataset_types = cd.list_datasets(raw=True).keys()
4545
calculate_stats_for_datasets(dataset_types)

0 commit comments

Comments (0)