Skip to content

Commit 5123588

Browse files
PGijsbers authored and mfeurer committed
Fix check_datasets_active and corresponding unit test (#642)
* Now use different did for active, as d/1 is deactivated. Test against production server as test server does not have deactivated datasets. * Fix that reflects dataset_list has integer keys (and cannot be indexed). Fix retrieving all datasets instead of only active ones. Add documentation. * Refactored to have a single use of 'active' and forgo many excessive checks on datasets that were not asked for. * Remove spaces from empty line (flake error). * Removed unused import. * PEP8
1 parent aecb6ac commit 5123588

2 files changed

Lines changed: 15 additions & 18 deletions

File tree

openml/datasets/functions.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -279,32 +279,28 @@ def _load_features_from_file(features_file: str) -> Dict:
279279
return xml_dict["oml:data_features"]
280280

281281

282-
def check_datasets_active(dataset_ids):
283-
"""Check if the dataset ids provided are active.
282+
def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
283+
""" Check if the dataset ids provided are active.
284284
285285
Parameters
286286
----------
287-
dataset_ids : iterable
288-
Integers representing dataset ids.
287+
dataset_ids : List[int]
288+
A list of integers representing dataset ids.
289289
290290
Returns
291291
-------
292292
dict
293293
A dictionary with items {did: bool}
294294
"""
295-
dataset_list = list_datasets()
296-
dataset_ids = sorted(dataset_ids)
295+
dataset_list = list_datasets(status='all')
297296
active = {}
298297

299-
for dataset in dataset_list:
300-
active[dataset['did']] = dataset['status'] == 'active'
301-
302298
for did in dataset_ids:
303-
if did not in active:
304-
raise ValueError('Could not find dataset {} in '
305-
'OpenML dataset list.'.format(did))
306-
307-
active = {did: active[did] for did in dataset_ids}
299+
dataset = dataset_list.get(did, None)
300+
if dataset is None:
301+
raise ValueError('Could not find dataset {} in OpenML dataset list.'.format(did))
302+
else:
303+
active[did] = (dataset['status'] == 'active')
308304

309305
return active
310306

tests/test_datasets/test_dataset_functions.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import unittest
21
import os
32
import random
43
from itertools import product
@@ -206,17 +205,19 @@ def test_list_datasets_empty(self):
206205

207206
self.assertIsInstance(datasets, dict)
208207

209-
@unittest.skip('See https://github.com/openml/openml-python/issues/149')
210208
def test_check_datasets_active(self):
211-
active = openml.datasets.check_datasets_active([1, 17])
212-
self.assertTrue(active[1])
209+
# Have to test on live because there is no deactivated dataset on the test server.
210+
openml.config.server = self.production_server
211+
active = openml.datasets.check_datasets_active([2, 17])
212+
self.assertTrue(active[2])
213213
self.assertFalse(active[17])
214214
self.assertRaisesRegex(
215215
ValueError,
216216
'Could not find dataset 79 in OpenML dataset list.',
217217
openml.datasets.check_datasets_active,
218218
[79],
219219
)
220+
openml.config.server = self.test_server
220221

221222
def test_get_datasets(self):
222223
dids = [1, 2]

0 commit comments

Comments
 (0)