Skip to content

Commit 98a73b3

Browse files
authored
Merge pull request #633 from openml/add_#632
Add #632
2 parents 0980673 + 4f60c25 commit 98a73b3

5 files changed

Lines changed: 115 additions & 7 deletions

File tree

openml/study/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from .study import OpenMLStudy
22
from .functions import get_study, create_study, create_benchmark_suite, \
3-
status_update, attach_to_study, detach_from_study, delete_study
3+
status_update, attach_to_study, detach_from_study, delete_study, \
4+
list_studies
45

56

67
__all__ = [
78
'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
8-
'delete_study', 'detach_from_study', 'get_study', 'status_update',
9+
'delete_study', 'detach_from_study', 'get_study', 'list_studies',
10+
'status_update'
911
]

openml/study/functions.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,105 @@ def detach_from_study(study_id, entity_ids):
278278
post_variables)
279279
result = xmltodict.parse(result_xml)['oml:study_detach']
280280
return int(result['oml:linked_entities'])
281+
282+
283+
def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                 uploader=None, benchmark_suite=None):
    """
    List the studies that are on OpenML, optionally filtered.

    Every argument is handed on unchanged, as a keyword filter, to
    ``openml.utils._list_all`` together with ``_list_studies`` as the
    listing function.

    Parameters
    ----------
    offset : int, optional
        The number of studies to skip, starting from the first.
    size : int, optional
        The maximum number of studies to show.
    main_entity_type : str, optional
        Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark
        suites are returned. In case of `run`, only studies are returned.
    status : str, optional
        Should be {active, in_preparation, deactivated, all}. By default
        active studies are returned.
    uploader : list (int), optional
        Result filter. Will only return studies created by these users.
    benchmark_suite : optional
        Forwarded as the ``benchmark_suite`` filter.
        NOTE(review): presumably the id of a benchmark suite -- confirm
        against the server API.

    Returns
    -------
    studies : dict of dicts
        A mapping from study ID to dict.

        Every study is represented by a dictionary containing
        the following information:
        - id
        - alias (optional)
        - name
        - main_entity_type
        - benchmark_suite (optional)
        - status
        - creator
        - creation_date
    """
    # Keep the filters in signature order; they are passed on as keywords.
    filters = dict(
        offset=offset,
        size=size,
        main_entity_type=main_entity_type,
        status=status,
        uploader=uploader,
        benchmark_suite=benchmark_suite,
    )
    return openml.utils._list_all(_list_studies, **filters)
329+
330+
331+
def _list_studies(**kwargs):
    """
    Build the ``study/list`` api call from the given filters and run it.

    Each keyword argument is appended to the call as a
    ``/<operator>/<value>`` path segment, in the order it was passed.

    Parameters
    ----------
    kwargs : dict, optional
        Legal filter operators (keys in the dict):
        status, limit, offset, main_entity_type, uploader

    Returns
    -------
    studies : dict of dicts
    """
    filter_segments = [
        "/%s/%s" % (operator, value) for operator, value in kwargs.items()
    ]
    return __list_studies("study/list" + "".join(filter_segments))
350+
351+
352+
def __list_studies(api_call):
    """
    Perform a study listing api call and parse the XML answer.

    Parameters
    ----------
    api_call : str
        The api call to perform with a ``get`` request, e.g.
        ``"study/list"`` followed by filter segments.

    Returns
    -------
    studies : dict of dicts
        A mapping from study id (int) to a dict holding those of the
        fields ``id``, ``alias``, ``main_entity_type``,
        ``benchmark_suite``, ``name``, ``status``, ``creation_date`` and
        ``creator`` that are present in the answer.

    Raises
    ------
    AssertionError
        If the parsed answer does not hold a list of studies or does not
        declare the expected ``oml`` XML namespace.
    """
    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
    # force_list makes a single <oml:study> element parse as a list as well
    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
    study_list = study_dict['oml:study_list']

    # Minimalistic check if the XML is useful
    assert isinstance(study_list['oml:study'], list), \
        type(study_list['oml:study'])
    assert study_list['@xmlns:oml'] == 'http://openml.org/openml', \
        study_list['@xmlns:oml']

    # maps from xml name to a tuple of (dict name, casting fn); the table
    # does not depend on the study, so it is built once outside the loop
    expected_fields = {
        'oml:id': ('id', int),
        'oml:alias': ('alias', str),
        'oml:main_entity_type': ('main_entity_type', str),
        'oml:benchmark_suite': ('benchmark_suite', int),
        'oml:name': ('name', str),
        'oml:status': ('status', str),
        'oml:creation_date': ('creation_date', str),
        'oml:creator': ('creator', int),
    }

    studies = dict()
    for study_ in study_list['oml:study']:
        study_id = int(study_['oml:id'])
        # optional fields (e.g. alias) are left out when the server
        # did not send them
        current_study = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn)
            in expected_fields.items()
            if oml_field_name in study_
        }
        studies[study_id] = current_study
    return studies

openml/study/study.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def publish(self):
8383
file_elements = {
8484
'description': self._to_xml()
8585
}
86-
8786
return_value = openml._api_calls._perform_api_call(
8887
"study/",
8988
'post',

tests/test_study/test_study_functions.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ def test_get_study(self):
1818
self.assertEqual(len(study.setups), 30)
1919

2020
def test_get_tasks(self):
21-
study_id = 14
21+
study_id = 1
2222

2323
study = openml.study.get_study(study_id, 'tasks')
24-
self.assertGreater(len(study.tasks), 0)
24+
self.assertGreater(len(study.data), 0)
25+
self.assertGreaterEqual(len(study.tasks), len(study.data))
2526
# note that other entities are None, even though this study has
2627
# datasets
27-
self.assertIsNone(study.data)
2828
self.assertIsNone(study.flows)
2929
self.assertIsNone(study.setups)
3030
self.assertIsNone(study.runs)
@@ -159,3 +159,8 @@ def test_study_attach_illegal(self):
159159
openml.study.attach_to_study(study_id, list(run_list_more.keys()))
160160
study_downloaded = openml.study.get_study(study_id)
161161
self.assertListEqual(study_original.runs, study_downloaded.runs)
162+
163+
def test_study_list(self):
    """Listing studies with status 'in_preparation' yields more than two."""
    in_preparation = openml.study.list_studies(status='in_preparation')
    # might fail if the server has recently been reset
    self.assertGreater(len(in_preparation), 2)

tests/test_tasks/test_task_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def test_list_tasks_empty(self):
7373

7474
def test_list_tasks_by_tag(self):
7575
num_basic_tasks = 100 # number is flexible, check server if fails
76-
tasks = openml.tasks.list_tasks(tag='study_14')
76+
tasks = openml.tasks.list_tasks(tag='OpenML100')
7777
self.assertGreaterEqual(len(tasks), num_basic_tasks)
7878
for tid in tasks:
7979
self._check_task(tasks[tid])

0 commit comments

Comments
 (0)