Merge pull request #633 from openml/add_#632

janvanrijn · web-flow · commit 98a73b338511 · 2019-02-26T15:18:48.000+01:00
Add #632
diff --git a/openml/study/__init__.py b/openml/study/__init__.py
@@ -1,9 +1,11 @@
 from .study import OpenMLStudy
 from .functions import get_study, create_study, create_benchmark_suite, \
-    status_update, attach_to_study, detach_from_study, delete_study
+    status_update, attach_to_study, detach_from_study, delete_study, \
+    list_studies
 
 
 __all__ = [
     'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
-    'delete_study', 'detach_from_study', 'get_study', 'status_update',
+    'delete_study', 'detach_from_study', 'get_study', 'list_studies',
+    'status_update'
 ]
diff --git a/openml/study/functions.py b/openml/study/functions.py
@@ -278,3 +278,105 @@ def detach_from_study(study_id, entity_ids):
                                                      post_variables)
     result = xmltodict.parse(result_xml)['oml:study_detach']
     return int(result['oml:linked_entities'])
+
+
+def list_studies(offset=None, size=None, main_entity_type=None, status=None,
+                 uploader=None, benchmark_suite=None):
+    """
+    Return all studies on OpenML that match the given filters.
+
+    Parameters
+    ----------
+    offset : int, optional
+        The number of studies to skip, starting from the first.
+    size : int, optional
+        The maximum number of studies to show.
+    main_entity_type : str, optional
+        Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark
+        suites are returned. In case of `run`, only studies are returned.
+    status : str, optional
+        Should be {active, in_preparation, deactivated, all}. By default active
+        studies are returned.
+    uploader : list (int), optional
+        Result filter. Will only return studies created by these users.
+    benchmark_suite : int, optional
+        Result filter, forwarded to the ``study/list`` API call like the
+        other filters; presumably a benchmark suite ID (TODO confirm).
+
+    Returns
+    -------
+    studies : dict of dicts
+        A mapping from study ID to dict.
+
+        Every study is represented by a dictionary containing
+        the following information:
+        - id
+        - alias (optional)
+        - name
+        - main_entity_type
+        - benchmark_suite (optional)
+        - status
+        - creator
+        - creation_date
+    """
+    return openml.utils._list_all(_list_studies,
+                                  offset=offset,
+                                  size=size,
+                                  main_entity_type=main_entity_type,
+                                  status=status,
+                                  uploader=uploader,
+                                  benchmark_suite=benchmark_suite)
+
+
+def _list_studies(**kwargs):
+    """
+    Build the ``study/list`` API call from the filters and perform it.
+
+    Parameters
+    ----------
+    kwargs : dict, optional
+        Filters, each appended to the call as ``/key/value``. Legal keys:
+        status, limit, offset, main_entity_type, uploader, benchmark_suite
+
+    Returns
+    -------
+    studies : dict of dicts
+    """
+    api_call = "study/list"
+    if kwargs is not None:  # always true: **kwargs is a dict
+        for operator, value in kwargs.items():
+            api_call += "/%s/%s" % (operator, value)
+    return __list_studies(api_call)
+
+
+def __list_studies(api_call):
+    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
+    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
+
+    # Minimal sanity check: studies form a list, namespace is OpenML's
+    assert type(study_dict['oml:study_list']['oml:study']) == list, \
+        type(study_dict['oml:study_list'])
+    assert study_dict['oml:study_list']['@xmlns:oml'] == \
+        'http://openml.org/openml', study_dict['oml:study_list']['@xmlns:oml']
+
+    studies = dict()
+    for study_ in study_dict['oml:study_list']['oml:study']:
+        # xml tag -> (dict key, cast fn); tags absent from a study are skipped
+        expected_fields = {
+            'oml:id': ('id', int),
+            'oml:alias': ('alias', str),
+            'oml:main_entity_type': ('main_entity_type', str),
+            'oml:benchmark_suite': ('benchmark_suite', int),
+            'oml:name': ('name', str),
+            'oml:status': ('status', str),
+            'oml:creation_date': ('creation_date', str),
+            'oml:creator': ('creator', int),
+        }
+        study_id = int(study_['oml:id'])
+        current_study = dict()
+        for oml_field_name, (real_field_name, cast_fn) in expected_fields.items():
+            if oml_field_name in study_:
+                current_study[real_field_name] = cast_fn(study_[oml_field_name])
+        current_study['id'] = int(current_study['id'])  # no-op: cast above
+        studies[study_id] = current_study
+    return studies
diff --git a/openml/study/study.py b/openml/study/study.py
@@ -83,7 +83,6 @@ def publish(self):
         file_elements = {
             'description': self._to_xml()
         }
-
         return_value = openml._api_calls._perform_api_call(
             "study/",
             'post',
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
@@ -18,13 +18,13 @@ def test_get_study(self):
         self.assertEqual(len(study.setups), 30)
 
     def test_get_tasks(self):
-        study_id = 14
+        study_id = 1
 
         study = openml.study.get_study(study_id, 'tasks')
-        self.assertGreater(len(study.tasks), 0)
+        self.assertGreater(len(study.data), 0)
+        self.assertGreaterEqual(len(study.tasks), len(study.data))
         # note that other entities are None, even though this study has
         # datasets
-        self.assertIsNone(study.data)
         self.assertIsNone(study.flows)
         self.assertIsNone(study.setups)
         self.assertIsNone(study.runs)
@@ -159,3 +159,8 @@ def test_study_attach_illegal(self):
             openml.study.attach_to_study(study_id, list(run_list_more.keys()))
         study_downloaded = openml.study.get_study(study_id)
         self.assertListEqual(study_original.runs, study_downloaded.runs)
+
+    def test_study_list(self):
+        study_list = openml.study.list_studies(status='in_preparation')
+        # might fail if the server was recently reset
+        self.assertGreater(len(study_list), 2)
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
@@ -73,7 +73,7 @@ def test_list_tasks_empty(self):
 
     def test_list_tasks_by_tag(self):
         num_basic_tasks = 100  # number is flexible, check server if fails
-        tasks = openml.tasks.list_tasks(tag='study_14')
+        tasks = openml.tasks.list_tasks(tag='OpenML100')
         self.assertGreaterEqual(len(tasks), num_basic_tasks)
         for tid in tasks:
             self._check_task(tasks[tid])

Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,6 @@ def publish(self):`
`83`	`83`	`file_elements = {`
`84`	`84`	`'description': self._to_xml()`
`85`	`85`	`}`
`86`		`-`
`87`	`86`	`return_value = openml._api_calls._perform_api_call(`
`88`	`87`	`"study/",`
`89`	`88`	`'post',`