@@ -278,3 +278,105 @@ def detach_from_study(study_id, entity_ids):
278278 post_variables )
279279 result = xmltodict .parse (result_xml )['oml:study_detach' ]
280280 return int (result ['oml:linked_entities' ])
281+
282+
def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                 uploader=None, benchmark_suite=None):
    """
    List the studies that are on OpenML, optionally filtered.

    Every argument is forwarded unchanged, as a keyword filter, to
    ``openml.utils._list_all`` together with the ``_list_studies`` callable.

    Parameters
    ----------
    offset : int, optional
        The number of studies to skip, starting from the first.
    size : int, optional
        The maximum number of studies to show.
    main_entity_type : str, optional
        Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark
        suites are returned. In case of `run`, only studies are returned.
    status : str, optional
        Should be {active, in_preparation, deactivated, all}. By default
        active studies are returned.
    uploader : list (int), optional
        Result filter. Will only return studies created by these users.
    benchmark_suite : optional
        Result filter, forwarded like the other arguments.
        NOTE(review): presumably the ID of a benchmark suite -- confirm
        against the server API.

    Returns
    -------
    studies : dict of dicts
        Whatever ``openml.utils._list_all`` returns for ``_list_studies``.
        NOTE(review): presumably a mapping from study ID to a dict
        describing that study -- confirm against ``_list_all``.

        The per-study dictionaries built by ``__list_studies`` contain
        the following fields when the server supplies them:
            - id
            - alias (optional)
            - name
            - main_entity_type
            - benchmark_suite (optional)
            - status
            - creator
            - creation_date
    """
    filters = {
        'offset': offset,
        'size': size,
        'main_entity_type': main_entity_type,
        'status': status,
        'uploader': uploader,
        'benchmark_suite': benchmark_suite,
    }
    return openml.utils._list_all(_list_studies, **filters)
329+
330+
def _list_studies(**kwargs):
    """
    Build the ``study/list`` api call from the given filters and run it.

    Each keyword argument is appended to the call path as
    ``/<operator>/<value>``, in the order the arguments were passed.

    Parameters
    ----------
    kwargs : dict, optional
        Legal filter operators (keys in the dict):
        status, limit, offset, main_entity_type, uploader

    Returns
    -------
    studies : dict of dicts
    """
    # Values are rendered with ``%s``, i.e. via ``str()``; no extra
    # encoding is applied here.
    filter_segments = [
        "/%s/%s" % (operator, value) for operator, value in kwargs.items()
    ]
    return __list_studies("study/list" + "".join(filter_segments))
350+
351+
def __list_studies(api_call):
    """
    Perform the given api call and parse the returned study listing.

    Parameters
    ----------
    api_call : str
        The api call path, passed to
        ``openml._api_calls._perform_api_call`` with the ``'get'`` method.

    Returns
    -------
    studies : dict of dicts
        A mapping from study ID (int) to a dict describing that study.
        Each dict holds those of the fields ``id``, ``alias``,
        ``main_entity_type``, ``benchmark_suite``, ``name``, ``status``,
        ``creation_date`` and ``creator`` that are present in the XML.

    Raises
    ------
    AssertionError
        If the ``oml:study`` entry is not a list or the XML namespace is
        not ``http://openml.org/openml``.
    KeyError
        If an expected XML element (e.g. ``oml:study_list`` or a study's
        ``oml:id``) is missing.
    """
    # maps from xml name to a tuple of (dict name, casting fn);
    # built once because it does not depend on the individual study
    expected_fields = {
        'oml:id': ('id', int),
        'oml:alias': ('alias', str),
        'oml:main_entity_type': ('main_entity_type', str),
        'oml:benchmark_suite': ('benchmark_suite', int),
        'oml:name': ('name', str),
        'oml:status': ('status', str),
        'oml:creation_date': ('creation_date', str),
        'oml:creator': ('creator', int),
    }

    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
    # force_list requests a list for 'oml:study' even for a single entry
    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
    study_list = study_dict['oml:study_list']

    # Minimalistic check if the XML is useful. The failure message reports
    # the type of the value that is actually being checked.
    assert isinstance(study_list['oml:study'], list), \
        type(study_list['oml:study'])
    assert study_list['@xmlns:oml'] == \
        'http://openml.org/openml', study_list['@xmlns:oml']

    studies = dict()
    for study_ in study_list['oml:study']:
        study_id = int(study_['oml:id'])
        # 'id' is already cast to int through expected_fields, so no
        # second conversion is needed.
        studies[study_id] = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn)
            in expected_fields.items()
            if oml_field_name in study_
        }
    return studies
0 commit comments