88
99from build .lib .openml .exceptions import PyOpenMLError
1010from .. import config
11- from ..flows import sklearn_to_flow
11+ from ..flows import sklearn_to_flow , get_flow
12+ from ..setups import setup_exists
1213from ..exceptions import OpenMLCacheException
1314from ..util import URLError
1415from ..tasks .functions import _create_task_from_xml
@@ -42,6 +43,20 @@ def run_task(task, model):
4243 # TODO move this into its own module. While it somehow belongs here, it
4344 # adds quite a lot of functionality which is better suited in other places!
4445 # TODO why doesn't this accept a flow as input? - this would make this more flexible!
46+ flow = sklearn_to_flow (model )
47+ flow_id = flow ._ensure_flow_exists ()
48+ if flow_id < 0 :
49+ print ("No flow" )
50+ return 0 , 2
51+ config .logger .info (flow_id )
52+
53+ if config .avoid_duplicate_runs :
54+ # TODO: would be nice if flow._ensure_flow_exists already handled this
55+ flow = get_flow (flow_id )
56+ setup_id = setup_exists (flow , model )
57+ ids = _run_exists (task .task_id , setup_id )
58+ if ids :
59+ raise PyOpenMLError ("Run already exists in server. Run id(s): %s" % str (ids ))
4560
4661 dataset = task .get_dataset ()
4762 X , Y = dataset .get_data (target = task .target_name )
@@ -52,27 +67,34 @@ def run_task(task, model):
5267 'only works for tasks with class labels.' )
5368
5469 # execute the run
55- run = OpenMLRun (task_id = task .task_id , flow_id = None , dataset_id = dataset .dataset_id , model = model )
70+ run = OpenMLRun (task_id = task .task_id , flow_id = flow_id , dataset_id = dataset .dataset_id , model = model )
5671
5772 try :
5873 run .data_content , run .trace_content = _run_task_get_arffcontent (model , task , class_labels )
5974 except PyOpenMLError as message :
6075 run .error_message = str (message )
6176 warnings .warn ("Run terminated with error: %s" % run .error_message )
6277
63- # now generate the flow
64- flow = sklearn_to_flow (model )
65- flow_id = flow ._ensure_flow_exists ()
66- if flow_id < 0 :
67- print ("No flow" )
68- return 0 , 2
69- config .logger .info (flow_id )
70-
71- # attach the flow to the run
72- run .flow_id = flow_id
73-
7478 return run
7579
80+ def _run_exists (task_id , setup_id ):
81+ '''
82+ Checks whether a task/setup combination is already present on the server.
83+
84+ :param task_id: int
85+ :param setup_id: int
86+ :return: List of run ids iff these already exists on the server, False otherwise
87+ '''
88+ if setup_id <= 0 :
89+ # openml setups are in range 1-inf
90+ return False
91+
92+ result = list_runs (task = [task_id ], setup = [setup_id ])
93+ if len (result ) > 0 :
94+ return set (result .keys ())
95+ else :
96+ return False
97+
7698
7799def _prediction_to_row (rep_no , fold_no , row_id , correct_label , predicted_label , predicted_probabilities , class_labels , model_classes_mapping ):
78100 """Complicated util function that turns probability estimates of a classifier for a given instance into the right arff format to upload to openml.
@@ -329,7 +351,7 @@ def _get_cached_run(run_id):
329351 "cached" % run_id )
330352
331353
332- def list_runs (offset = None , size = None , id = None , task = None ,
354+ def list_runs (offset = None , size = None , id = None , task = None , setup = None ,
333355 flow = None , uploader = None , tag = None ):
334356 """List all runs matching all of the given filters.
335357
@@ -346,6 +368,8 @@ def list_runs(offset=None, size=None, id=None, task=None,
346368
347369 task : list, optional
348370
371+ setup: list, optional
372+
349373 flow : list, optional
350374
351375 uploader : list, optional
@@ -367,6 +391,8 @@ def list_runs(offset=None, size=None, id=None, task=None,
367391 api_call += "/run/%s" % ',' .join ([str (int (i )) for i in id ])
368392 if task is not None :
369393 api_call += "/task/%s" % ',' .join ([str (int (i )) for i in task ])
394+ if setup is not None :
395+ api_call += "/setup/%s" % ',' .join ([str (int (i )) for i in setup ])
370396 if flow is not None :
371397 api_call += "/flow/%s" % ',' .join ([str (int (i )) for i in flow ])
372398 if uploader is not None :
0 commit comments