@@ -888,35 +888,56 @@ def _format_external_version(
888888 ) -> str :
889889 return '%s==%s' % (model_package_name , model_package_version_number )
890890
891- def _check_n_jobs (self , model : Any ) -> bool :
892- """Returns True if the parameter settings of model are chosen s.t. the model
893- will run on a single core (if so, openml-python can measure runtimes)"""
894-
895- def check (param_grid , restricted_parameter_name , legal_values ):
896- if isinstance (param_grid , dict ):
897- for param , value in param_grid .items ():
898- # n_jobs is scikitlearn parameter for paralizing jobs
899- if param .split ('__' )[- 1 ] == restricted_parameter_name :
900- # 0 = illegal value (?), 1 / None = use one core,
901- # n = use n cores,
902- # -1 = use all available cores -> this makes it hard to
903- # measure runtime in a fair way
904- if legal_values is None or value not in legal_values :
905- return False
906- return True
907- elif isinstance (param_grid , list ):
908- return all (
909- check (sub_grid , restricted_parameter_name , legal_values )
910- for sub_grid in param_grid
911- )
891+ @staticmethod
892+ def _check_parameter_value_recursive (param_grid : Union [Dict , List [Dict ]], parameter_name : str , legal_values : Optional [List ]):
893+ """
894+ Checks (recursively) whether every occurrence of a given hyperparameter in the parameter grid of a flow takes one
895+ of the legal values. If the hyperparameter does not occur in the grid, True is returned.
912896
913- if not (
914- isinstance (model , sklearn .base .BaseEstimator ) or self .is_hpo_class (model )
915- ):
916- raise ValueError ('model should be BaseEstimator or BaseSearchCV' )
897+ Parameters
898+ ----------
899+ param_grid: Union[Dict, List[Dict]]
900+ Dict mapping from hyperparameter name to value, or a list of such dicts
901+
902+ parameter_name: str
903+ The hyperparameter that needs to be inspected
904+
905+ legal_values: List
906+ The values that are accepted. None if no values are legal (the mere presence of the hyperparameter then
907+ causes False to be returned)
917908
918- # make sure that n_jobs is not in the parameter grid of optimization
919- # procedure
909+ Returns
910+ -------
911+ bool
912+ True if all occurrences of the hyperparameter only have legal values, False otherwise
913+
914+ """
915+ if isinstance (param_grid , dict ):
916+ for param , value in param_grid .items ():
917+ # n_jobs is the scikit-learn parameter for parallelizing jobs
918+ if param .split ('__' )[- 1 ] == parameter_name :
919+ # 0 = illegal value (?), 1 / None = use one core,
920+ # n = use n cores,
921+ # -1 = use all available cores -> this makes it hard to
922+ # measure runtime in a fair way
923+ if legal_values is None or value not in legal_values :
924+ return False
925+ return True
926+ elif isinstance (param_grid , list ):
927+ return all (
928+ SklearnExtension ._check_parameter_value_recursive (sub_grid , parameter_name , legal_values )
929+ for sub_grid in param_grid
930+ )
931+
932+ def _prevent_optimize_n_jobs (self , model ):
933+ """
934+ Ensures that HPO classes will not optimize the n_jobs hyperparameter
935+
936+ Parameters:
937+ -----------
938+ model:
939+ The model that will be fitted
940+ """
920941 if self .is_hpo_class (model ):
921942 if isinstance (model , sklearn .model_selection .GridSearchCV ):
922943 param_distributions = model .param_grid
@@ -934,12 +955,55 @@ def check(param_grid, restricted_parameter_name, legal_values):
934955 '{GridSearchCV, RandomizedSearchCV}. '
935956 'Should implement param check. ' )
936957
937- if not check (param_distributions , 'n_jobs' , None ):
958+ if not SklearnExtension . _check_parameter_value_recursive (param_distributions , 'n_jobs' , None ):
938959 raise PyOpenMLError ('openml-python should not be used to '
939960 'optimize the n_jobs parameter.' )
940961
962+ def _can_measure_cputime (self , model : Any ) -> bool :
963+ """
964+ Returns True if the parameter settings of model are chosen s.t. the model
965+ will run on a single core (if so, openml-python can measure cpu-times)
966+
967+ Parameters:
968+ -----------
969+ model:
970+ The model that will be fitted
971+
972+ Returns:
973+ --------
974+ bool:
975+ True if all n_jobs parameters will be either set to None or 1, False otherwise
976+ """
977+ if not (
978+ isinstance (model , sklearn .base .BaseEstimator ) or self .is_hpo_class (model )
979+ ):
980+ raise ValueError ('model should be BaseEstimator or BaseSearchCV' )
981+
982+ # check the parameters for n_jobs
983+ return SklearnExtension ._check_parameter_value_recursive (model .get_params (), 'n_jobs' , [1 , None ])
984+
985+ def _can_measure_wallclocktime (self , model : Any ) -> bool :
986+ """
987+ Returns True if the parameter settings of model are chosen s.t. the model
988+ will run on a preset number of cores (if so, openml-python can measure wallclock time)
989+
990+ Parameters:
991+ -----------
992+ model:
993+ The model that will be fitted
994+
995+ Returns:
996+ --------
997+ bool:
998+ True if no n_jobs parameter is set to -1, False otherwise
999+ """
1000+ if not (
1001+ isinstance (model , sklearn .base .BaseEstimator ) or self .is_hpo_class (model )
1002+ ):
1003+ raise ValueError ('model should be BaseEstimator or BaseSearchCV' )
1004+
9411005 # check the parameters for n_jobs
942- return check (model .get_params (), 'n_jobs' , [1 , None ])
1006+ return not SklearnExtension . _check_parameter_value_recursive (model .get_params (), 'n_jobs' , [- 1 ])
9431007
9441008 ################################################################################################
9451009 # Methods for performing runs with extension modules
@@ -1112,8 +1176,11 @@ def _prediction_to_probabilities(
11121176 # but not desirable if we want to upload to OpenML).
11131177
11141178 model_copy = sklearn .base .clone (model , safe = True )
1179+ # sanity check: HPO models must not optimize the n_jobs parameter
1180+ self ._prevent_optimize_n_jobs (model_copy )
11151181 # Runtime can be measured if the model is run sequentially
1116- can_measure_runtime = self ._check_n_jobs (model_copy )
1182+ can_measure_cputime = self ._can_measure_cputime (model_copy )
1183+ can_measure_wallclocktime = self ._can_measure_wallclocktime (model_copy )
11171184
11181185 train_indices , test_indices = task .get_train_test_split_indices (
11191186 repeat = rep_no , fold = fold_no , sample = sample_no )
@@ -1133,17 +1200,29 @@ def _prediction_to_probabilities(
11331200
11341201 try :
11351202 # for measuring runtime. Only available since Python 3.3
1136- if can_measure_runtime :
1137- modelfit_starttime = time .process_time ()
1203+ modelfit_start_cputime = None
1204+ modelfit_duration_cputime = None
1205+ modelpredict_start_cputime = None
1206+
1207+ modelfit_start_walltime = None
1208+ modelfit_duration_walltime = None
1209+ modelpredict_start_walltime = None
1210+ if can_measure_cputime :
1211+ modelfit_start_cputime = time .process_time ()
1212+ if can_measure_wallclocktime :
1213+ modelfit_start_walltime = time .time ()
11381214
11391215 if isinstance (task , OpenMLSupervisedTask ):
11401216 model_copy .fit (train_x , train_y )
11411217 elif isinstance (task , OpenMLClusteringTask ):
11421218 model_copy .fit (train_x )
11431219
1144- if can_measure_runtime :
1145- modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
1146- user_defined_measures ['usercpu_time_millis_training' ] = modelfit_duration
1220+ if can_measure_cputime :
1221+ modelfit_duration_cputime = (time .process_time () - modelfit_start_cputime ) * 1000
1222+ user_defined_measures ['usercpu_time_millis_training' ] = modelfit_duration_cputime
1223+ elif can_measure_wallclocktime :
1224+ modelfit_duration_walltime = (time .time () - modelfit_start_walltime ) * 1000
1225+ user_defined_measures ['wall_clock_time_millis_training' ] = modelfit_duration_walltime
11471226
11481227 except AttributeError as e :
11491228 # typically happens when training a regressor on classification task
@@ -1169,17 +1248,24 @@ def _prediction_to_probabilities(
11691248 else :
11701249 model_classes = used_estimator .classes_
11711250
1172- if can_measure_runtime :
1173- modelpredict_starttime = time .process_time ()
1251+ if can_measure_cputime :
1252+ modelpredict_start_cputime = time .process_time ()
1253+ if can_measure_wallclocktime :
1254+ modelpredict_start_walltime = time .time ()
11741255
11751256 # In supervised learning this returns the predictions for Y, in clustering
11761257 # it returns the clusters
11771258 pred_y = model_copy .predict (test_x )
11781259
1179- if can_measure_runtime :
1180- modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
1181- user_defined_measures ['usercpu_time_millis_testing' ] = modelpredict_duration
1182- user_defined_measures ['usercpu_time_millis' ] = modelfit_duration + modelpredict_duration
1260+ if can_measure_cputime :
1261+ modelpredict_duration_cputime = (time .process_time () - modelpredict_start_cputime ) * 1000
1262+ user_defined_measures ['usercpu_time_millis_testing' ] = modelpredict_duration_cputime
1263+ user_defined_measures ['usercpu_time_millis' ] = modelfit_duration_cputime + modelpredict_duration_cputime
1264+ if can_measure_wallclocktime :
1265+ modelpredict_duration_walltime = (time .time () - modelpredict_start_walltime ) * 1000
1266+ user_defined_measures ['wall_clock_time_millis_testing' ] = modelpredict_duration_walltime
1267+ user_defined_measures ['wall_clock_time_millis' ] = modelfit_duration_walltime + \
1268+ modelpredict_duration_walltime
11831269
11841270 # add client-side calculated metrics. These are used on the server as a
11851271 # consistency check, only useful for supervised tasks
0 commit comments