Skip to content

Commit ab8a966

Browse files
authored
Merge pull request #672 from openml/add_wallclock_measurements
Add wallclock measurements
2 parents 2a468f9 + 3c39672 commit ab8a966

4 files changed

Lines changed: 154 additions & 51 deletions

File tree

openml/extensions/sklearn/extension.py

Lines changed: 129 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -888,35 +888,53 @@ def _format_external_version(
888888
) -> str:
889889
return '%s==%s' % (model_package_name, model_package_version_number)
890890

891-
def _check_n_jobs(self, model: Any) -> bool:
892-
"""Returns True if the parameter settings of model are chosen s.t. the model
893-
will run on a single core (if so, openml-python can measure runtimes)"""
894-
895-
def check(param_grid, restricted_parameter_name, legal_values):
896-
if isinstance(param_grid, dict):
897-
for param, value in param_grid.items():
898-
# n_jobs is scikitlearn parameter for paralizing jobs
899-
if param.split('__')[-1] == restricted_parameter_name:
900-
# 0 = illegal value (?), 1 / None = use one core,
901-
# n = use n cores,
902-
# -1 = use all available cores -> this makes it hard to
903-
# measure runtime in a fair way
904-
if legal_values is None or value not in legal_values:
905-
return False
906-
return True
907-
elif isinstance(param_grid, list):
908-
return all(
909-
check(sub_grid, restricted_parameter_name, legal_values)
910-
for sub_grid in param_grid
911-
)
891+
@staticmethod
892+
def _get_parameter_values_recursive(param_grid: Union[Dict, List[Dict]],
893+
parameter_name: str) -> List[Any]:
894+
"""
895+
Returns a list of values for a given hyperparameter, encountered
896+
recursively throughout the flow. (e.g., n_jobs can be defined
897+
for various flows)
912898
913-
if not (
914-
isinstance(model, sklearn.base.BaseEstimator) or self.is_hpo_class(model)
915-
):
916-
raise ValueError('model should be BaseEstimator or BaseSearchCV')
899+
Parameters
900+
----------
901+
param_grid: Union[Dict, List[Dict]]
902+
Dict mapping from hyperparameter name to value, or a list of
903+
such dicts
904+
905+
parameter_name: str
906+
The hyperparameter that needs to be inspected
907+
908+
Returns
909+
-------
910+
List
911+
A list of all values of hyperparameters with this name
912+
"""
913+
if isinstance(param_grid, dict):
914+
result = list()
915+
for param, value in param_grid.items():
916+
# n_jobs is scikit-learn parameter for parallelizing jobs
917+
if param.split('__')[-1] == parameter_name:
918+
result.append(value)
919+
return result
920+
elif isinstance(param_grid, list):
921+
result = list()
922+
for sub_grid in param_grid:
923+
result.extend(SklearnExtension._get_parameter_values_recursive(sub_grid,
924+
parameter_name))
925+
return result
926+
else:
927+
raise ValueError('Param_grid should either be a dict or list of dicts')
917928

918-
# make sure that n_jobs is not in the parameter grid of optimization
919-
# procedure
929+
def _prevent_optimize_n_jobs(self, model):
930+
"""
931+
Ensures that HPO classes will not optimize the n_jobs hyperparameter
932+
933+
Parameters:
934+
-----------
935+
model:
936+
The model that will be fitted
937+
"""
920938
if self.is_hpo_class(model):
921939
if isinstance(model, sklearn.model_selection.GridSearchCV):
922940
param_distributions = model.param_grid
@@ -933,13 +951,62 @@ def check(param_grid, restricted_parameter_name, legal_values):
933951
print('Warning! Using subclass BaseSearchCV other than '
934952
'{GridSearchCV, RandomizedSearchCV}. '
935953
'Should implement param check. ')
936-
937-
if not check(param_distributions, 'n_jobs', None):
954+
n_jobs_vals = SklearnExtension._get_parameter_values_recursive(param_distributions,
955+
'n_jobs')
956+
if len(n_jobs_vals) > 0:
938957
raise PyOpenMLError('openml-python should not be used to '
939958
'optimize the n_jobs parameter.')
940959

960+
def _can_measure_cputime(self, model: Any) -> bool:
961+
"""
962+
Returns True if the parameter settings of model are chosen s.t. the model
963+
will run on a single core (if so, openml-python can measure cpu-times)
964+
965+
Parameters:
966+
-----------
967+
model:
968+
The model that will be fitted
969+
970+
Returns:
971+
--------
972+
bool:
973+
True if all n_jobs parameters will be either set to None or 1, False otherwise
974+
"""
975+
if not (
976+
isinstance(model, sklearn.base.BaseEstimator) or self.is_hpo_class(model)
977+
):
978+
raise ValueError('model should be BaseEstimator or BaseSearchCV')
979+
941980
# check the parameters for n_jobs
942-
return check(model.get_params(), 'n_jobs', [1, None])
981+
n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), 'n_jobs')
982+
for val in n_jobs_vals:
983+
if val is not None and val != 1:
984+
return False
985+
return True
986+
987+
def _can_measure_wallclocktime(self, model: Any) -> bool:
988+
"""
989+
Returns True if the parameter settings of model are chosen s.t. the model
990+
will run on a preset number of cores (if so, openml-python can measure wall-clock time)
991+
992+
Parameters:
993+
-----------
994+
model:
995+
The model that will be fitted
996+
997+
Returns:
998+
--------
999+
bool:
1000+
True if no n_jobs parameter is set to -1, False otherwise
1001+
"""
1002+
if not (
1003+
isinstance(model, sklearn.base.BaseEstimator) or self.is_hpo_class(model)
1004+
):
1005+
raise ValueError('model should be BaseEstimator or BaseSearchCV')
1006+
1007+
# check the parameters for n_jobs
1008+
n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), 'n_jobs')
1009+
return -1 not in n_jobs_vals
9431010

9441011
################################################################################################
9451012
# Methods for performing runs with extension modules
@@ -1037,6 +1104,12 @@ def _run_model_on_fold(
10371104
"""Run a model on a repeat,fold,subsample triplet of the task and return prediction
10381105
information.
10391106
1107+
Furthermore, it will record runtime measures if the multi-core behaviour allows this.
1108+
* exact user cpu time will be measured if the number of cores is set (recursively throughout
1109+
the model) exactly to 1
1110+
* wall clock time will be measured if the number of cores is set (recursively throughout the
1111+
model) to any given number (but not when it is set to -1)
1112+
10401113
Returns the data that is necessary to construct the OpenML Run object. Is used by
10411114
run_task_get_arff_content. Do not use this function unless you know what you are doing.
10421115
@@ -1112,8 +1185,11 @@ def _prediction_to_probabilities(
11121185
# but not desirable if we want to upload to OpenML).
11131186

11141187
model_copy = sklearn.base.clone(model, safe=True)
1188+
# sanity check: prohibit users from optimizing n_jobs
1189+
self._prevent_optimize_n_jobs(model_copy)
11151190
# Runtime can be measured if the model is run sequentially
1116-
can_measure_runtime = self._check_n_jobs(model_copy)
1191+
can_measure_cputime = self._can_measure_cputime(model_copy)
1192+
can_measure_wallclocktime = self._can_measure_wallclocktime(model_copy)
11171193

11181194
train_indices, test_indices = task.get_train_test_split_indices(
11191195
repeat=rep_no, fold=fold_no, sample=sample_no)
@@ -1133,17 +1209,21 @@ def _prediction_to_probabilities(
11331209

11341210
try:
11351211
# for measuring runtime. Only available since Python 3.3
1136-
if can_measure_runtime:
1137-
modelfit_starttime = time.process_time()
1212+
modelfit_start_cputime = time.process_time()
1213+
modelfit_start_walltime = time.time()
11381214

11391215
if isinstance(task, OpenMLSupervisedTask):
11401216
model_copy.fit(train_x, train_y)
11411217
elif isinstance(task, OpenMLClusteringTask):
11421218
model_copy.fit(train_x)
11431219

1144-
if can_measure_runtime:
1145-
modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
1146-
user_defined_measures['usercpu_time_millis_training'] = modelfit_duration
1220+
modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000
1221+
if can_measure_cputime:
1222+
user_defined_measures['usercpu_time_millis_training'] = modelfit_dur_cputime
1223+
1224+
modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000
1225+
if can_measure_wallclocktime:
1226+
user_defined_measures['wall_clock_time_millis_training'] = modelfit_dur_walltime
11471227

11481228
except AttributeError as e:
11491229
# typically happens when training a regressor on classification task
@@ -1169,17 +1249,24 @@ def _prediction_to_probabilities(
11691249
else:
11701250
model_classes = used_estimator.classes_
11711251

1172-
if can_measure_runtime:
1173-
modelpredict_starttime = time.process_time()
1252+
modelpredict_start_cputime = time.process_time()
1253+
modelpredict_start_walltime = time.time()
11741254

11751255
# In supervised learning this returns the predictions for Y, in clustering
11761256
# it returns the clusters
11771257
pred_y = model_copy.predict(test_x)
11781258

1179-
if can_measure_runtime:
1180-
modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
1181-
user_defined_measures['usercpu_time_millis_testing'] = modelpredict_duration
1182-
user_defined_measures['usercpu_time_millis'] = modelfit_duration + modelpredict_duration
1259+
if can_measure_cputime:
1260+
modelpredict_duration_cputime = (time.process_time()
1261+
- modelpredict_start_cputime) * 1000
1262+
user_defined_measures['usercpu_time_millis_testing'] = modelpredict_duration_cputime
1263+
user_defined_measures['usercpu_time_millis'] = (modelfit_dur_cputime
1264+
+ modelpredict_duration_cputime)
1265+
if can_measure_wallclocktime:
1266+
modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000
1267+
user_defined_measures['wall_clock_time_millis_testing'] = modelpredict_duration_walltime
1268+
user_defined_measures['wall_clock_time_millis'] = (modelfit_dur_walltime
1269+
+ modelpredict_duration_walltime)
11831270

11841271
# add client-side calculated metrics. These are used on the server as a
11851272
# consistency check, only useful for supervised tasks

openml/testing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,14 @@ def _check_fold_timing_evaluations(
158158
# a dict mapping from openml measure to a tuple with the minimum and
159159
# maximum allowed value
160160
check_measures = {
161+
# should take at least one millisecond (?)
161162
'usercpu_time_millis_testing': (0, max_time_allowed),
162163
'usercpu_time_millis_training': (0, max_time_allowed),
163-
# should take at least one millisecond (?)
164-
'usercpu_time_millis': (0, max_time_allowed)}
164+
'usercpu_time_millis': (0, max_time_allowed),
165+
'wall_clock_time_millis_training': (0, max_time_allowed),
166+
'wall_clock_time_millis_testing': (0, max_time_allowed),
167+
'wall_clock_time_millis': (0, max_time_allowed),
168+
}
165169

166170
if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE):
167171
check_measures['predictive_accuracy'] = (0, 1.)

tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,11 @@ def test_paralizable_check(self):
955955
sklearn.model_selection.GridSearchCV(singlecore_bagging,
956956
legal_param_dist),
957957
sklearn.model_selection.GridSearchCV(multicore_bagging,
958-
legal_param_dist)
958+
legal_param_dist),
959+
sklearn.ensemble.BaggingClassifier(
960+
n_jobs=-1,
961+
base_estimator=sklearn.ensemble.RandomForestClassifier(n_jobs=5)
962+
)
959963
]
960964
illegal_models = [
961965
sklearn.model_selection.GridSearchCV(singlecore_bagging,
@@ -964,14 +968,18 @@ def test_paralizable_check(self):
964968
illegal_param_dist)
965969
]
966970

967-
answers = [True, False, False, True, False, False, True, False]
971+
can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False]
972+
can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False]
968973

969-
for model, expected_answer in zip(legal_models, answers):
970-
self.assertEqual(self.extension._check_n_jobs(model), expected_answer)
974+
for model, allowed_cputime, allowed_walltime in zip(legal_models,
975+
can_measure_cputime_answers,
976+
can_measure_walltime_answers):
977+
self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime)
978+
self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime)
971979

972980
for model in illegal_models:
973981
with self.assertRaises(PyOpenMLError):
974-
self.extension._check_n_jobs(model)
982+
self.extension._prevent_optimize_n_jobs(model)
975983

976984
def test__get_fn_arguments_with_defaults(self):
977985
if LooseVersion(sklearn.__version__) < "0.19":

tests/test_runs/test_run_functions.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,15 @@ def _check_sample_evaluations(self, sample_evaluations, num_repeats,
286286
# a dict mapping from openml measure to a tuple with the minimum and
287287
# maximum allowed value
288288
check_measures = {
289+
# should take at least one millisecond (?)
289290
'usercpu_time_millis_testing': (0, max_time_allowed),
290291
'usercpu_time_millis_training': (0, max_time_allowed),
291-
# should take at least one millisecond (?)
292292
'usercpu_time_millis': (0, max_time_allowed),
293-
'predictive_accuracy': (0, 1)}
293+
'wall_clock_time_millis_training': (0, max_time_allowed),
294+
'wall_clock_time_millis_testing': (0, max_time_allowed),
295+
'wall_clock_time_millis': (0, max_time_allowed),
296+
'predictive_accuracy': (0, 1)
297+
}
294298

295299
self.assertIsInstance(sample_evaluations, dict)
296300
if sys.version_info[:2] >= (3, 3):

0 commit comments

Comments
 (0)