openml
diff --git a/‎.DS_Store‎
8 KB b/‎.DS_Store‎
8 KB
diff --git a/‎openml/runs/functions.py‎
Lines changed: 22 additions & 39 deletions b/‎openml/runs/functions.py‎
Lines changed: 22 additions & 39 deletions
diff --git a/‎tests/files/misc/features_with_whitespaces.xml.pkl‎
253 Bytes b/‎tests/files/misc/features_with_whitespaces.xml.pkl‎
253 Bytes
@@ -282,7 +282,9 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
                 setup_id = setup_exists(flow_from_server)
                 ids = run_exists(task.task_id, setup_id)
                 if ids:
-                    error_message = "One or more runs of this setup were already performed on the task."
+                    error_message = (
+                        "One or more runs of this setup were already performed on the task."
+                    )
                     raise OpenMLRunsExistError(ids, error_message)
         else:
             # Flow does not exist on server and we do not want to upload it.
@@ -512,15 +514,11 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     # this information is multiple times overwritten, but due to the ordering
     # of the loops, eventually it contains the information based on the full
     # dataset size
-    user_defined_measures_per_fold = (
-        OrderedDict()
-    )  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_fold = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
     # stores sample-based evaluation measures (sublevel of fold-based)
     # will also be filled on a non sample-based task, but the information
     # is the same as the fold-based measures, and disregarded in that case
-    user_defined_measures_per_sample = (
-        OrderedDict()
-    )  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_sample = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
 
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
@@ -614,11 +612,7 @@ def _calculate_local_measure(  # type: ignore
                             if isinstance(test_y[i], (int, np.integer))
                             else test_y[i]
                         )
-                    pred_prob = (
-                        proba_y.iloc[i]
-                        if isinstance(proba_y, pd.DataFrame)
-                        else proba_y[i]
-                    )
+                    pred_prob = proba_y.iloc[i] if isinstance(proba_y, pd.DataFrame) else proba_y[i]
 
                     arff_line = format_prediction(
                         task=task,
@@ -681,13 +675,11 @@ def _calculate_local_measure(  # type: ignore
             if rep_no not in user_defined_measures_per_sample[measure]:
                 user_defined_measures_per_sample[measure][rep_no] = OrderedDict()
             if fold_no not in user_defined_measures_per_sample[measure][rep_no]:
-                user_defined_measures_per_sample[measure][rep_no][
-                    fold_no
-                ] = OrderedDict()
+                user_defined_measures_per_sample[measure][rep_no][fold_no] = OrderedDict()
 
-            user_defined_measures_per_fold[measure][rep_no][fold_no] = (
-                user_defined_measures_fold[measure]
-            )
+            user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
+                measure
+            ]
             user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
                 user_defined_measures_fold[measure]
             )
@@ -843,9 +835,7 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     run : OpenMLRun
         Run corresponding to ID, fetched from the server.
     """
-    run_dir = Path(
-        openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
-    )
+    run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id))
     run_file = run_dir / "description.xml"
 
     run_dir.mkdir(parents=True, exist_ok=True)
@@ -864,9 +854,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     return _create_run_from_xml(run_xml)
 
 
-def _create_run_from_xml(
-    xml: str, from_server: bool = True
-) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
+def _create_run_from_xml(  # noqa: PLR0915, PLR0912, C901
+    xml: str,
+    from_server: bool = True,  # noqa: FBT002
+) -> OpenMLRun:
     """Create a run object from xml returned from server.
 
     Parameters
@@ -896,13 +887,11 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
         if not from_server:
             return None
 
-        raise AttributeError(
-            "Run XML does not contain required (server) field: ", fieldname
-        )
+        raise AttributeError("Run XML does not contain required (server) field: ", fieldname)
 
-    run = xmltodict.parse(
-        xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"]
-    )["oml:run"]
+    run = xmltodict.parse(xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"])[
+        "oml:run"
+    ]
     run_id = obtain_field(run, "oml:run_id", from_server, cast=int)
     uploader = obtain_field(run, "oml:uploader", from_server, cast=int)
     uploader_name = obtain_field(run, "oml:uploader_name", from_server)
@@ -1057,9 +1046,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
 
 def _get_cached_run(run_id: int) -> OpenMLRun:
     """Load a run from the cache."""
-    run_cache_dir = openml.utils._create_cache_directory_for_id(
-        RUNS_CACHE_DIR_NAME, run_id
-    )
+    run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
     run_file = run_cache_dir / "description.xml"
     try:
         with run_file.open(encoding="utf8") as fh:
@@ -1229,9 +1216,7 @@ def __list_runs(api_call: str) -> pd.DataFrame:
     runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
     # Minimalistic check if the XML is useful
     if "oml:runs" not in runs_dict:
-        raise ValueError(
-            f'Error in return XML, does not contain "oml:runs": {runs_dict}'
-        )
+        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')
 
     if "@xmlns:oml" not in runs_dict["oml:runs"]:
         raise ValueError(
@@ -1245,9 +1230,7 @@ def __list_runs(api_call: str) -> pd.DataFrame:
             f'"http://openml.org/openml": {runs_dict}',
         )
 
-    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(
-        runs_dict["oml:runs"]
-    )
+    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
 
     runs = {
         int(r["oml:run_id"]): {