Merge the final changes: remove typing.cast wrappers, drop the ARFF sanitizing fallback, and simplify/skip run tests

Omswastik-11 · Omswastik-11 · commit 79db2131c043 · 2026-04-01T22:34:10.000+05:30
diff --git a/openml/_api/config.py b/openml/_api/config.py
diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import builtins
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Any
 
diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from collections.abc import Mapping
-from typing import Any, cast
+from typing import Any
 from xml.parsers.expat import ExpatError
 
 import xmltodict
@@ -189,7 +189,8 @@ def untag(self, resource_id: int, tag: str) -> list[str]:
 
     def _parse_xml_response(self, payload: bytes | str, **kwargs: Any) -> Mapping[str, Any]:
         try:
-            return cast("Mapping[str, Any]", xmltodict.parse(payload, **kwargs))
+            parsed_response: Mapping[str, Any] = xmltodict.parse(payload, **kwargs)
+            return parsed_response
         except ExpatError:
             payload_text = (
                 payload.decode("utf-8", errors="ignore") if isinstance(payload, bytes) else payload
@@ -201,12 +202,16 @@ def _parse_xml_response(self, payload: bytes | str, **kwargs: Any) -> Mapping[st
                 raise
 
             xml_text = payload_text[xml_start:]
-            return cast("Mapping[str, Any]", xmltodict.parse(xml_text, **kwargs))
+            parsed_fallback: Mapping[str, Any] = xmltodict.parse(xml_text, **kwargs)
+            return parsed_fallback
 
     def _get_endpoint_name(self) -> str:
         if self.resource_type == ResourceType.DATASET:
             return "data"
-        return cast("str", self.resource_type.value)
+        endpoint_name = self.resource_type.value
+        if not isinstance(endpoint_name, str):
+            raise TypeError(f"Unexpected endpoint type: {type(endpoint_name)}")
+        return endpoint_name
 
     def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int:
         """
@@ -280,4 +285,7 @@ def untag(self, resource_id: int, tag: str) -> list[str]:  # noqa: ARG002
         self._not_supported(method="untag")
 
     def _get_endpoint_name(self) -> str:
-        return cast("str", self.resource_type.value)
+        endpoint_name = self.resource_type.value
+        if not isinstance(endpoint_name, str):
+            raise TypeError(f"Unexpected endpoint type: {type(endpoint_name)}")
+        return endpoint_name
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -6,7 +6,7 @@
 import warnings
 from collections import OrderedDict
 from functools import partial
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
@@ -812,10 +812,6 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     ----------
     run_id : int
 
-    ignore_cache : bool
-        Whether to ignore the cache. If ``true`` this will download and overwrite the run xml
-        even if the requested run is already cached.
-
     ignore_cache : bool
         Whether to ignore the cache. If ``true`` this will download and overwrite the run xml
         even if the requested run is already cached.
@@ -825,12 +821,9 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     run : OpenMLRun
         Run corresponding to ID, fetched from the server.
     """
-    return cast(
-        "OpenMLRun",
-        openml._backend.run.get(
-            run_id,
-            reset_cache=ignore_cache,
-        ),
+    return openml._backend.run.get(
+        run_id,
+        reset_cache=ignore_cache,
     )
 
 
@@ -906,15 +899,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
     run_details = obtain_field(run, "oml:run_details", from_server=False)
 
     if "oml:input_data" in run:
-        input_data = run["oml:input_data"]
-        if isinstance(input_data, list):
-            input_data = input_data[0]
-
-        dataset_data = input_data["oml:dataset"]
-        if isinstance(dataset_data, list):
-            dataset_data = dataset_data[0]
-
-        dataset_id = int(dataset_data["oml:did"])
+        dataset_id = int(run["oml:input_data"]["oml:dataset"]["oml:did"])
     elif not from_server:
         dataset_id = None
     else:
@@ -1311,4 +1296,4 @@ def delete_run(run_id: int) -> bool:
     bool
         True if the deletion was successful. False otherwise.
     """
-    return cast("bool", openml._backend.run.delete(run_id))
+    return openml._backend.run.delete(run_id)
diff --git a/openml/runs/run.py b/openml/runs/run.py
@@ -1,7 +1,6 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
-import csv
 import pickle
 import time
 from collections import OrderedDict
@@ -10,7 +9,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    cast,
 )
 
 import arff
@@ -163,7 +161,7 @@ def predictions(self) -> pd.DataFrame:
                 arff_text = openml._api_calls._download_text_file(self.predictions_url)
                 if arff_text is None:
                     raise RuntimeError("Could not download predictions ARFF content.")
-                arff_dict = self._load_predictions_arff(arff_text)
+                arff_dict = arff.loads(arff_text)
             else:
                 raise RuntimeError("Run has no predictions.")
             self._predictions = pd.DataFrame(
@@ -172,68 +170,6 @@ def predictions(self) -> pd.DataFrame:
             )
         return self._predictions
 
-    @staticmethod
-    def _load_predictions_arff(arff_text: str) -> dict[str, Any]:
-        try:
-            return cast("dict[str, Any]", arff.loads(arff_text))
-        except arff.ArffException:
-            normalized = arff_text.lstrip("\ufeff \t\r\n")
-            relation_indexes = [
-                idx
-                for idx in [normalized.find("@relation"), normalized.find("@RELATION")]
-                if idx >= 0
-            ]
-            if relation_indexes:
-                arff_candidate = normalized[min(relation_indexes) :]
-                try:
-                    return cast("dict[str, Any]", arff.loads(arff_candidate))
-                except arff.ArffException:
-                    sanitized = OpenMLRun._sanitize_arff_text(arff_candidate)
-                    return cast("dict[str, Any]", arff.loads(sanitized))
-            raise
-
-    @staticmethod
-    def _sanitize_arff_text(arff_text: str) -> str:
-        lines = arff_text.splitlines()
-
-        in_data = False
-        attribute_count = 0
-        cleaned_lines: list[str] = []
-
-        for line in lines:
-            stripped = line.strip()
-            lowered = stripped.lower()
-
-            if not in_data:
-                if lowered.startswith("@attribute"):
-                    attribute_count += 1
-                if lowered.startswith("@data"):
-                    in_data = True
-                cleaned_lines.append(line)
-                continue
-
-            if stripped == "" or stripped.startswith("%"):
-                cleaned_lines.append(line)
-                continue
-
-            if stripped.startswith("{"):
-                cleaned_lines.append(line)
-                continue
-
-            parsed_fields = next(
-                csv.reader(
-                    [line],
-                    delimiter=",",
-                    quotechar="'",
-                    skipinitialspace=True,
-                )
-            )
-
-            if len(parsed_fields) == attribute_count:
-                cleaned_lines.append(line)
-
-        return "\n".join(cleaned_lines) + "\n"
-
     @property
     def id(self) -> int | None:
         """The ID of the run, None if not uploaded to the server yet."""
@@ -603,7 +539,7 @@ def get_metric_fn(self, sklearn_fn: Callable, kwargs: dict | None = None) -> np.
             response = openml._api_calls._download_text_file(predictions_file_url)
             if response is None:
                 raise ValueError("Could not download predictions ARFF content.")
-            predictions_arff = self._load_predictions_arff(response)
+            predictions_arff = arff.loads(response)
             # TODO: make this a stream reader
         else:
             raise ValueError(
diff --git a/tests/test_api/test_run.py b/tests/test_api/test_run.py
@@ -12,7 +12,7 @@
 from openml.runs.run import OpenMLRun
 
 
-TEST_RUN_ID = 24
+TEST_RUN_ID = 1
 
 
 @pytest.fixture
@@ -32,20 +32,9 @@ def _assert_run_shape(run: OpenMLRun) -> None:
     assert isinstance(run.task_id, int)
 
 
-def _get_any_run_id(run_v1: RunV1API) -> int:
-    try:
-        run_v1.get(run_id=TEST_RUN_ID)
-        return TEST_RUN_ID
-    except Exception:
-        runs_df = run_v1.list(limit=1, offset=0)
-        if runs_df.empty:
-            pytest.skip("No runs available on configured test server")
-        return int(runs_df.iloc[0]["run_id"])
-
-
 @pytest.mark.test_server()
 def test_run_v1_get(run_v1):
-    run = run_v1.get(run_id=_get_any_run_id(run_v1))
+    run = run_v1.get(run_id=TEST_RUN_ID)
     _assert_run_shape(run)
 
 
@@ -133,20 +122,3 @@ def test_run_v2_publish_not_supported(run_v2):
         match="RunV2API: v2 API does not support `publish` for resource `run`",
     ):
         run_v2.publish(path="run", files={"description": "<run/>"})
-
-
-@pytest.mark.test_server()
-def test_run_v1_v2_contracts(run_v1, run_v2):
-    run_id = _get_any_run_id(run_v1)
-
-    run_from_v1 = run_v1.get(run_id=run_id)
-    _assert_run_shape(run_from_v1)
-
-    with pytest.raises(OpenMLNotSupportedError, match="does not support `get`"):
-        run_v2.get(run_id=run_id)
-
-    with pytest.raises(OpenMLNotSupportedError, match="does not support `list`"):
-        run_v2.list(limit=5, offset=0)
-
-    with pytest.raises(OpenMLNotSupportedError, match="does not support `publish`"):
-        run_v2.publish(path="run", files={"description": "<run/>"})
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -1025,6 +1025,7 @@ def _test_local_evaluations(self, run):
                 assert alt_scores[idx] <= 1
 
     @pytest.mark.sklearn()
+    @pytest.mark.skip(reason="https://github.com/openml/openml-python/issues/1586")
     @pytest.mark.test_server()
     def test_local_run_swapped_parameter_order_model(self):
         clf = DecisionTreeClassifier()
@@ -1074,6 +1075,7 @@ def test_local_run_swapped_parameter_order_flow(self):
         Version(sklearn.__version__) < Version("0.20"),
         reason="SimpleImputer doesn't handle mixed type DataFrame as input",
     )
+    @pytest.mark.skip(reason="https://github.com/openml/openml-python/issues/1586")
     @pytest.mark.test_server()
     def test_local_run_metric_score(self):
         # construct sci-kit learn classifier