Skip to content

Commit b821c82

Browse files
committed
minio path method in config
1 parent b499d7c commit b821c82

4 files changed

Lines changed: 24 additions & 20 deletions

File tree

openml/testing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import requests
1616

1717
import openml
18-
from openml._api import HTTPClient
18+
from openml._api import HTTPClient, MinIOClient
1919
from openml.enums import APIVersion
2020
from openml.exceptions import OpenMLServerException
2121
from openml.tasks import TaskType
@@ -56,6 +56,7 @@ class TestBase(unittest.TestCase):
5656
logger.setLevel(logging.DEBUG)
5757

5858
http_client: HTTPClient = HTTPClient(api_version=APIVersion.V1)
59+
minio_client = MinIOClient()
5960

6061
def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
6162
"""Setup variables and temporary directories.

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]:
246246
_c_root_dir / "api" / "v1" / "xml" / "data" / "features" / "2" / "body.xml",
247247
_c_root_dir / "data" / "download" / "1666876" / "phpFsFYVN" / "body.arff",
248248

249-
_c_root_dir / "datasets" / "30" / "dataset_30.pq",
249+
_c_root_dir / "minio" / "dataset30" / "dataset_30.pq"
250250
])
251251

252252
return res_paths
69.3 KB
Binary file not shown.

tests/test_datasets/test_dataset_functions.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -240,29 +240,29 @@ def test_get_datasets_by_name(self):
240240
dids = ["anneal", "kr-vs-kp"]
241241
datasets = openml.datasets.get_datasets(dids)
242242
assert len(datasets) == 2
243-
_assert_datasets_retrieved_successfully([1, 2])
243+
_assert_datasets_retrieved_successfully(datasets)
244244

245245
@pytest.mark.test_server()
246246
def test_get_datasets_by_mixed(self):
247247
# did 1 and 2 on the test server:
248248
dids = ["anneal", 2]
249249
datasets = openml.datasets.get_datasets(dids)
250250
assert len(datasets) == 2
251-
_assert_datasets_retrieved_successfully([1, 2])
251+
_assert_datasets_retrieved_successfully(datasets)
252252

253253
@pytest.mark.test_server()
254254
def test_get_datasets(self):
255255
dids = [1, 2]
256256
datasets = openml.datasets.get_datasets(dids)
257257
assert len(datasets) == 2
258-
_assert_datasets_retrieved_successfully([1, 2])
258+
_assert_datasets_retrieved_successfully(datasets)
259259

260260
@pytest.mark.test_server()
261261
def test_get_dataset_by_name(self):
262262
dataset = openml.datasets.get_dataset("anneal")
263263
assert type(dataset) == OpenMLDataset
264264
assert dataset.dataset_id == 1
265-
_assert_datasets_retrieved_successfully([1])
265+
_assert_datasets_retrieved_successfully([dataset])
266266

267267
assert len(dataset.features) > 1
268268
assert len(dataset.qualities) > 4
@@ -1868,38 +1868,41 @@ def _dataset_features_is_downloaded(did: int):
18681868
return (cache_directory / str(did) / "body.xml").exists()
18691869

18701870

1871-
def _dataset_data_file_is_downloaded(did: int):
1871+
def _dataset_data_file_is_downloaded(dataset: OpenMLDataset):
18721872
# TODO: update once the MinIO paths are fixed
1873-
cache_directory = Path(openml.config.get_cache_directory()) / "minio/datasets/0000/0001"
1874-
if not cache_directory.exists():
1875-
return False
1876-
return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
1873+
pq_directory = Path(openml.config.get_cache_directory()) / openml.config.get_minio_download_path(dataset._parquet_url)
1874+
arff_directory = Path(openml.config.get_cache_directory()) / "data/v1/download" / str(dataset.id)
1875+
if pq_directory.exists():
1876+
return any(f.suffix == ".pq" for f in pq_directory.iterdir())
1877+
if arff_directory.exists():
1878+
return any(f.suffix == ".arff" for f in arff_directory.iterdir())
1879+
return False
18771880

18781881

18791882
def _assert_datasets_retrieved_successfully(
1880-
dids: Iterable[int],
1883+
datasets: Iterable[OpenMLDataset],
18811884
with_qualities: bool = False,
18821885
with_features: bool = False,
18831886
with_data: bool = False,
18841887
):
1885-
"""Checks that all files for the given dids have been downloaded.
1888+
"""Checks that all files for the given datasets have been downloaded.
18861889
18871890
This includes:
18881891
- description
18891892
- qualities
18901893
- features
18911894
- the data file (parquet or arff): must be present if with_data is set, otherwise it must be absent.
18921895
"""
1893-
for did in dids:
1894-
assert _dataset_description_is_downloaded(did)
1896+
for dataset in datasets:
1897+
assert _dataset_description_is_downloaded(dataset)
18951898

1896-
has_qualities = _dataset_qualities_is_downloaded(did)
1899+
has_qualities = _dataset_qualities_is_downloaded(dataset.id)
18971900
assert has_qualities if with_qualities else not has_qualities
18981901

1899-
has_features = _dataset_features_is_downloaded(did)
1902+
has_features = _dataset_features_is_downloaded(dataset.id)
19001903
assert has_features if with_features else not has_features
19011904

1902-
has_data = _dataset_data_file_is_downloaded(did)
1905+
has_data = _dataset_data_file_is_downloaded(dataset)
19031906
assert has_data if with_data else not has_data
19041907

19051908

@@ -1930,7 +1933,7 @@ def test_get_dataset_lazy_behavior(
19301933
assert dataset.name == "anneal"
19311934

19321935
_assert_datasets_retrieved_successfully(
1933-
[1],
1936+
[dataset],
19341937
with_qualities=with_qualities,
19351938
with_features=with_features,
19361939
with_data=with_data,
@@ -1939,7 +1942,7 @@ def test_get_dataset_lazy_behavior(
19391942
assert dataset.qualities, "Qualities should be downloaded on-demand if not during get_dataset"
19401943
assert dataset.get_data(), "Data should be downloaded on-demand if not during get_dataset"
19411944
_assert_datasets_retrieved_successfully(
1942-
[1], with_qualities=True, with_features=True, with_data=True
1945+
[dataset], with_qualities=True, with_features=True, with_data=True
19431946
)
19441947

19451948

0 commit comments

Comments
 (0)