Skip to content

Commit 2c68a32

Browse files
committed
Review fixes
1 parent d434d45 commit 2c68a32

4 files changed

Lines changed: 11 additions & 14 deletions

File tree

openml/_config.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -495,6 +495,10 @@ def get_cache_directory(self) -> str:
495495
reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118
496496
return os.path.join(self._root_cache_directory, reversed_url_suffix) # noqa: PTH118
497497

498+
def get_minio_download_path(self, url: str) -> str:
499+
parsed_url = urlparse(url)
500+
return os.path.join(self.get_cache_directory(), "minio", parsed_url.path.lstrip("/")) # noqa: PTH118
501+
498502
def set_root_cache_directory(self, root_cache_directory: str | Path) -> None:
499503
"""Set the root cache directory."""
500504
self._root_cache_directory = Path(root_cache_directory)

openml/datasets/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -954,11 +954,11 @@ def _parse_publish_response(self, xml_response: dict) -> None:
954954
self.dataset_id = int(xml_response["oml:upload_data_set"]["oml:id"])
955955

956956
def publish(self) -> OpenMLDataset:
957-
"""Publish this flow to OpenML server.
957+
"""Publish this dataset to the OpenML server.
958958
959959
Returns
960960
-------
961-
self : OpenMLFlow
961+
self : OpenMLDataset
962962
"""
963963
file_elements = self._get_file_elements()
964964
if "description" not in file_elements:

tests/test_datasets/test_dataset.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,14 @@
22
from __future__ import annotations
33

44
import os
5-
import unittest.mock
6-
from pathlib import Path
7-
import shutil
85
from time import time
96

107
import numpy as np
118
import pandas as pd
129
import pytest
13-
from scipy import sparse
1410

1511
import openml
1612
from openml.datasets import OpenMLDataFeature, OpenMLDataset
17-
from openml.exceptions import PyOpenMLError
1813
from openml.testing import TestBase
1914

2015
import pytest

tests/test_datasets/test_dataset_functions.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import itertools
55
import os
66
import random
7-
import shutil
87
import time
98
import uuid
109
from itertools import product
@@ -17,8 +16,6 @@
1716
import pandas as pd
1817
import pytest
1918
import requests
20-
import requests_mock
21-
from requests_mock import ANY
2219
import scipy.sparse
2320
from oslo_concurrency import lockutils
2421

@@ -1870,10 +1867,11 @@ def _dataset_features_is_downloaded(did: int):
18701867

18711868
def _dataset_data_file_is_downloaded(dataset: OpenMLDataset):
18721869
#TODO to be updated after minio paths is fixed
1873-
pq_directory = Path(openml.config.get_cache_directory()) / openml.config.get_minio_download_path(dataset._parquet_url)
1870+
if dataset._parquet_url is not None:
1871+
pq_directory = Path(openml.config.get_cache_directory()) / Path(openml.config.get_minio_download_path(dataset._parquet_url))
1872+
if pq_directory.exists():
1873+
return any(f.suffix == ".pq" for f in pq_directory.iterdir())
18741874
arff_directory = Path(openml.config.get_cache_directory()) / "data/v1/download" / str(dataset.id)
1875-
if pq_directory.exists():
1876-
return any(f.suffix == ".pq" for f in pq_directory.iterdir())
18771875
if arff_directory.exists():
18781876
return any(f.suffix == ".arff" for f in arff_directory.iterdir())
18791877
return False
@@ -1894,7 +1892,7 @@ def _assert_datasets_retrieved_successfully(
18941892
- absence of data arff if metadata_only, else it must be present too.
18951893
"""
18961894
for dataset in datasets:
1897-
assert _dataset_description_is_downloaded(dataset)
1895+
assert _dataset_description_is_downloaded(dataset.id)
18981896

18991897
has_qualities = _dataset_qualities_is_downloaded(dataset.id)
19001898
assert has_qualities if with_qualities else not has_qualities

0 commit comments

Comments
 (0)