@@ -240,29 +240,29 @@ def test_get_datasets_by_name(self):
240240 dids = ["anneal" , "kr-vs-kp" ]
241241 datasets = openml .datasets .get_datasets (dids )
242242 assert len (datasets ) == 2
243- _assert_datasets_retrieved_successfully ([ 1 , 2 ] )
243+ _assert_datasets_retrieved_successfully (datasets )
244244
@pytest.mark.test_server()
def test_get_datasets_by_mixed(self):
    """Fetch several datasets addressed by a mix of name and numeric id."""
    # "anneal" and 2 correspond to dataset ids 1 and 2 on the test server.
    requested = ["anneal", 2]
    datasets = openml.datasets.get_datasets(requested)

    assert len(datasets) == 2
    _assert_datasets_retrieved_successfully(datasets)
252252
@pytest.mark.test_server()
def test_get_datasets(self):
    """Fetch several datasets addressed by numeric id only."""
    requested = [1, 2]
    datasets = openml.datasets.get_datasets(requested)

    assert len(datasets) == 2
    _assert_datasets_retrieved_successfully(datasets)
259259
260260 @pytest .mark .test_server ()
261261 def test_get_dataset_by_name (self ):
262262 dataset = openml .datasets .get_dataset ("anneal" )
263263 assert type (dataset ) == OpenMLDataset
264264 assert dataset .dataset_id == 1
265- _assert_datasets_retrieved_successfully ([1 ])
265+ _assert_datasets_retrieved_successfully ([dataset ])
266266
267267 assert len (dataset .features ) > 1
268268 assert len (dataset .qualities ) > 4
@@ -1868,38 +1868,41 @@ def _dataset_features_is_downloaded(did: int):
18681868 return (cache_directory / str (did ) / "body.xml" ).exists ()
18691869
18701870
def _dataset_data_file_is_downloaded(dataset: OpenMLDataset) -> bool:
    """Return whether a data file for ``dataset`` exists in the local cache.

    Two locations under the configured cache directory are inspected:

    - the directory given by ``openml.config.get_minio_download_path`` for the
      dataset's parquet URL, which must contain a ``.pq`` file;
    - ``data/v1/download/<dataset id>``, which must contain an ``.arff`` file.

    Finding a matching file in either location is sufficient.
    """
    # TODO: to be updated after minio paths are fixed
    cache_root = Path(openml.config.get_cache_directory())
    candidates = [(cache_root / "data/v1/download" / str(dataset.id), ".arff")]

    # NOTE(review): assumes ``_parquet_url`` can be None for datasets without a
    # parquet file -- confirm against OpenMLDataset.
    parquet_url = dataset._parquet_url
    if parquet_url is not None:
        candidates.append(
            (cache_root / openml.config.get_minio_download_path(parquet_url), ".pq")
        )

    # Every candidate is checked, so a parquet directory that exists but holds
    # no ``.pq`` file cannot hide an ARFF file that was downloaded. ``is_dir``
    # (rather than ``exists``) keeps ``iterdir`` from failing on a plain file.
    return any(
        directory.is_dir() and any(f.suffix == suffix for f in directory.iterdir())
        for directory, suffix in candidates
    )
18771880
18781881
def _assert_datasets_retrieved_successfully(
    datasets: Iterable[OpenMLDataset],
    with_qualities: bool = False,
    with_features: bool = False,
    with_data: bool = False,
):
    """Check that exactly the expected cache files exist for each dataset.

    For every dataset the description must have been downloaded. Qualities,
    features and the data file must be present when the corresponding
    ``with_*`` flag is true and must be absent when it is false.

    Parameters
    ----------
    datasets
        The datasets whose cached files are inspected.
    with_qualities
        Whether the qualities file is expected to be cached.
    with_features
        Whether the features file is expected to be cached.
    with_data
        Whether a data file is expected to be cached.
    """
    for dataset in datasets:
        did = dataset.id

        # The id-based checkers take the numeric dataset id; only the
        # data-file checker needs the dataset object itself.
        # NOTE(review): assumes _dataset_description_is_downloaded takes an id
        # like its qualities/features siblings -- confirm.
        assert _dataset_description_is_downloaded(did), (
            f"description of dataset {did} was not downloaded"
        )

        has_qualities = _dataset_qualities_is_downloaded(did)
        assert bool(has_qualities) == bool(with_qualities), (
            f"dataset {did}: qualities cached={has_qualities}, expected={with_qualities}"
        )

        has_features = _dataset_features_is_downloaded(did)
        assert bool(has_features) == bool(with_features), (
            f"dataset {did}: features cached={has_features}, expected={with_features}"
        )

        has_data = _dataset_data_file_is_downloaded(dataset)
        assert bool(has_data) == bool(with_data), (
            f"dataset {did}: data file cached={has_data}, expected={with_data}"
        )
19041907
19051908
@@ -1930,7 +1933,7 @@ def test_get_dataset_lazy_behavior(
19301933 assert dataset .name == "anneal"
19311934
19321935 _assert_datasets_retrieved_successfully (
1933- [1 ],
1936+ [dataset ],
19341937 with_qualities = with_qualities ,
19351938 with_features = with_features ,
19361939 with_data = with_data ,
@@ -1939,7 +1942,7 @@ def test_get_dataset_lazy_behavior(
19391942 assert dataset .qualities , "Qualities should be downloaded on-demand if not during get_dataset"
19401943 assert dataset .get_data (), "Data should be downloaded on-demand if not during get_dataset"
19411944 _assert_datasets_retrieved_successfully (
1942- [1 ], with_qualities = True , with_features = True , with_data = True
1945+ [dataset ], with_qualities = True , with_features = True , with_data = True
19431946 )
19441947
19451948
0 commit comments