44import itertools
55import os
66import random
7- import shutil
87import time
98import uuid
109from itertools import product
1716import pandas as pd
1817import pytest
1918import requests
20- import requests_mock
21- from requests_mock import ANY
2219import scipy .sparse
2320from oslo_concurrency import lockutils
2421
@@ -1870,10 +1867,11 @@ def _dataset_features_is_downloaded(did: int):
18701867
def _dataset_data_file_is_downloaded(dataset: OpenMLDataset) -> bool:
    """Return True if a data file for ``dataset`` is present in the local cache.

    Checks the parquet cache directory first (only when the dataset advertises
    a parquet URL), then falls back to the ARFF cache directory.

    Parameters
    ----------
    dataset : OpenMLDataset
        The dataset whose cached data files are checked.

    Returns
    -------
    bool
        True if a ``.pq`` or ``.arff`` file exists in the respective cache
        directory, False otherwise.
    """
    # TODO: to be updated after minio paths is fixed
    if dataset._parquet_url is not None:
        # NOTE(review): assumes get_minio_download_path returns a relative
        # path — if it were absolute, the `/` join would discard the cache
        # directory prefix. TODO confirm.
        pq_directory = Path(openml.config.get_cache_directory()) / Path(
            openml.config.get_minio_download_path(dataset._parquet_url)
        )
        if pq_directory.exists():
            return any(f.suffix == ".pq" for f in pq_directory.iterdir())
    arff_directory = (
        Path(openml.config.get_cache_directory()) / "data/v1/download" / str(dataset.id)
    )
    if arff_directory.exists():
        return any(f.suffix == ".arff" for f in arff_directory.iterdir())
    return False
@@ -1894,7 +1892,7 @@ def _assert_datasets_retrieved_successfully(
18941892 - absence of data arff if metadata_only, else it must be present too.
18951893 """
18961894 for dataset in datasets :
1897- assert _dataset_description_is_downloaded (dataset )
1895+ assert _dataset_description_is_downloaded (dataset . id )
18981896
18991897 has_qualities = _dataset_qualities_is_downloaded (dataset .id )
19001898 assert has_qualities if with_qualities else not has_qualities
0 commit comments