Skip to content

Commit fbc1829

Browse files
committed
Add some logging to identify the issue with file failures in CI
1 parent bfb3fe9 commit fbc1829

2 files changed

Lines changed: 23 additions & 15 deletions

File tree

openml/tasks/task.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
# License: BSD 3-Clause
22
from __future__ import annotations
33

4+
import logging
45
import warnings
56
from abc import ABC
67
from collections.abc import Sequence
78
from enum import Enum
8-
from pathlib import Path
99
from typing import TYPE_CHECKING, Any, ClassVar
1010
from typing_extensions import TypedDict
1111

12+
import arff
13+
1214
import openml._api_calls
1315
import openml.config
1416
from openml import datasets
@@ -22,6 +24,9 @@
2224
import pandas as pd
2325

2426

27+
logger = logging.getLogger(__name__)
28+
29+
2530
# TODO(eddiebergman): Should use `auto()` but might be too late if these numbers are used
2631
# and stored on server.
2732
class TaskType(Enum):
@@ -178,18 +183,6 @@ def get_train_test_split_indices(
178183

179184
return self.split.get(repeat=repeat, fold=fold, sample=sample)
180185

181-
def _download_split(self, cache_file: Path) -> None:
182-
# TODO(eddiebergman): Not sure about this try-to-read-and-handle-the-error approach
183-
try:
184-
with cache_file.open(encoding="utf8"):
185-
pass
186-
except OSError:
187-
split_url = self.estimation_procedure["data_splits_url"]
188-
openml._api_calls._download_text_file(
189-
source=str(split_url),
190-
output_path=str(cache_file),
191-
)
192-
193186
def download_split(self) -> OpenMLSplit:
194187
"""Download the OpenML split for a given task."""
195188
# TODO(eddiebergman): Can this ever be `None`?
@@ -199,9 +192,23 @@ def download_split(self) -> OpenMLSplit:
199192

200193
try:
201194
split = OpenMLSplit._from_arff_file(cached_split_file)
202-
except OSError:
195+
logger.debug("Loaded file from cache: %s", str(cached_split_file))
196+
except (OSError, arff.BadDataFormat):
197+
logger.info("Failed to load file from cache: %s", str(cached_split_file))
198+
if cached_split_file.exists():
199+
logger.debug("Cleaning up old file")
200+
cached_split_file.unlink()
203201
# Next, download and cache the associated split file
204-
self._download_split(cached_split_file)
202+
split_url = self.estimation_procedure["data_splits_url"]
203+
openml._api_calls._download_text_file(
204+
source=str(split_url),
205+
output_path=str(cached_split_file),
206+
)
207+
if cached_split_file.exists():
208+
logger.info("New file created of size %d", cached_split_file.stat().st_size)
209+
else:
210+
logger.info("Failed to create new file")
211+
205212
split = OpenMLSplit._from_arff_file(cached_split_file)
206213

207214
return split

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ version = {attr = "openml.__version__.__version__"}
126126

127127
# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref
128128
[tool.pytest.ini_options]
129+
log_level="DEBUG"
129130
testpaths = ["tests"]
130131
minversion = "7.0"
131132
xfail_strict = true

0 commit comments

Comments
 (0)