Skip to content

Commit 6b86327

Browse files
committed
update HTTPClient
1 parent e913fb3 commit 6b86327

1 file changed

Lines changed: 31 additions & 7 deletions

File tree

openml/_api/clients/http.py

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from typing import Any, cast
1313
from urllib.parse import urlencode, urljoin, urlparse
1414

15+
import arff
1516
import requests
1617
import xmltodict
1718
from requests import Response
@@ -98,16 +99,32 @@ def _get_body_filename_from_response(self, response: Response) -> str:
9899
if "text/xml" in content_type:
99100
return "body.xml"
100101

102+
if response.content.startswith(b"PK\x03\x04"):
103+
return "body.zip"
104+
105+
try:
106+
arff.loads(response.text)
107+
return "body.arff"
108+
except arff.ArffException:
109+
pass
110+
101111
return "body.txt"
102112

103113
def _get_body_filename_from_path(self, path: Path) -> str:
    """
    Return the name of the single cached body file stored in ``path``.

    A body file is a regular file whose name starts with ``body.`` and that
    has exactly one suffix (for example ``body.json``, ``body.xml``,
    ``body.zip``, ``body.arff`` or ``body.txt``).

    Parameters
    ----------
    path : Path
        Cache entry directory to inspect.

    Returns
    -------
    str
        File name (not the full path) of the body file.

    Raises
    ------
    FileNotFoundError
        If ``path`` contains no body file, or contains more than one.
    """
    # Only regular files can be read back as a body, so directories that
    # happen to be called ``body.<something>`` are skipped. Sorting keeps the
    # "multiple body files" message stable, since ``iterdir`` yields entries
    # in arbitrary order.
    candidates = sorted(
        entry.name
        for entry in path.iterdir()
        if entry.name.startswith("body.") and len(entry.suffixes) == 1 and entry.is_file()
    )

    if not candidates:
        raise FileNotFoundError(f"No body file found in path: {path}")

    if len(candidates) > 1:
        raise FileNotFoundError(
            f"Multiple body files found in path: {path} ({candidates})"
        )

    return candidates[0]
111128

112129
def load(self, key: str) -> Response:
113130
"""
@@ -132,6 +149,9 @@ def load(self, key: str) -> Response:
132149
"""
133150
path = self._key_to_path(key)
134151

152+
if not path.exists():
153+
raise FileNotFoundError(f"Cache path not found: {path}")
154+
135155
meta_path = path / "meta.json"
136156
meta_raw = meta_path.read_bytes() if meta_path.exists() else "{}"
137157
meta = json.loads(meta_raw)
@@ -141,8 +161,6 @@ def load(self, key: str) -> Response:
141161
headers = json.loads(headers_raw)
142162

143163
body_path = path / self._get_body_filename_from_path(path)
144-
if not body_path.exists():
145-
raise FileNotFoundError(f"Incomplete cache at {body_path}")
146164
body = body_path.read_bytes()
147165

148166
response = Response()
@@ -825,3 +843,9 @@ def write_to_file(response: Response, path: Path, encoding: str) -> None:
825843
handler = handler or write_to_file
826844
handler(response, file_path, encoding)
827845
return file_path
846+
847+
def cache_path_from_url(self, url: str) -> Path:
848+
full_url = urljoin(self.server, url)
849+
key = self.cache.get_key(full_url, params={})
850+
path = self.cache._key_to_path(key)
851+
return path / self.cache._get_body_filename_from_path(path)

0 commit comments

Comments
 (0)