1212from typing import Any , cast
1313from urllib .parse import urlencode , urljoin , urlparse
1414
15+ import arff
1516import requests
1617import xmltodict
1718from requests import Response
@@ -98,16 +99,32 @@ def _get_body_filename_from_response(self, response: Response) -> str:
9899 if "text/xml" in content_type :
99100 return "body.xml"
100101
102+ if response .content .startswith (b"PK\x03 \x04 " ):
103+ return "body.zip"
104+
105+ try :
106+ arff .loads (response .text )
107+ return "body.arff"
108+ except arff .ArffException :
109+ pass
110+
101111 return "body.txt"
102112
103113 def _get_body_filename_from_path (self , path : Path ) -> str :
104- if (path / "body.json" ).exists ():
105- return "body.json"
114+ candidates = []
115+ for p in path .iterdir ():
116+ if p .name .startswith ("body." ) and len (p .suffixes ) == 1 :
117+ candidates .append (p )
106118
107- if ( path / "body.xml" ). exists () :
108- return " body.xml"
119+ if not candidates :
120+ raise FileNotFoundError ( f"No body file found in path: { path } " )
109121
110- return "body.txt"
122+ if len (candidates ) > 1 :
123+ raise FileNotFoundError (
124+ f"Multiple body files found in path: { path } ({ [p .name for p in candidates ]} )"
125+ )
126+
127+ return candidates [0 ].name
111128
112129 def load (self , key : str ) -> Response :
113130 """
@@ -132,6 +149,9 @@ def load(self, key: str) -> Response:
132149 """
133150 path = self ._key_to_path (key )
134151
152+ if not path .exists ():
153+ raise FileNotFoundError (f"Cache path not found: { path } " )
154+
135155 meta_path = path / "meta.json"
136156 meta_raw = meta_path .read_bytes () if meta_path .exists () else "{}"
137157 meta = json .loads (meta_raw )
@@ -141,8 +161,6 @@ def load(self, key: str) -> Response:
141161 headers = json .loads (headers_raw )
142162
143163 body_path = path / self ._get_body_filename_from_path (path )
144- if not body_path .exists ():
145- raise FileNotFoundError (f"Incomplete cache at { body_path } " )
146164 body = body_path .read_bytes ()
147165
148166 response = Response ()
@@ -825,3 +843,9 @@ def write_to_file(response: Response, path: Path, encoding: str) -> None:
825843 handler = handler or write_to_file
826844 handler (response , file_path , encoding )
827845 return file_path
846+
def cache_path_from_url(self, url: str) -> Path:
    """
    Return the path of the cached body file for ``url``.

    ``url`` is joined onto ``self.server``, turned into a cache key with
    empty request parameters, and mapped to its cache location; the name of
    the body file found there is appended to that location.

    Parameters
    ----------
    url : str
        URL to look up; joined against ``self.server`` with ``urljoin``.

    Returns
    -------
    Path
        Cache location for the URL joined with the body file name.

    Notes
    -----
    Any exception raised by the cache's lookup helpers propagates unchanged.
    """
    absolute_url = urljoin(self.server, url)
    cache = self.cache
    entry_dir = cache._key_to_path(cache.get_key(absolute_url, params={}))
    body_name = cache._get_body_filename_from_path(entry_dir)
    return entry_dir / body_name
0 commit comments