1010import os
1111import shutil
1212import tempfile
13+ import warnings
1314import zipfile
1415from typing import TYPE_CHECKING , Any
1516
@@ -94,6 +95,9 @@ class DictStore:
9495 -----
9596 - External persistence uses the following file extensions:
9697 .b2nd for NDArray, .b2f for SChunk, and .b2b for BatchStore.
98+ These suffixes are a naming convention for newly written leaves; when
99+ reopening an existing store, leaf typing is resolved from object
100+ metadata instead of trusting the suffix alone.
97101 """
98102
99103 def __init__ (
@@ -112,7 +116,7 @@ def __init__(
112116 """
113117 See :class:`DictStore` for full documentation of parameters.
114118 """
115- self .localpath = localpath if isinstance (localpath , ( str , bytes ) ) else str (localpath )
119+ self .localpath = localpath if isinstance (localpath , str | bytes ) else str (localpath )
116120 if not self .localpath .endswith ((".b2z" , ".b2d" )):
117121 raise ValueError (f"localpath must have a .b2z or .b2d extension; you passed: { self .localpath } " )
118122 if mode not in ("r" , "w" , "a" ):
@@ -182,13 +186,7 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None):
182186 mmap_mode = self .mmap_mode ,
183187 dparams = dparams ,
184188 )
185- for filepath in self .offsets :
186- if filepath .endswith ((".b2nd" , ".b2f" , ".b2b" )):
187- if filepath .endswith (".b2nd" ):
188- key = "/" + filepath [:- 5 ]
189- else :
190- key = "/" + filepath [:- 4 ]
191- self .map_tree [key ] = filepath
189+ self ._update_map_tree_from_offsets ()
192190 else : # .b2d
193191 if not os .path .isdir (self .localpath ):
194192 raise FileNotFoundError (f"Directory { self .localpath } does not exist for reading." )
@@ -204,6 +202,90 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None):
204202 self ._estore = EmbedStore (_from_schunk = schunk )
205203 self .storage .meta = self ._estore .storage .meta
206204
205+ @staticmethod
206+ def _logical_key_from_relpath (rel_path : str ) -> str :
207+ """Map an external leaf path to its logical tree key."""
208+ rel_path = rel_path .replace (os .sep , "/" )
209+ key = os .path .splitext (rel_path )[0 ]
210+ if not key .startswith ("/" ):
211+ key = "/" + key
212+ return key
213+
214+ @staticmethod
215+ def _expected_ext_from_kind (kind : str ) -> str :
216+ """Return the canonical write-time suffix for a supported external leaf kind."""
217+ if kind == "ndarray" :
218+ return ".b2nd"
219+ if kind == "batchstore" :
220+ return ".b2b"
221+ return ".b2f"
222+
@classmethod
def _opened_external_kind(
    cls,
    opened: blosc2.NDArray | SChunk | blosc2.VLArray | blosc2.BatchStore | C2Array | Any,
    rel_path: str,
) -> str | None:
    """Return the supported external leaf kind for an already opened object.

    Parameters
    ----------
    opened
        Object returned by opening a candidate leaf. It is first passed
        through ``_process_opened_object`` and the result is classified.
    rel_path : str
        Path of the leaf; used for the extension check and in warning
        messages.

    Returns
    -------
    str or None
        One of ``"batchstore"``, ``"vlarray"``, ``"ndarray"`` or
        ``"schunk"``; ``None`` when the processed object is none of the
        supported types (a ``UserWarning`` is emitted in that case).

    Notes
    -----
    A ``UserWarning`` is also emitted, without changing the result, when
    the file suffix differs from the canonical suffix for the detected kind.
    """
    processed = _process_opened_object(opened)
    # The order of these checks is kept as-is.
    # NOTE(review): presumably the more specific wrappers must be tested
    # before NDArray/SChunk -- confirm against the class hierarchy.
    if isinstance(processed, blosc2.BatchStore):
        kind = "batchstore"
    elif isinstance(processed, blosc2.VLArray):
        kind = "vlarray"
    elif isinstance(processed, blosc2.NDArray):
        kind = "ndarray"
    elif isinstance(processed, SChunk):
        kind = "schunk"
    else:
        warnings.warn(
            f"Ignoring unsupported Blosc2 object at '{rel_path}' during DictStore discovery: "
            f"{type(processed).__name__}",
            UserWarning,
            stacklevel=2,
        )
        return None

    # The suffix is only a naming convention: a mismatch is reported but the
    # kind derived from the object itself is what gets returned.
    expected_ext = cls._expected_ext_from_kind(kind)
    found_ext = os.path.splitext(rel_path)[1]
    if found_ext != expected_ext:
        warnings.warn(
            f"External leaf '{rel_path}' uses extension '{found_ext}' but metadata resolves to "
            f"{type(processed).__name__}; expected '{expected_ext}'.",
            UserWarning,
            stacklevel=2,
        )
    return kind
258+
def _probe_external_leaf_path(self, rel_path: str) -> bool:
    """Return whether a working-dir file is a supported external leaf.

    Parameters
    ----------
    rel_path : str
        Path of the candidate file relative to ``self.working_dir``.

    Returns
    -------
    bool
        ``True`` when the file opens and ``_opened_external_kind`` resolves
        it to a supported kind; ``False`` otherwise.
    """
    urlpath = os.path.join(self.working_dir, rel_path)
    try:
        opened = blosc2.blosc2_ext.open(
            urlpath,
            mode="r",
            offset=0,
            mmap_mode=self.mmap_mode,
            dparams=self.dparams,
        )
    except Exception:
        # Deliberately broad: any failure to open is treated as
        # "not a leaf" rather than an error.
        return False
    return self._opened_external_kind(opened, rel_path) is not None
273+
def _probe_external_leaf_offset(self, filepath: str) -> bool:
    """Return whether a zip member is a supported external leaf.

    Parameters
    ----------
    filepath : str
        Member name; must be a key of ``self.offsets``.

    Returns
    -------
    bool
        ``True`` when the member opens at its recorded offset inside
        ``self.b2z_path`` and ``_opened_external_kind`` resolves it to a
        supported kind; ``False`` otherwise.

    Raises
    ------
    KeyError
        If ``filepath`` is not present in ``self.offsets`` (the lookup is
        outside the ``try``).
    """
    offset = self.offsets[filepath]["offset"]
    try:
        opened = blosc2.blosc2_ext.open(
            self.b2z_path,
            mode="r",
            offset=offset,
            mmap_mode=self.mmap_mode,
            dparams=self.dparams,
        )
    except Exception:
        # Deliberately broad: any failure to open is treated as
        # "not a leaf" rather than an error.
        return False
    return self._opened_external_kind(opened, filepath) is not None
288+
207289 def _init_write_append_mode (
208290 self ,
209291 cparams : blosc2 .CParams | None ,
@@ -229,24 +311,23 @@ def _init_write_append_mode(
229311 self ._update_map_tree ()
230312
231313 def _update_map_tree (self ):
232- # Build map_tree from .b2nd and .b2f files in working dir
314+ # Build map_tree from supported external leaves in working dir.
233315 for root , _ , files in os .walk (self .working_dir ):
234316 for file in files :
235317 filepath = os .path .join (root , file )
236- if filepath .endswith ((".b2nd" , ".b2f" , ".b2b" )):
237- # Convert filename to key: remove extension and ensure starts with /
238- rel_path = os .path .relpath (filepath , self .working_dir )
239- # Normalize path separators to forward slashes for cross-platform consistency
240- rel_path = rel_path .replace (os .sep , "/" )
241- if rel_path .endswith (".b2nd" ):
242- key = rel_path [:- 5 ]
243- elif rel_path .endswith (".b2b" ) or rel_path .endswith (".b2f" ):
244- key = rel_path [:- 4 ]
245- else :
246- continue
247- if not key .startswith ("/" ):
248- key = "/" + key
249- self .map_tree [key ] = rel_path
318+ if os .path .abspath (filepath ) == os .path .abspath (self .estore_path ):
319+ continue
320+ rel_path = os .path .relpath (filepath , self .working_dir ).replace (os .sep , "/" )
321+ if self ._probe_external_leaf_path (rel_path ):
322+ self .map_tree [self ._logical_key_from_relpath (rel_path )] = rel_path
323+
324+ def _update_map_tree_from_offsets (self ):
325+ """Build map_tree from supported external leaves in a zip store."""
326+ for filepath in self .offsets :
327+ if filepath == "embed.b2e" :
328+ continue
329+ if self ._probe_external_leaf_offset (filepath ):
330+ self .map_tree [self ._logical_key_from_relpath (filepath )] = filepath
250331
251332 @property
252333 def estore (self ) -> EmbedStore :
@@ -255,13 +336,13 @@ def estore(self) -> EmbedStore:
255336
@staticmethod
def _value_nbytes(value: blosc2.Array | SChunk | blosc2.VLArray | blosc2.BatchStore) -> int:
    """Return the ``nbytes`` figure for a stored value.

    ``VLArray`` and ``BatchStore`` values report ``value.schunk.nbytes``;
    every other value reports its own ``nbytes`` attribute.
    """
    if isinstance(value, blosc2.VLArray | blosc2.BatchStore):
        return value.schunk.nbytes
    return value.nbytes
261342
@staticmethod
def _is_external_value(value: blosc2.Array | SChunk | blosc2.VLArray | blosc2.BatchStore) -> bool:
    """Return whether ``value`` is a supported leaf type with a truthy ``urlpath``.

    The value must be an ``NDArray``, ``SChunk``, ``VLArray`` or
    ``BatchStore``. A missing, ``None`` or empty ``urlpath`` attribute
    yields ``False``.
    """
    return isinstance(value, blosc2.NDArray | SChunk | blosc2.VLArray | blosc2.BatchStore) and bool(
        getattr(value, "urlpath", None)
    )
267348
@@ -406,12 +487,14 @@ def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
406487 if self .is_zip_store :
407488 if filepath in self .offsets :
408489 offset = self .offsets [filepath ]["offset" ]
409- yield blosc2 .blosc2_ext .open (
410- self .b2z_path ,
411- mode = "r" ,
412- offset = offset ,
413- mmap_mode = self .mmap_mode ,
414- dparams = self .dparams ,
490+ yield _process_opened_object (
491+ blosc2 .blosc2_ext .open (
492+ self .b2z_path ,
493+ mode = "r" ,
494+ offset = offset ,
495+ mmap_mode = self .mmap_mode ,
496+ dparams = self .dparams ,
497+ )
415498 )
416499 else :
417500 urlpath = os .path .join (self .working_dir , filepath )
@@ -438,12 +521,14 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
438521 offset = self .offsets [filepath ]["offset" ]
439522 yield (
440523 key ,
441- blosc2 .blosc2_ext .open (
442- self .b2z_path ,
443- mode = "r" ,
444- offset = offset ,
445- mmap_mode = self .mmap_mode ,
446- dparams = self .dparams ,
524+ _process_opened_object (
525+ blosc2 .blosc2_ext .open (
526+ self .b2z_path ,
527+ mode = "r" ,
528+ offset = offset ,
529+ mmap_mode = self .mmap_mode ,
530+ dparams = self .dparams ,
531+ )
447532 ),
448533 )
449534 else :
0 commit comments