Skip to content

Commit 66b1c58

Browse files
committed
Add persistent store-type meta to EmbedStore, DictStore and TreeStore
Each container now writes a type-identifying metalayer into its underlying SChunk (embed.b2e) at creation time: "b2embed" for EmbedStore, "b2dict" for DictStore, and "b2tree" for TreeStore. The meta is read back on open and exposed via `store.storage.meta`. `blosc2.open()` (_open_special_store) now checks this metalayer first to dispatch to the right constructor, taking priority over the file extension heuristic. This also fixes the latent bug where a .b2z file created with DictStore would have been opened as a TreeStore. Tests added to verify meta presence and persistence for all three store types, including after close/reopen and via `blosc2.open()`. Related to #546.
1 parent fa0362c commit 66b1c58

4 files changed

Lines changed: 65 additions & 3 deletions

File tree

src/blosc2/schunk.py

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import os
1111
import pathlib
12+
import zipfile
1213
from collections import namedtuple
1314
from collections.abc import Iterator, Mapping, MutableMapping
1415
from dataclasses import asdict, replace
@@ -1486,20 +1487,52 @@ def __dealloc__(self):
14861487
super().__dealloc__()
14871488

14881489

1489-
def _open_special_store(urlpath, mode, offset, **kwargs):
1490+
def _meta_from_store(urlpath, offset):
    """Try to read the SChunk meta from a store path (b2e, b2d, or b2z).

    This is a best-effort probe: any failure to locate or open the embedded
    SChunk yields ``None`` instead of raising, so callers can fall back to
    another detection strategy.

    Parameters
    ----------
    urlpath : str
        Path of the candidate store. Only paths ending in ``.b2e``, ``.b2d``
        or ``.b2z`` are probed.
    offset : int
        Offset requested by the caller. A ``.b2e`` file is only probed when
        this is 0; the ``.b2d`` and ``.b2z`` branches do not consult it.

    Returns
    -------
    The ``meta`` attribute of the opened SChunk, or ``None`` when no meta
    could be read.
    """

    def _open_meta(path, off=0):
        # Deliberately swallow every error: "cannot open" means "no meta".
        try:
            return blosc2.blosc2_ext.open(path, mode="r", offset=off).meta
        except Exception:
            return None

    def _embed_data_offset(zip_path):
        """Return the byte offset of the ``embed.b2e`` payload, or None."""
        try:
            with open(zip_path, "rb") as f, zipfile.ZipFile(f) as zf:
                # First matching entry wins, as in a plain scan of the listing.
                info = next(
                    (entry for entry in zf.infolist() if entry.filename == "embed.b2e"),
                    None,
                )
                if info is None:
                    return None
                # An offset into the archive is only meaningful when the
                # member bytes are stored verbatim (no compression).
                if info.compress_type != zipfile.ZIP_STORED:
                    return None
                f.seek(info.header_offset)
                local_header = f.read(30)
        except Exception:
            return None
        # The fixed part of a local file header is 30 bytes and starts with
        # the "PK\x03\x04" signature; anything else is truncated or corrupt.
        if len(local_header) != 30 or local_header[:4] != b"PK\x03\x04":
            return None
        # The name/extra lengths must come from the *local* header: they may
        # differ from the values recorded in the central directory.
        filename_len = int.from_bytes(local_header[26:28], "little")
        extra_len = int.from_bytes(local_header[28:30], "little")
        return info.header_offset + 30 + filename_len + extra_len

    if urlpath.endswith(".b2e") and offset == 0:
        return _open_meta(urlpath)
    if urlpath.endswith(".b2d") and os.path.isdir(urlpath):
        embed_path = os.path.join(urlpath, "embed.b2e")
        if os.path.exists(embed_path):
            return _open_meta(embed_path)
    if urlpath.endswith(".b2z") and os.path.isfile(urlpath):
        data_offset = _embed_data_offset(urlpath)
        if data_offset is not None:
            return _open_meta(urlpath, data_offset)
    return None
1519+
1520+
1521+
def _store_from_extension(urlpath, mode, offset, **kwargs):
1522+
"""Dispatch to the right store constructor based on file extension."""
14901523
if urlpath.endswith(".b2d"):
14911524
if offset != 0:
14921525
raise ValueError("Offset must be 0 for DictStore")
14931526
from blosc2.dict_store import DictStore
14941527

14951528
return DictStore(urlpath, mode=mode, **kwargs)
1496-
elif urlpath.endswith(".b2z"):
1529+
if urlpath.endswith(".b2z"):
14971530
if offset != 0:
14981531
raise ValueError("Offset must be 0 for TreeStore")
14991532
from blosc2.tree_store import TreeStore
15001533

15011534
return TreeStore(urlpath, mode=mode, **kwargs)
1502-
elif urlpath.endswith(".b2e"):
1535+
if urlpath.endswith(".b2e"):
15031536
if offset != 0:
15041537
raise ValueError("Offset must be 0 for EmbedStore")
15051538
from blosc2.embed_store import EmbedStore
@@ -1508,6 +1541,32 @@ def _open_special_store(urlpath, mode, offset, **kwargs):
15081541
return None
15091542

15101543

1544+
def _open_special_store(urlpath, mode, offset, **kwargs):
    """Open ``urlpath`` as an EmbedStore, DictStore or TreeStore if it is one.

    The store-type marker found in the embedded SChunk meta ("b2embed",
    "b2dict" or "b2tree", checked in that order) decides which constructor
    is used. When no meta can be read, or it carries none of the markers,
    the decision is delegated to ``_store_from_extension``.

    Raises
    ------
    ValueError
        If a marker is found and ``offset`` is not 0.
    """
    store_meta = _meta_from_store(urlpath, offset)
    if store_meta is None:
        return _store_from_extension(urlpath, mode, offset, **kwargs)

    # Meta-based detection has priority over extension; first marker wins.
    for marker in ("b2embed", "b2dict", "b2tree"):
        if marker in store_meta:
            break
    else:
        return _store_from_extension(urlpath, mode, offset, **kwargs)

    # Imports stay local to the selected branch, after the offset check.
    if marker == "b2embed":
        if offset != 0:
            raise ValueError("Offset must be 0 for EmbedStore")
        from blosc2.embed_store import EmbedStore as store_cls
    elif marker == "b2dict":
        if offset != 0:
            raise ValueError("Offset must be 0 for DictStore")
        from blosc2.dict_store import DictStore as store_cls
    else:
        if offset != 0:
            raise ValueError("Offset must be 0 for TreeStore")
        from blosc2.tree_store import TreeStore as store_cls

    return store_cls(urlpath, mode=mode, **kwargs)
1568+
1569+
15111570
def _set_default_dparams(kwargs):
15121571
dparams = kwargs.get("dparams")
15131572
if dparams is None:

tests/test_dict_store.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ def test_open_context_manager(populated_dict_store):
449449
# Test opening via blosc2.open as a context manager
450450
with blosc2.open(path, mode="r", mmap_mode="r") as dstore:
451451
assert isinstance(dstore, DictStore)
452+
assert "b2dict" in dstore.storage.meta
452453
assert "/node1" in dstore
453454
assert np.array_equal(dstore["/node1"][:], np.array([1, 2, 3]))
454455

tests/test_embed_store.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ def test_open_context_manager(cleanup_files):
220220
# Test opening via blosc2.open as a context manager
221221
with blosc2.open(path, mode="r", mmap_mode="r") as estore_read:
222222
assert isinstance(estore_read, blosc2.EmbedStore)
223+
assert "b2embed" in estore_read.storage.meta
223224
assert "/node1" in estore_read
224225
assert np.array_equal(estore_read["/node1"][:], np.arange(10))
225226

tests/test_tree_store.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@ def test_open_context_manager(populated_tree_store):
935935
# Test opening via blosc2.open as a context manager
936936
with blosc2.open(path, mode="r", mmap_mode="r") as tstore:
937937
assert isinstance(tstore, TreeStore)
938+
assert "b2tree" in tstore.storage.meta
938939
assert "/child0/data" in tstore
939940
assert np.array_equal(tstore["/child0/data"][:], np.array([1, 2, 3]))
940941

0 commit comments

Comments
 (0)