Skip to content

Commit bf8a603

Browse files
Merge pull request #541 from bossbeagle1509/feature/456-add-b2z-b2d-b2e-support
feat: add support for .b2z, .b2d, .b2e files and update related tests
2 parents 37acccb + 5f1a0b5 commit bf8a603

12 files changed

Lines changed: 150 additions & 44 deletions

File tree

doc/reference/dict_store.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ Quick example
2929
arr_ext = blosc2.arange(3, urlpath="n3.b2nd", mode="w")
3030
dstore["/dir1/node3"] = arr_ext # external file referenced
3131
32-
# Reopen and read
33-
with blosc2.DictStore("my_dstore.b2z", mode="r") as dstore:
32+
# Reopen and read using blosc2.open
33+
with blosc2.open("my_dstore.b2z", mode="r") as dstore:
3434
print(sorted(dstore.keys())) # ['/dir1/node3', '/node1', '/node2']
3535
print(dstore["/node1"][:]) # [1 2 3]
3636

doc/reference/embed_store.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ Quickstart
4141
print(list(estore.keys()))
4242
# ['/node1', '/node2', '/node3', '/node4']
4343
44+
# Reopen using blosc2.open
45+
estore = blosc2.open("example_estore.b2e", mode="r")
46+
print(list(estore.keys()))
47+
4448
.. note::
4549
- Embedded arrays (NumPy, NDArray, and SChunk) increase the size of the ``.b2e`` container.
4650
- Remote ``C2Array`` nodes only store lightweight references; reading them requires access to the remote source. NDArrays coming from external ``.b2nd`` files are embedded into the store.

doc/reference/tree_store.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ Quick example
4747
print(sorted(subtree.keys())) # ['/child1/leaf2', '/child2', '/leaf1']
4848
print(subtree["/child1/leaf2"][:]) # [4 5 6]
4949
50+
# Reopen using blosc2.open
51+
with blosc2.open("my_tree.b2z", mode="r") as tstore:
52+
print(sorted(tstore.keys()))
53+
5054
.. currentmodule:: blosc2
5155

5256
.. autoclass:: TreeStore

examples/dict-store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
print("After deletion, keys:", list(dstore.keys()))
3232

3333
# Reading back the dstore
34-
with blosc2.DictStore("example_dstore.b2z", mode="a") as dstore2:
34+
with blosc2.open("example_dstore.b2z", mode="a") as dstore2:
3535
# Add another node to the dstore
3636
dstore2["/dir2/node5"] = np.array([4, 5, 6])
3737
print("Node5 data:", dstore2["/dir2/node5"][:])

examples/embed-store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
# Reading back the embed store
3737
if persistent:
38-
estore_read = blosc2.EmbedStore(urlpath="example_estore.b2e", mode="a")
38+
estore_read = blosc2.open("example_estore.b2e", mode="a")
3939
else:
4040
estore_read = blosc2.from_cframe(estore.to_cframe())
4141

examples/tree-store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
print("After deleting '/child0/child1', keys:", sorted(tstore.keys()))
6161

6262
# Reopen and add another leaf under an existing subtree
63-
with blosc2.TreeStore("example_tree.b2z", mode="a") as tstore2:
63+
with blosc2.open("example_tree.b2z", mode="a") as tstore2:
6464
tstore2["/child0/new_leaf"] = np.array([9, 9, 9])
6565
print("Reopened keys:", sorted(tstore2.keys()))
6666
# Read via subtree view

src/blosc2/dict_store.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,15 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None):
153153
if "embed.b2e" not in self.offsets:
154154
raise FileNotFoundError("Embed file embed.b2e not found in store.")
155155
estore_offset = self.offsets["embed.b2e"]["offset"]
156-
schunk = blosc2.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams)
156+
schunk = blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams)
157157
for filepath in self.offsets:
158158
if filepath.endswith((".b2nd", ".b2f")):
159159
key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4]
160160
self.map_tree[key] = filepath
161161
else: # .b2d
162162
if not os.path.isdir(self.localpath):
163163
raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.")
164-
schunk = blosc2.open(self.estore_path, mode="r", dparams=dparams)
164+
schunk = blosc2.blosc2_ext.open(self.estore_path, mode="r", offset=0, dparams=dparams)
165165
self._update_map_tree()
166166

167167
self._estore = EmbedStore(_from_schunk=schunk)
@@ -267,7 +267,7 @@ def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
267267
filepath = self.map_tree[key]
268268
if filepath in self.offsets:
269269
offset = self.offsets[filepath]["offset"]
270-
return blosc2.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
270+
return blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
271271
else:
272272
urlpath = os.path.join(self.working_dir, filepath)
273273
if os.path.exists(urlpath):
@@ -331,7 +331,9 @@ def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
331331
if self.is_zip_store:
332332
if filepath in self.offsets:
333333
offset = self.offsets[filepath]["offset"]
334-
yield blosc2.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
334+
yield blosc2.blosc2_ext.open(
335+
self.b2z_path, mode="r", offset=offset, dparams=self.dparams
336+
)
335337
else:
336338
urlpath = os.path.join(self.working_dir, filepath)
337339
yield blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams)
@@ -350,7 +352,7 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
350352
if self.is_zip_store:
351353
if filepath in self.offsets:
352354
offset = self.offsets[filepath]["offset"]
353-
yield key, blosc2.open(self.b2z_path, mode="r", offset=offset)
355+
yield key, blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset)
354356
else:
355357
urlpath = os.path.join(self.working_dir, filepath)
356358
yield key, blosc2.open(urlpath, mode="r" if self.mode == "r" else "a")

src/blosc2/embed_store.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def __init__(
108108
self.storage = storage
109109

110110
if mode in ("r", "a") and urlpath:
111-
self._store = blosc2.open(urlpath, mode=mode)
111+
self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0)
112112
self._load_metadata()
113113
return
114114

@@ -254,6 +254,15 @@ def to_cframe(self) -> bytes:
254254
"""Serialize embed store to CFrame format."""
255255
return self._store.to_cframe()
256256

257+
def __enter__(self):
    """Enter the ``with`` block, handing back this very store object."""
    return self
260+
261+
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``with`` block; returns False so exceptions are not suppressed."""
    # Nothing is closed here: SChunk/NDArray handles persistence automatically.
    suppress_exception = False
    return suppress_exception
265+
257266

258267
def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore:
259268
"""

src/blosc2/schunk.py

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,11 +1470,74 @@ def __dealloc__(self):
14701470
super().__dealloc__()
14711471

14721472

1473+
def _open_special_store(urlpath, mode, offset, **kwargs):
    """Open ``urlpath`` as a store container when its suffix identifies one.

    The suffix decides which class is instantiated: ``.b2d`` -> ``DictStore``,
    ``.b2z`` -> ``TreeStore`` and ``.b2e`` -> ``EmbedStore``.

    Parameters
    ----------
    urlpath: str or os.PathLike
        Path of the candidate store. Path-like objects are converted with
        ``os.fspath``; any other non-string value is treated as "not a store".
    mode: str
        Forwarded to the store constructor as ``mode=``.
    offset: int
        Must be 0 whenever the suffix matches one of the stores.
    kwargs: dict
        Forwarded untouched to the store constructor.

    Returns
    -------
    out: DictStore, TreeStore, EmbedStore or None
        The opened store, or ``None`` when ``urlpath`` does not carry one of
        the store suffixes, so the caller can fall back to the regular path.

    Raises
    ------
    ValueError
        If the suffix matches a store and ``offset`` is not 0.
    """
    if isinstance(urlpath, os.PathLike):
        urlpath = os.fspath(urlpath)
    if not isinstance(urlpath, str):
        # Only string paths can carry a store suffix; calling ``.endswith`` on
        # anything else used to blow up with an AttributeError.
        return None

    store_by_suffix = ((".b2d", "DictStore"), (".b2z", "TreeStore"), (".b2e", "EmbedStore"))
    store_name = next((name for suffix, name in store_by_suffix if urlpath.endswith(suffix)), None)
    if store_name is None:
        return None
    if offset != 0:
        raise ValueError(f"Offset must be 0 for {store_name}")

    # The store classes are imported only when needed, exactly as before
    # (presumably to avoid a circular import with this module -- TODO confirm).
    if store_name == "DictStore":
        from blosc2.dict_store import DictStore as store_cls
    elif store_name == "TreeStore":
        from blosc2.tree_store import TreeStore as store_cls
    else:
        from blosc2.embed_store import EmbedStore as store_cls

    return store_cls(urlpath, mode=mode, **kwargs)
1493+
1494+
1495+
def _set_default_dparams(kwargs):
    """Fill in ``kwargs["dparams"]`` in place when it is missing or ``None``.

    An explicitly supplied ``dparams`` value is left as it is. Nothing is
    returned; the caller's dictionary is modified directly.
    """
    if kwargs.get("dparams") is not None:
        return

    # Use multiple threads for decompression by default, unless we are in WASM
    # (does not support threads). The only drawback for using multiple threads
    # is that access time will be slower because of the overhead of spawning threads
    # (but could be fixed in the future with more intelligent thread pools).
    thread_count = 1 if blosc2.IS_WASM else blosc2.nthreads
    kwargs["dparams"] = blosc2.DParams(nthreads=thread_count)
1506+
1507+
1508+
def _process_opened_object(res):
    """Wrap a freshly opened object in a Proxy or LazyArray when its metadata asks for it.

    The metalayers are read from ``res.schunk.meta`` when ``res`` has a
    ``schunk`` attribute, otherwise from ``res.meta``.

    * With a ``"proxy-source"`` metalayer, a ``blosc2.Proxy`` caching ``res`` is
      returned, its source being the local file (``local_abspath``) or, failing
      that, a ``blosc2.C2Array`` built from the three ``urlpath`` entries.
    * If neither source is recorded and ``caterva2_env`` is falsy, a
      ``RuntimeError`` is raised.
    * Otherwise an NDArray carrying a ``"LazyArray"`` metalayer is handed to
      ``blosc2._open_lazyarray``; anything else is returned unchanged.
    """
    metalayers = getattr(res, "schunk", res).meta
    if "proxy-source" in metalayers:
        source_info = metalayers["proxy-source"]

        local_path = source_info["local_abspath"]
        if local_path is not None:
            return blosc2.Proxy(blosc2.open(local_path), _cache=res)

        remote = source_info["urlpath"]
        if remote is not None:
            return blosc2.Proxy(blosc2.C2Array(remote[0], remote[1], remote[2]), _cache=res)

        if not source_info["caterva2_env"]:
            raise RuntimeError("Could not find the source when opening a Proxy")

    holds_lazy_expr = isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta
    return blosc2._open_lazyarray(res) if holds_lazy_expr else res
1525+
1526+
14731527
def open(
14741528
urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict
1475-
) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array | blosc2.LazyArray | blosc2.Proxy:
1476-
"""Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`
1477-
or a :ref:`Proxy`
1529+
) -> (
1530+
blosc2.SChunk
1531+
| blosc2.NDArray
1532+
| blosc2.C2Array
1533+
| blosc2.LazyArray
1534+
| blosc2.Proxy
1535+
| blosc2.DictStore
1536+
| blosc2.TreeStore
1537+
| blosc2.EmbedStore
1538+
):
1539+
"""Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`,
1540+
a :ref:`Proxy`, a :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore`.
14781541
14791542
See the `Notes` section for more info on opening `Proxy` objects.
14801543
@@ -1510,9 +1573,8 @@ def open(
15101573
15111574
Returns
15121575
-------
1513-
out: :ref:`SChunk`, :ref:`NDArray` or :ref:`C2Array`
1514-
The SChunk or NDArray (if there is a "b2nd" metalayer")
1515-
or the C2Array if :paramref:`urlpath` is a :ref:`blosc2.URLPath <URLPath>` instance.
1576+
out: :ref:`SChunk`, :ref:`NDArray`, :ref:`C2Array`, :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore`
1577+
The object found in the path.
15161578
15171579
Notes
15181580
-----
@@ -1577,34 +1639,15 @@ def open(
15771639

15781640
if isinstance(urlpath, pathlib.PurePath):
15791641
urlpath = str(urlpath)
1642+
1643+
special = _open_special_store(urlpath, mode, offset, **kwargs)
1644+
if special is not None:
1645+
return special
1646+
15801647
if not os.path.exists(urlpath):
15811648
raise FileNotFoundError(f"No such file or directory: {urlpath}")
15821649

1583-
dparams = kwargs.get("dparams")
1584-
if dparams is None:
1585-
# Use multiple threads for decompression by default, unless we are in WASM
1586-
# (does not support threads). The only drawback for using multiple threads
1587-
# is that access time will be slower because of the overhead of spawning threads
1588-
# (but could be fixed in the future with more intelligent thread pools).
1589-
dparams = (
1590-
blosc2.DParams(nthreads=blosc2.nthreads) if not blosc2.IS_WASM else blosc2.DParams(nthreads=1)
1591-
)
1592-
kwargs["dparams"] = dparams
1650+
_set_default_dparams(kwargs)
15931651
res = blosc2_ext.open(urlpath, mode, offset, **kwargs)
15941652

1595-
meta = getattr(res, "schunk", res).meta
1596-
if "proxy-source" in meta:
1597-
proxy_src = meta["proxy-source"]
1598-
if proxy_src["local_abspath"] is not None:
1599-
src = blosc2.open(proxy_src["local_abspath"])
1600-
return blosc2.Proxy(src, _cache=res)
1601-
elif proxy_src["urlpath"] is not None:
1602-
src = blosc2.C2Array(proxy_src["urlpath"][0], proxy_src["urlpath"][1], proxy_src["urlpath"][2])
1603-
return blosc2.Proxy(src, _cache=res)
1604-
elif not proxy_src["caterva2_env"]:
1605-
raise RuntimeError("Could not find the source when opening a Proxy")
1606-
1607-
if isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta:
1608-
return blosc2._open_lazyarray(res)
1609-
else:
1610-
return res
1653+
return _process_opened_object(res)

tests/test_dict_store.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,3 +436,16 @@ def test_get_with_different_types():
436436
finally:
437437
if os.path.exists(path):
438438
os.remove(path)
439+
440+
441+
def test_open_context_manager(populated_dict_store):
    """Check that blosc2.open yields a usable DictStore inside a ``with`` block."""
    fixture_store, store_path = populated_dict_store
    # Close the fixture store first so that its data is written to disk
    fixture_store.close()

    expected_node1 = np.array([1, 2, 3])
    # Reopen through the generic entry point rather than the DictStore constructor
    with blosc2.open(store_path, mode="r") as reopened:
        assert isinstance(reopened, DictStore)
        assert "/node1" in reopened
        assert np.array_equal(reopened["/node1"][:], expected_node1)

0 commit comments

Comments
 (0)