Skip to content

Commit cb7dfeb

Browse files
feat: add support for .b2z, .b2d, .b2e files and update related tests
1 parent 2f545bf commit cb7dfeb

5 files changed

Lines changed: 107 additions & 39 deletions

File tree

src/blosc2/dict_store.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,15 @@ def _init_read_mode(self, dparams: blosc2.DParams | None = None):
153153
if "embed.b2e" not in self.offsets:
154154
raise FileNotFoundError("Embed file embed.b2e not found in store.")
155155
estore_offset = self.offsets["embed.b2e"]["offset"]
156-
schunk = blosc2.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams)
156+
schunk = blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams)
157157
for filepath in self.offsets:
158158
if filepath.endswith((".b2nd", ".b2f")):
159159
key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4]
160160
self.map_tree[key] = filepath
161161
else: # .b2d
162162
if not os.path.isdir(self.localpath):
163163
raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.")
164-
schunk = blosc2.open(self.estore_path, mode="r", dparams=dparams)
164+
schunk = blosc2.blosc2_ext.open(self.estore_path, mode="r", offset=0, dparams=dparams)
165165
self._update_map_tree()
166166

167167
self._estore = EmbedStore(_from_schunk=schunk)
@@ -267,7 +267,7 @@ def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array:
267267
filepath = self.map_tree[key]
268268
if filepath in self.offsets:
269269
offset = self.offsets[filepath]["offset"]
270-
return blosc2.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
270+
return blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
271271
else:
272272
urlpath = os.path.join(self.working_dir, filepath)
273273
if os.path.exists(urlpath):
@@ -331,7 +331,9 @@ def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]:
331331
if self.is_zip_store:
332332
if filepath in self.offsets:
333333
offset = self.offsets[filepath]["offset"]
334-
yield blosc2.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams)
334+
yield blosc2.blosc2_ext.open(
335+
self.b2z_path, mode="r", offset=offset, dparams=self.dparams
336+
)
335337
else:
336338
urlpath = os.path.join(self.working_dir, filepath)
337339
yield blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams)
@@ -350,7 +352,7 @@ def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]:
350352
if self.is_zip_store:
351353
if filepath in self.offsets:
352354
offset = self.offsets[filepath]["offset"]
353-
yield key, blosc2.open(self.b2z_path, mode="r", offset=offset)
355+
yield key, blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset)
354356
else:
355357
urlpath = os.path.join(self.working_dir, filepath)
356358
yield key, blosc2.open(urlpath, mode="r" if self.mode == "r" else "a")

src/blosc2/embed_store.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def __init__(
108108
self.storage = storage
109109

110110
if mode in ("r", "a") and urlpath:
111-
self._store = blosc2.open(urlpath, mode=mode)
111+
self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0)
112112
self._load_metadata()
113113
return
114114

@@ -254,6 +254,15 @@ def to_cframe(self) -> bytes:
254254
"""Serialize embed store to CFrame format."""
255255
return self._store.to_cframe()
256256

257+
def __enter__(self):
258+
"""Context manager enter."""
259+
return self
260+
261+
def __exit__(self, exc_type, exc_val, exc_tb):
262+
"""Context manager exit."""
263+
# No need to close anything as SChunk/NDArray handles persistence automatically
264+
return False
265+
257266

258267
def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore:
259268
"""

src/blosc2/schunk.py

Lines changed: 60 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,11 +1470,59 @@ def __dealloc__(self):
14701470
super().__dealloc__()
14711471

14721472

1473+
def _open_special_store(urlpath, mode, offset, **kwargs):
1474+
if urlpath.endswith(".b2z") or urlpath.endswith(".b2d"):
1475+
if offset != 0:
1476+
raise ValueError("Offset must be 0 for DictStore")
1477+
from blosc2.dict_store import DictStore
1478+
1479+
return DictStore(urlpath, mode=mode, **kwargs)
1480+
elif urlpath.endswith(".b2e"):
1481+
if offset != 0:
1482+
raise ValueError("Offset must be 0 for EmbedStore")
1483+
from blosc2.embed_store import EmbedStore
1484+
1485+
return EmbedStore(urlpath, mode=mode, **kwargs)
1486+
return None
1487+
1488+
1489+
def _set_default_dparams(kwargs):
1490+
dparams = kwargs.get("dparams")
1491+
if dparams is None:
1492+
# Use multiple threads for decompression by default, unless we are in WASM
1493+
# (does not support threads). The only drawback for using multiple threads
1494+
# is that access time will be slower because of the overhead of spawning threads
1495+
# (but could be fixed in the future with more intelligent thread pools).
1496+
dparams = (
1497+
blosc2.DParams(nthreads=blosc2.nthreads) if not blosc2.IS_WASM else blosc2.DParams(nthreads=1)
1498+
)
1499+
kwargs["dparams"] = dparams
1500+
1501+
1502+
def _process_opened_object(res):
1503+
meta = getattr(res, "schunk", res).meta
1504+
if "proxy-source" in meta:
1505+
proxy_src = meta["proxy-source"]
1506+
if proxy_src["local_abspath"] is not None:
1507+
src = blosc2.open(proxy_src["local_abspath"])
1508+
return blosc2.Proxy(src, _cache=res)
1509+
elif proxy_src["urlpath"] is not None:
1510+
src = blosc2.C2Array(proxy_src["urlpath"][0], proxy_src["urlpath"][1], proxy_src["urlpath"][2])
1511+
return blosc2.Proxy(src, _cache=res)
1512+
elif not proxy_src["caterva2_env"]:
1513+
raise RuntimeError("Could not find the source when opening a Proxy")
1514+
1515+
if isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta:
1516+
return blosc2._open_lazyarray(res)
1517+
else:
1518+
return res
1519+
1520+
14731521
def open(
14741522
urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict
1475-
) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array | blosc2.LazyArray | blosc2.Proxy:
1476-
"""Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`
1477-
or a :ref:`Proxy`
1523+
) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array | blosc2.LazyArray | blosc2.Proxy | Any:
1524+
"""Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`,
1525+
a :ref:`Proxy`, a :ref:`DictStore` or an :ref:`EmbedStore`.
14781526
14791527
See the `Notes` section for more info on opening `Proxy` objects.
14801528
@@ -1510,9 +1558,8 @@ def open(
15101558
15111559
Returns
15121560
-------
1513-
out: :ref:`SChunk`, :ref:`NDArray` or :ref:`C2Array`
1514-
The SChunk or NDArray (if there is a "b2nd" metalayer")
1515-
or the C2Array if :paramref:`urlpath` is a :ref:`blosc2.URLPath <URLPath>` instance.
1561+
out: :ref:`SChunk`, :ref:`NDArray`, :ref:`C2Array`, :ref:`DictStore` or :ref:`EmbedStore`
1562+
The object found in the path.
15161563
15171564
Notes
15181565
-----
@@ -1577,34 +1624,15 @@ def open(
15771624

15781625
if isinstance(urlpath, pathlib.PurePath):
15791626
urlpath = str(urlpath)
1627+
1628+
special = _open_special_store(urlpath, mode, offset, **kwargs)
1629+
if special is not None:
1630+
return special
1631+
15801632
if not os.path.exists(urlpath):
15811633
raise FileNotFoundError(f"No such file or directory: {urlpath}")
15821634

1583-
dparams = kwargs.get("dparams")
1584-
if dparams is None:
1585-
# Use multiple threads for decompression by default, unless we are in WASM
1586-
# (does not support threads). The only drawback for using multiple threads
1587-
# is that access time will be slower because of the overhead of spawning threads
1588-
# (but could be fixed in the future with more intelligent thread pools).
1589-
dparams = (
1590-
blosc2.DParams(nthreads=blosc2.nthreads) if not blosc2.IS_WASM else blosc2.DParams(nthreads=1)
1591-
)
1592-
kwargs["dparams"] = dparams
1635+
_set_default_dparams(kwargs)
15931636
res = blosc2_ext.open(urlpath, mode, offset, **kwargs)
15941637

1595-
meta = getattr(res, "schunk", res).meta
1596-
if "proxy-source" in meta:
1597-
proxy_src = meta["proxy-source"]
1598-
if proxy_src["local_abspath"] is not None:
1599-
src = blosc2.open(proxy_src["local_abspath"])
1600-
return blosc2.Proxy(src, _cache=res)
1601-
elif proxy_src["urlpath"] is not None:
1602-
src = blosc2.C2Array(proxy_src["urlpath"][0], proxy_src["urlpath"][1], proxy_src["urlpath"][2])
1603-
return blosc2.Proxy(src, _cache=res)
1604-
elif not proxy_src["caterva2_env"]:
1605-
raise RuntimeError("Could not find the source when opening a Proxy")
1606-
1607-
if isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta:
1608-
return blosc2._open_lazyarray(res)
1609-
else:
1610-
return res
1638+
return _process_opened_object(res)

tests/test_dict_store.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,3 +436,16 @@ def test_get_with_different_types():
436436
finally:
437437
if os.path.exists(path):
438438
os.remove(path)
439+
440+
441+
def test_open_context_manager(populated_dict_store):
442+
"""Test opening via blosc2.open as a context manager."""
443+
dstore_fixture, path = populated_dict_store
444+
# Close the fixture store to ensure data is written to disk
445+
dstore_fixture.close()
446+
447+
# Test opening via blosc2.open as a context manager
448+
with blosc2.open(path, mode="r") as dstore:
449+
assert isinstance(dstore, DictStore)
450+
assert "/node1" in dstore
451+
assert np.array_equal(dstore["/node1"][:], np.array([1, 2, 3]))

tests/test_embed_store.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def cleanup_files():
1919
"test_estore.b2e",
2020
"external_node3.b2nd",
2121
]
22-
yield
22+
yield files
2323
for f in files:
2424
if os.path.exists(f):
2525
os.remove(f)
@@ -201,3 +201,19 @@ def test_store_and_retrieve_schunk():
201201
assert value.nbytes == len(data)
202202
assert value[:] == data
203203
assert value.vlmeta["description"] == vlmeta
204+
205+
206+
def test_open_context_manager(cleanup_files):
207+
"""Test opening via blosc2.open as a context manager."""
208+
path = "test_embed_open.b2e"
209+
cleanup_files.append(path)
210+
211+
# Create an EmbedStore
212+
estore = blosc2.EmbedStore(path, mode="w")
213+
estore["/node1"] = np.arange(10)
214+
215+
# Test opening via blosc2.open as a context manager
216+
with blosc2.open(path, mode="r") as estore_read:
217+
assert isinstance(estore_read, blosc2.EmbedStore)
218+
assert "/node1" in estore_read
219+
assert np.array_equal(estore_read["/node1"][:], np.arange(10))

0 commit comments

Comments
 (0)