Skip to content

Commit 9c177bd

Browse files
committed
Adapt max_blocksize depending on the clevel
1 parent d21ed20 commit 9c177bd

5 files changed

Lines changed: 47 additions & 8 deletions

File tree

bench/batch_store.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def build_parser() -> argparse.ArgumentParser:
5151
)
5252
parser.add_argument("--codec", type=str, default="ZSTD", choices=[codec.name for codec in blosc2.Codec])
5353
parser.add_argument("--clevel", type=int, default=5)
54-
parser.add_argument("--use-dict", action="store_true", help="Enable dictionaries for ZSTD/LZ4 codecs.")
54+
parser.add_argument("--use-dict", action="store_true", help="Enable dictionaries for ZSTD/LZ4/LZ4HC codecs.")
5555
parser.add_argument("--in-mem", action="store_true", help="Keep the BatchStore purely in memory.")
5656
return parser
5757

@@ -65,7 +65,7 @@ def build_store(codec: blosc2.Codec, clevel: int, use_dict: bool, in_mem: bool)
6565
cparams={
6666
"codec": codec,
6767
"clevel": clevel,
68-
"use_dict": use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4),
68+
"use_dict": use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4, blosc2.Codec.LZ4HC),
6969
},
7070
)
7171
for batch_index in range(NBATCHES):
@@ -77,7 +77,7 @@ def build_store(codec: blosc2.Codec, clevel: int, use_dict: bool, in_mem: bool)
7777
cparams = {
7878
"codec": codec,
7979
"clevel": clevel,
80-
"use_dict": use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4),
80+
"use_dict": use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4, blosc2.Codec.LZ4HC),
8181
}
8282
with blosc2.BatchStore(storage=storage, max_blocksize=BLOCKSIZE_MAX, cparams=cparams) as store:
8383
for batch_index in range(NBATCHES):
@@ -107,7 +107,7 @@ def main() -> None:
107107
parser = build_parser()
108108
args = parser.parse_args()
109109
codec = blosc2.Codec[args.codec]
110-
use_dict = args.use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4)
110+
use_dict = args.use_dict and codec in (blosc2.Codec.ZSTD, blosc2.Codec.LZ4, blosc2.Codec.LZ4HC)
111111

112112
mode_label = "in-memory" if args.in_mem else "persistent"
113113
article = "an" if args.in_mem else "a"

src/blosc2/batch_store.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -295,13 +295,21 @@ def _persist_max_blocksize(self) -> None:
295295
def _guess_blocksize(self, payload_sizes: list[int]) -> int:
296296
if not payload_sizes:
297297
raise ValueError("BatchStore entries cannot be empty")
298-
l2_cache_size = blosc2.cpu_info.get("l2_cache_size")
299-
if not isinstance(l2_cache_size, int) or l2_cache_size <= 0:
298+
clevel = self.cparams.clevel
299+
if clevel == 9:
300+
return len(payload_sizes)
301+
if 0 < clevel < 6:
302+
budget = blosc2.cpu_info.get("l1_data_cache_size")
303+
elif 6 <= clevel < 9:
304+
budget = blosc2.cpu_info.get("l2_cache_size")
305+
else:
306+
return len(payload_sizes)
307+
if not isinstance(budget, int) or budget <= 0:
300308
return len(payload_sizes)
301309
total = 0
302310
count = 0
303311
for payload_size in payload_sizes:
304-
if count > 0 and total + payload_size > l2_cache_size:
312+
if count > 0 and total + payload_size > budget:
305313
break
306314
total += payload_size
307315
count += 1

src/blosc2/storage.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,9 @@ class CParams:
4646
(maximum compression). Default is 1.
4747
use_dict: bool
4848
Whether to use dictionaries when compressing
49-
(only for :py:obj:`blosc2.Codec.ZSTD <Codec>`). Default is `False`.
49+
(supported for :py:obj:`blosc2.Codec.ZSTD <Codec>`,
50+
:py:obj:`blosc2.Codec.LZ4 <Codec>`, and
51+
:py:obj:`blosc2.Codec.LZ4HC <Codec>`). Default is `False`.
5052
typesize: int
5153
The data type size, ranging from 1 to 255. Default is 8.
5254
nthreads: int

tests/test_batch_store.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,10 @@ def test_batchstore_respects_explicit_use_dict_and_non_zstd():
241241
assert barray.cparams.codec == blosc2.Codec.LZ4
242242
assert barray.cparams.use_dict is False
243243

244+
barray = blosc2.BatchStore(cparams={"codec": blosc2.Codec.LZ4HC, "clevel": 1, "use_dict": True})
245+
assert barray.cparams.codec == blosc2.Codec.LZ4HC
246+
assert barray.cparams.use_dict is True
247+
244248
barray = blosc2.BatchStore(cparams={"codec": blosc2.Codec.ZSTD, "clevel": 0})
245249
assert barray.cparams.codec == blosc2.Codec.ZSTD
246250
assert barray.cparams.use_dict is False
@@ -252,6 +256,27 @@ def test_batchstore_respects_explicit_use_dict_and_non_zstd():
252256
assert barray.cparams.use_dict is False
253257

254258

259+
def test_batchstore_guess_max_blocksize_uses_l1_for_low_clevel(monkeypatch):
260+
monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 100)
261+
monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 1000)
262+
barray = blosc2.BatchStore(cparams={"clevel": 5})
263+
assert barray._guess_blocksize([30, 30, 30, 30]) == 3
264+
265+
266+
def test_batchstore_guess_max_blocksize_uses_l2_for_mid_clevel(monkeypatch):
267+
monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 100)
268+
monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 150)
269+
barray = blosc2.BatchStore(cparams={"clevel": 6})
270+
assert barray._guess_blocksize([60, 60, 60, 60]) == 2
271+
272+
273+
def test_batchstore_guess_max_blocksize_uses_full_batch_for_clevel_9(monkeypatch):
274+
monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 1)
275+
monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 1)
276+
barray = blosc2.BatchStore(cparams={"clevel": 9})
277+
assert barray._guess_blocksize([100, 100, 100, 100]) == 4
278+
279+
255280
def test_vlcompress_small_blocks_roundtrip():
256281
values = [
257282
{"value": None},

tests/test_vlarray.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,10 @@ def test_vlarray_respects_explicit_use_dict_and_non_zstd():
155155
assert vlarray.cparams.codec == blosc2.Codec.LZ4
156156
assert vlarray.cparams.use_dict is False
157157

158+
vlarray = blosc2.VLArray(cparams={"codec": blosc2.Codec.LZ4HC, "clevel": 1, "use_dict": True})
159+
assert vlarray.cparams.codec == blosc2.Codec.LZ4HC
160+
assert vlarray.cparams.use_dict is True
161+
158162
vlarray = blosc2.VLArray(cparams={"codec": blosc2.Codec.ZSTD, "clevel": 0})
159163
assert vlarray.cparams.codec == blosc2.Codec.ZSTD
160164
assert vlarray.cparams.use_dict is False

0 commit comments

Comments
 (0)