Skip to content

Commit eb9d40a

Browse files
committed
Improvements in miniexpr prefilter for broader use cases
1 parent 5b7afed commit eb9d40a

2 files changed

Lines changed: 27 additions & 8 deletions

File tree

src/blosc2/blosc2_ext.pyx

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,8 +1868,10 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18681868
cdef float *buf
18691869
cdef void* src
18701870
cdef int32_t chunk_nbytes, chunk_cbytes, block_nbytes
1871-
cdef int start
1871+
cdef int start, blocknitems, expected_blocknitems
1872+
cdef int32_t input_typesize
18721873
cdef blosc2_context* dctx
1874+
expected_blocknitems = -1
18731875
for i in range(udata.ninputs):
18741876
ndarr = udata.inputs[i]
18751877
input_buffers[i] = malloc(ndarr.sc.blocksize)
@@ -1883,7 +1885,13 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18831885
rc = blosc2_cbuffer_sizes(src, &chunk_nbytes, &chunk_cbytes, &block_nbytes)
18841886
if rc < 0:
18851887
raise ValueError("miniexpr: error getting cbuffer sizes")
1886-
start = nblock * ndarr.blocknitems
1888+
input_typesize = ndarr.sc.typesize
1889+
blocknitems = block_nbytes // input_typesize
1890+
if expected_blocknitems == -1:
1891+
expected_blocknitems = blocknitems
1892+
elif blocknitems != expected_blocknitems:
1893+
raise ValueError("miniexpr: inconsistent block element counts across inputs")
1894+
start = nblock * blocknitems
18871895
# A way to check for top speed
18881896
if False:
18891897
# Unsafe, but it works for special arrays (e.g. blosc2.ones), and can be fast
@@ -1892,7 +1900,12 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18921900
# This can add a significant overhead, but it is needed for thread safety.
18931901
# Perhaps one can create a specific (serial) context just for blosc2_getitem_ctx?
18941902
dctx = blosc2_create_dctx(BLOSC2_DPARAMS_DEFAULTS)
1895-
rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, ndarr.blocknitems,
1903+
if nchunk * ndarr.chunknitems + start + blocknitems > ndarr.nitems:
1904+
blocknitems = ndarr.nitems - (nchunk * ndarr.chunknitems + start)
1905+
if blocknitems <= 0:
1906+
# Should never happen, but anyway
1907+
continue
1908+
rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, blocknitems,
18961909
input_buffers[i], block_nbytes)
18971910
blosc2_free_ctx(dctx)
18981911
if rc < 0:
@@ -1907,12 +1920,12 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
19071920
# Call thread-safe miniexpr C API
19081921
if udata.aux_reduc_ptr == NULL:
19091922
rc = me_eval(miniexpr_handle, <const void**>input_buffers, udata.ninputs,
1910-
<void*>params_output, ndarr.blocknitems)
1923+
<void*>params_output, blocknitems)
19111924
else:
19121925
# Reduction operation
19131926
offset_bytes = <uintptr_t> typesize * (nchunk * nblocks_per_chunk + nblock)
19141927
aux_reduc_ptr = <void *> (<uintptr_t> udata.aux_reduc_ptr + offset_bytes)
1915-
rc = me_eval(miniexpr_handle, <const void**>input_buffers, udata.ninputs, aux_reduc_ptr, ndarr.blocknitems)
1928+
rc = me_eval(miniexpr_handle, <const void**>input_buffers, udata.ninputs, aux_reduc_ptr, blocknitems)
19161929
# The output buffer is cleared in the prefilter function
19171930
# memset(<void *>params_output, 0, udata.array.sc.blocksize) # clear output buffer
19181931
if rc != 0:

src/blosc2/lazyexpr.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,6 +1299,11 @@ def fast_eval( # noqa: C901
12991299
if not blosc2.are_partitions_behaved(op.shape, op.chunks, op.blocks):
13001300
use_miniexpr = False
13011301
break
1302+
# Ensure blocks fit exactly in chunks
1303+
blocks_fit = builtins.all(c % b == 0 for c, b in zip(op.chunks, op.blocks, strict=True))
1304+
if not blocks_fit:
1305+
use_miniexpr = False
1306+
break
13021307

13031308
if use_miniexpr:
13041309
cparams = kwargs.pop("cparams", blosc2.CParams())
@@ -1997,8 +2002,9 @@ def reduce_slices( # noqa: C901
19972002
# Only behaved partitions are supported in miniexpr reductions
19982003
if use_miniexpr:
19992004
for op in operands.values():
2000-
# Check that partitions are well-behaved (no padding)
2001-
if not blosc2.are_partitions_behaved(op.shape, op.chunks, op.blocks):
2005+
# Check that chunksize is multiple of blocksize and blocks fit exactly in chunks
2006+
blocks_fit = builtins.all(c % b == 0 for c, b in zip(op.chunks, op.blocks, strict=True))
2007+
if not blocks_fit:
20022008
use_miniexpr = False
20032009
break
20042010

@@ -2011,7 +2017,7 @@ def reduce_slices( # noqa: C901
20112017
nblocks = res_eval.nbytes // res_eval.blocksize
20122018
aux_reduc = np.empty(nblocks, dtype=dtype)
20132019
try:
2014-
# print("expr->miniexpr:", expression, reduce_op)
2020+
print("expr->miniexpr:", expression, reduce_op)
20152021
expression = f"{reduce_op_str}({expression})"
20162022
res_eval._set_pref_expr(expression, operands, aux_reduc)
20172023
# Data won't even try to be compressed, so buffers can be unitialized and reused

0 commit comments

Comments
 (0)