@@ -1868,8 +1868,10 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18681868 cdef float * buf
18691869 cdef void * src
18701870 cdef int32_t chunk_nbytes, chunk_cbytes, block_nbytes
1871- cdef int start
1871+ cdef int start, blocknitems, expected_blocknitems
1872+ cdef int32_t input_typesize
18721873 cdef blosc2_context* dctx
1874+ expected_blocknitems = - 1
18731875 for i in range (udata.ninputs):
18741876 ndarr = udata.inputs[i]
18751877 input_buffers[i] = malloc(ndarr.sc.blocksize)
@@ -1883,7 +1885,13 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18831885 rc = blosc2_cbuffer_sizes(src, & chunk_nbytes, & chunk_cbytes, & block_nbytes)
18841886 if rc < 0 :
18851887 raise ValueError (" miniexpr: error getting cbuffer sizes" )
1886- start = nblock * ndarr.blocknitems
1888+ input_typesize = ndarr.sc.typesize
1889+ blocknitems = block_nbytes // input_typesize
1890+ if expected_blocknitems == - 1 :
1891+ expected_blocknitems = blocknitems
1892+ elif blocknitems != expected_blocknitems:
1893+ raise ValueError (" miniexpr: inconsistent block element counts across inputs" )
1894+ start = nblock * blocknitems
18871895 # A way to check for top speed
18881896 if False :
18891897 # Unsafe, but it works for special arrays (e.g. blosc2.ones), and can be fast
@@ -1892,7 +1900,12 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18921900 # This can add a significant overhead, but it is needed for thread safety.
18931901 # Perhaps one can create a specific (serial) context just for blosc2_getitem_ctx?
18941902 dctx = blosc2_create_dctx(BLOSC2_DPARAMS_DEFAULTS)
1895- rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, ndarr.blocknitems,
1903+ if nchunk * ndarr.chunknitems + start + blocknitems > ndarr.nitems:
1904+ blocknitems = ndarr.nitems - (nchunk * ndarr.chunknitems + start)
1905+ if blocknitems <= 0 :
1906+ # Should never happen, but anyway
1907+ continue
1908+ rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, blocknitems,
18961909 input_buffers[i], block_nbytes)
18971910 blosc2_free_ctx(dctx)
18981911 if rc < 0 :
@@ -1907,12 +1920,12 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
19071920 # Call thread-safe miniexpr C API
19081921 if udata.aux_reduc_ptr == NULL :
19091922 rc = me_eval(miniexpr_handle, < const void ** > input_buffers, udata.ninputs,
1910- < void * > params_output, ndarr. blocknitems)
1923+ < void * > params_output, blocknitems)
19111924 else :
19121925 # Reduction operation
19131926 offset_bytes = < uintptr_t> typesize * (nchunk * nblocks_per_chunk + nblock)
19141927 aux_reduc_ptr = < void * > (< uintptr_t> udata.aux_reduc_ptr + offset_bytes)
1915- rc = me_eval(miniexpr_handle, < const void ** > input_buffers, udata.ninputs, aux_reduc_ptr, ndarr. blocknitems)
1928+ rc = me_eval(miniexpr_handle, < const void ** > input_buffers, udata.ninputs, aux_reduc_ptr, blocknitems)
19161929 # The output buffer is cleared in the prefilter function
19171930 # memset(<void *>params_output, 0, udata.array.sc.blocksize) # clear output buffer
19181931 if rc != 0 :
0 commit comments