Skip to content

Commit 1c0bc11

Browse files
committed
Handle large data, structured dtypes. Fixes #364.
1 parent b392ffc commit 1c0bc11

3 files changed

Lines changed: 58 additions & 10 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ else()
5050
include(FetchContent)
5151
FetchContent_Declare(blosc2
5252
GIT_REPOSITORY https://github.com/Blosc/c-blosc2
53-
GIT_TAG 5fc02a0009c7eac0f30a0a6e48ae9b4c08c8a09c
53+
GIT_TAG 34db770e436aa4ceaa9b9110948f8dac1f1f443d
5454
)
5555
FetchContent_MakeAvailable(blosc2)
5656
include_directories("${blosc2_SOURCE_DIR}/include")

src/blosc2/blosc2_ext.pyx

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -508,12 +508,12 @@ cdef extern from "b2nd.h":
508508
int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array)
509509

510510
void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index)
511-
int b2nd_copy_buffer(int8_t ndim,
512-
uint8_t itemsize,
513-
const void *src, const int64_t *src_pad_shape,
514-
const int64_t *src_start, const int64_t *src_stop,
515-
void *dst, const int64_t *dst_pad_shape,
516-
const int64_t *dst_start);
511+
int b2nd_copy_buffer2(int8_t ndim,
512+
int32_t itemsize,
513+
const void *src, const int64_t *src_pad_shape,
514+
const int64_t *src_start, const int64_t *src_stop,
515+
void *dst, const int64_t *dst_pad_shape,
516+
const int64_t *dst_start);
517517

518518

519519
ctypedef struct user_filters_udata:
@@ -1739,9 +1739,9 @@ cdef int aux_udf(udf_udata *udata, int64_t nchunk, int32_t nblock,
17391739
blockshape_int64[i] = udata.array.blockshape[i]
17401740
buf = <Py_buffer *> malloc(sizeof(Py_buffer))
17411741
PyObject_GetBuffer(output, buf, PyBUF_SIMPLE)
1742-
rc = b2nd_copy_buffer(udata.array.ndim, typesize,
1743-
buf.buf, slice_shape, start, slice_shape,
1744-
params_output, blockshape_int64, start)
1742+
rc = b2nd_copy_buffer2(udata.array.ndim, typesize,
1743+
buf.buf, slice_shape, start, slice_shape,
1744+
params_output, blockshape_int64, start)
17451745
PyBuffer_Release(buf)
17461746
_check_rc(rc, "Could not copy the result into the buffer")
17471747

tests/ndarray/test_full.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,51 @@ def test_ones():
123123
assert isinstance(a, blosc2.NDArray)
124124
b = np.ones(shape, dtype=np.float32)
125125
np.testing.assert_allclose(a[:], b)
126+
127+
128+
@pytest.mark.parametrize("asarray", [True, False])
@pytest.mark.parametrize("typesize", [255, 256, 257, 261, 256 * 256])
@pytest.mark.parametrize("shape", [(1,), (3,), (10,), (1024,)])
def test_large_typesize(shape, typesize, asarray):
    """Check that items around and above 255 bytes survive a trip through blosc2.

    Every item is a one-field record holding ``typesize`` int8 values, so the
    item size in bytes equals ``typesize``.  The container is created either
    from an existing NumPy array (``asarray=True``) or directly through
    ``blosc2.full`` (``asarray=False``).
    """
    record = np.dtype([("f_001", "<i1", (typesize,))])
    expected = np.full(shape, 3, dtype=record)
    actual = blosc2.asarray(expected) if asarray else blosc2.full(shape, 3, dtype=record)
    # Only the first item is compared against the NumPy reference.
    assert np.array_equal(actual[0], expected[0])
139+
140+
141+
def test_complex_datatype():
    """Round-trip a structured dtype with many large fields through an on-disk array.

    The record mixes 1-D and 2-D float32 sub-arrays with two 1000-byte string
    fields.  A zero-filled NumPy array of 256 such records is stored in
    ``b.b2nd`` and every field read back from blosc2 must equal the
    corresponding NumPy field.
    """
    dtype = np.dtype(
        [
            ("f_001", "<f4", (164,)),
            ("f_002", "<f4", (11,)),
            ("f_003", "<f4", (154,)),
            ("f_004", "<f4", (870,)),
            ("f_005", "<f4", (1062,)),
            ("f_006", "<f4", (22,)),
            ("f_007", "<f4", (44,)),
            ("f_008", "<f4", (512,)),
            ("f_009", "<f4", (64, 77)),
            ("f_010", "<f4", (97, 489)),
            ("f_011", "<f4", (75, 255)),
            ("f_012", "<f4", (8, 293)),
            ("f_013", "<f4", (230, 591)),
            ("f_014", "<f4", (101, 193)),
            ("f_015", "<f4", (12, 48)),
            ("f_016", "<f4", (90, 699)),
            ("f_017", "<f4", (125, 65)),
            ("f_018", "<f4", (132, 81)),
            ("f_019", "<f4", (27, 363)),
            ("f_020", "S1000"),
            ("f_021", "S1000"),
        ]
    )
    a = np.zeros((256,), dtype=dtype)
    cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, clevel=1, nthreads=3)
    urlpath = "b.b2nd"
    try:
        b = blosc2.asarray(a, cparams=cparams, urlpath=urlpath, mode="w")
        # Iterate over the fields of the structured array and check that the data is the same
        for field in dtype.fields:
            assert np.array_equal(b[field], a[field])
    finally:
        # Clean up even when creation or an assertion fails, so a failing run
        # does not leave the on-disk array behind in the working directory.
        blosc2.remove_urlpath(urlpath)

0 commit comments

Comments
 (0)