Skip to content

Commit fcb9efa

Browse files
committed
For large temp arange arrays, use blosc2.arange instead of np.arange
1 parent bc4d4ff commit fcb9efa

1 file changed

Lines changed: 16 additions & 4 deletions

File tree

src/blosc2/ctable.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,18 @@ def wrapper(*args, **kwargs):
5151
# and legacy Pydantic models during the transition period.
5252
RowT = TypeVar("RowT")
5353

54+
# Arrays with at least this many elements use blosc2.arange instead of np.arange to
55+
# avoid large transient allocations when mapping logical to physical row positions.
56+
_BLOSC2_ARANGE_THRESHOLD = 1_000_000
57+
58+
59+
def _arange(start, stop=None, step=1) -> blosc2.NDArray | np.ndarray:
    """Build an arange-style array, choosing the backend by element count.

    Follows the ``range`` calling convention: when only one positional
    argument is given it is taken as ``stop`` and ``start`` becomes 0.
    The element count is ``len(range(start, stop, step))``; once it reaches
    ``_BLOSC2_ARANGE_THRESHOLD`` the result comes from ``blosc2.arange``,
    otherwise from ``np.arange``.
    """
    if stop is None:
        # One-argument form: the lone value is the exclusive upper bound.
        stop = start
        start = 0
    count = len(range(start, stop, step))
    if count < _BLOSC2_ARANGE_THRESHOLD:
        return np.arange(start, stop, step)
    return blosc2.arange(start, stop, step)
65+
5466

5567
# ---------------------------------------------------------------------------
5668
# Legacy Pydantic-compat helpers
@@ -268,15 +280,15 @@ def __getitem__(self, key: int | slice | list | np.ndarray):
268280
return self._raw_col[int(pos_true)]
269281

270282
elif isinstance(key, slice):
271-
real_pos = blosc2.where(self._valid_rows, np.arange(len(self._valid_rows))).compute()
283+
real_pos = blosc2.where(self._valid_rows, _arange(len(self._valid_rows))).compute()
272284
start, stop, step = key.indices(len(real_pos))
273285
mask = blosc2.zeros(len(self._table._valid_rows), dtype=np.bool_)
274286
if step == 1:
275287
phys_start = real_pos[start]
276288
phys_stop = real_pos[stop - 1]
277289
mask[phys_start : phys_stop + 1] = True
278290
else:
279-
lindices = np.arange(start, stop, step)
291+
lindices = _arange(start, stop, step)
280292
phys_indices = real_pos[lindices]
281293
mask[phys_indices[:]] = True
282294
return Column(self._table, self._col_name, mask=mask)
@@ -294,7 +306,7 @@ def __getitem__(self, key: int | slice | list | np.ndarray):
294306
return self._raw_col[phys_indices]
295307

296308
elif isinstance(key, (list, tuple, np.ndarray)):
297-
real_pos = blosc2.where(self._valid_rows, np.arange(len(self._valid_rows))).compute()
309+
real_pos = blosc2.where(self._valid_rows, _arange(len(self._valid_rows))).compute()
298310
phys_indices = np.array([real_pos[i] for i in key], dtype=np.int64)
299311
return self._raw_col[phys_indices]
300312

@@ -326,7 +338,7 @@ def __setitem__(self, key: int | slice | list | np.ndarray, value):
326338
self._raw_col[phys_indices] = value
327339

328340
elif isinstance(key, (slice, list, tuple, np.ndarray)):
329-
real_pos = blosc2.where(self._valid_rows, np.arange(len(self._valid_rows))).compute()
341+
real_pos = blosc2.where(self._valid_rows, _arange(len(self._valid_rows))).compute()
330342
if isinstance(key, slice):
331343
lindices = range(*key.indices(len(real_pos)))
332344
phys_indices = np.array([real_pos[i] for i in lindices], dtype=np.int64)

0 commit comments

Comments
 (0)