@@ -51,6 +51,18 @@ def wrapper(*args, **kwargs):
5151# and legacy Pydantic models during the transition period.
5252RowT = TypeVar ("RowT" )
5353
54+ # Ranges with at least this many elements are built with blosc2.arange instead of
55+ # np.arange, to avoid large transient allocations when mapping logical to physical row positions.
56+ _BLOSC2_ARANGE_THRESHOLD = 1_000_000
57+
58+
def _arange(start, stop=None, step=1) -> blosc2.NDArray | np.ndarray:
    """Build a range array, picking the backend from the number of elements.

    The calling convention mirrors ``range``: when only one positional
    argument is given it is taken as the exclusive end and the range starts
    at 0.

    Args:
        start: First value of the range, or the exclusive end when ``stop``
            is omitted.
        stop: Exclusive end of the range.
        step: Increment between consecutive values.

    Returns:
        The result of ``blosc2.arange(start, stop, step)`` when the range
        holds at least ``_BLOSC2_ARANGE_THRESHOLD`` elements, otherwise the
        result of ``np.arange(start, stop, step)``.
    """
    if stop is None:
        start, stop = 0, start

    # A range object reports its length without materializing any values.
    count = len(range(start, stop, step))

    if count < _BLOSC2_ARANGE_THRESHOLD:
        return np.arange(start, stop, step)
    return blosc2.arange(start, stop, step)
65+
5466
5567# ---------------------------------------------------------------------------
5668# Legacy Pydantic-compat helpers
@@ -268,15 +280,15 @@ def __getitem__(self, key: int | slice | list | np.ndarray):
268280 return self ._raw_col [int (pos_true )]
269281
270282 elif isinstance (key , slice ):
271- real_pos = blosc2 .where (self ._valid_rows , np . arange (len (self ._valid_rows ))).compute ()
283+ real_pos = blosc2 .where (self ._valid_rows , _arange (len (self ._valid_rows ))).compute ()
272284 start , stop , step = key .indices (len (real_pos ))
273285 mask = blosc2 .zeros (len (self ._table ._valid_rows ), dtype = np .bool_ )
274286 if step == 1 :
275287 phys_start = real_pos [start ]
276288 phys_stop = real_pos [stop - 1 ]
277289 mask [phys_start : phys_stop + 1 ] = True
278290 else :
279- lindices = np . arange (start , stop , step )
291+ lindices = _arange (start , stop , step )
280292 phys_indices = real_pos [lindices ]
281293 mask [phys_indices [:]] = True
282294 return Column (self ._table , self ._col_name , mask = mask )
@@ -294,7 +306,7 @@ def __getitem__(self, key: int | slice | list | np.ndarray):
294306 return self ._raw_col [phys_indices ]
295307
296308 elif isinstance (key , (list , tuple , np .ndarray )):
297- real_pos = blosc2 .where (self ._valid_rows , np . arange (len (self ._valid_rows ))).compute ()
309+ real_pos = blosc2 .where (self ._valid_rows , _arange (len (self ._valid_rows ))).compute ()
298310 phys_indices = np .array ([real_pos [i ] for i in key ], dtype = np .int64 )
299311 return self ._raw_col [phys_indices ]
300312
@@ -326,7 +338,7 @@ def __setitem__(self, key: int | slice | list | np.ndarray, value):
326338 self ._raw_col [phys_indices ] = value
327339
328340 elif isinstance (key , (slice , list , tuple , np .ndarray )):
329- real_pos = blosc2 .where (self ._valid_rows , np . arange (len (self ._valid_rows ))).compute ()
341+ real_pos = blosc2 .where (self ._valid_rows , _arange (len (self ._valid_rows ))).compute ()
330342 if isinstance (key , slice ):
331343 lindices = range (* key .indices (len (real_pos )))
332344 phys_indices = np .array ([real_pos [i ] for i in lindices ], dtype = np .int64 )
0 commit comments