1818from blosc2 ._msgpack_utils import msgpack_packb , msgpack_unpackb
1919from blosc2 .info import InfoReporter , format_nbytes_info
2020
21- _BATCHSTORE_META = {"version" : 1 , "serializer" : "msgpack" }
21+ _BATCHSTORE_META = {"version" : 1 , "serializer" : "msgpack" , "max_blocksize" : None }
2222
2323
2424def _check_serialized_size (buffer : bytes ) -> None :
@@ -69,9 +69,9 @@ def __getitem__(self, index: int | slice) -> Any | list[Any]:
6969 items = self ._decode_items ()
7070 index = self ._normalize_index (index )
7171 return items [index ]
72- blocksize_max = self ._parent .blocksize_max
73- if blocksize_max is not None :
74- block_index , item_index = divmod (index , blocksize_max )
72+ max_blocksize = self ._parent .max_blocksize
73+ if max_blocksize is not None :
74+ block_index , item_index = divmod (index , max_blocksize )
7575 if block_index >= self ._nblocks :
7676 raise IndexError ("Batch index out of range" )
7777 block = self ._get_block (block_index )
@@ -158,6 +158,11 @@ def _attach_schunk(self, schunk: blosc2.SChunk) -> None:
158158 self .schunk = schunk
159159 self .mode = schunk .mode
160160 self .mmap_mode = getattr (schunk , "mmap_mode" , None )
161+ try :
162+ batchstore_meta = self .schunk .meta ["batchstore" ]
163+ except KeyError :
164+ batchstore_meta = {}
165+ self ._max_blocksize = batchstore_meta .get ("max_blocksize" , self ._max_blocksize )
161166 self ._validate_tag ()
162167
163168 def _maybe_open_existing (self , storage : blosc2 .Storage ) -> bool :
@@ -181,13 +186,13 @@ def _make_storage(self) -> blosc2.Storage:
181186
182187 def __init__ (
183188 self ,
184- blocksize_max : int | None = None ,
189+ max_blocksize : int | None = None ,
185190 _from_schunk : blosc2 .SChunk | None = None ,
186191 ** kwargs : Any ,
187192 ) -> None :
188- if blocksize_max is not None and blocksize_max <= 0 :
189- raise ValueError ("blocksize_max must be a positive integer" )
190- self ._blocksize_max : int | None = blocksize_max
193+ if max_blocksize is not None and max_blocksize <= 0 :
194+ raise ValueError ("max_blocksize must be a positive integer" )
195+ self ._max_blocksize : int | None = max_blocksize
191196 if _from_schunk is not None :
192197 if kwargs :
193198 unexpected = ", " .join (sorted (kwargs ))
@@ -213,7 +218,7 @@ def __init__(
213218 return
214219
215220 fixed_meta = dict (storage .meta or {})
216- fixed_meta ["batchstore" ] = dict ( _BATCHSTORE_META )
221+ fixed_meta ["batchstore" ] = { ** _BATCHSTORE_META , "max_blocksize" : self . _max_blocksize }
217222 storage .meta = fixed_meta
218223 schunk = blosc2 .SChunk (chunksize = - 1 , data = None , cparams = cparams , dparams = dparams , storage = storage )
219224 self ._attach_schunk (schunk )
@@ -263,9 +268,29 @@ def _normalize_batch(self, value: object) -> list[Any]:
263268 return values
264269
265270 def _ensure_layout_for_batch (self , batch : list [Any ]) -> None :
266- if self ._blocksize_max is None :
271+ if self ._max_blocksize is None :
267272 payload_sizes = [len (msgpack_packb (item )) for item in batch ]
268- self ._blocksize_max = self ._guess_blocksize (payload_sizes )
273+ self ._max_blocksize = self ._guess_blocksize (payload_sizes )
274+ self ._persist_max_blocksize ()
275+
276+ def _persist_max_blocksize (self ) -> None :
277+ if self ._max_blocksize is None or len (self ) > 0 :
278+ return
279+ storage = self ._make_storage ()
280+ fixed_meta = dict (storage .meta or {})
281+ fixed_meta ["batchstore" ] = {
282+ ** dict (fixed_meta .get ("batchstore" , {})),
283+ "max_blocksize" : self ._max_blocksize ,
284+ }
285+ storage .meta = fixed_meta
286+ schunk = blosc2 .SChunk (
287+ chunksize = - 1 ,
288+ data = None ,
289+ cparams = copy .deepcopy (self .cparams ),
290+ dparams = copy .deepcopy (self .dparams ),
291+ storage = storage ,
292+ )
293+ self ._attach_schunk (schunk )
269294
270295 def _guess_blocksize (self , payload_sizes : list [int ]) -> int :
271296 if not payload_sizes :
@@ -301,11 +326,11 @@ def _vl_dparams_kwargs(self) -> dict[str, Any]:
301326 return asdict (self .schunk .dparams )
302327
303328 def _compress_batch (self , batch : list [Any ]) -> bytes :
304- if self ._blocksize_max is None :
305- raise RuntimeError ("BatchStore blocksize_max is not initialized" )
329+ if self ._max_blocksize is None :
330+ raise RuntimeError ("BatchStore max_blocksize is not initialized" )
306331 blocks = [
307- self ._serialize_block (batch [i : i + self ._blocksize_max ])
308- for i in range (0 , len (batch ), self ._blocksize_max )
332+ self ._serialize_block (batch [i : i + self ._max_blocksize ])
333+ for i in range (0 , len (batch ), self ._max_blocksize )
309334 ]
310335 return blosc2 .blosc2_ext .vlcompress (blocks , ** self ._vl_cparams_kwargs ())
311336
@@ -446,8 +471,8 @@ def dparams(self):
446471 return self .schunk .dparams
447472
448473 @property
449- def blocksize_max (self ) -> int | None :
450- return self ._blocksize_max
474+ def max_blocksize (self ) -> int | None :
475+ return self ._max_blocksize
451476
452477 @property
453478 def typesize (self ) -> int :
@@ -492,7 +517,7 @@ def info_items(self) -> list:
492517 ("type" , f"{ self .__class__ .__name__ } " ),
493518 ("nbatches" , len (self )),
494519 ("batch stats" , batch_stats ),
495- ("blocksize_max " , self .blocksize_max ),
520+ ("max_blocksize " , self .max_blocksize ),
496521 ("nitems" , sum (batch_sizes )),
497522 ("nbytes" , format_nbytes_info (self .nbytes )),
498523 ("cbytes" , format_nbytes_info (self .cbytes )),
@@ -510,7 +535,7 @@ def copy(self, **kwargs: Any) -> BatchStore:
510535 raise ValueError ("meta should not be passed to copy" )
511536 kwargs ["cparams" ] = kwargs .get ("cparams" , copy .deepcopy (self .cparams ))
512537 kwargs ["dparams" ] = kwargs .get ("dparams" , copy .deepcopy (self .dparams ))
513- kwargs ["blocksize_max " ] = kwargs .get ("blocksize_max " , self .blocksize_max )
538+ kwargs ["max_blocksize " ] = kwargs .get ("max_blocksize " , self .max_blocksize )
514539
515540 if "storage" not in kwargs :
516541 kwargs ["meta" ] = self ._copy_meta ()
0 commit comments