@@ -51,17 +51,21 @@ def build_parser() -> argparse.ArgumentParser:
5151 )
5252 parser .add_argument ("--codec" , type = str , default = "ZSTD" , choices = [codec .name for codec in blosc2 .Codec ])
5353 parser .add_argument ("--clevel" , type = int , default = 5 )
54+ parser .add_argument ("--serializer" , type = str , default = "msgpack" , choices = ["msgpack" , "arrow" ])
5455 parser .add_argument ("--use-dict" , action = "store_true" , help = "Enable dictionaries for ZSTD/LZ4/LZ4HC codecs." )
5556 parser .add_argument ("--in-mem" , action = "store_true" , help = "Keep the BatchStore purely in memory." )
5657 return parser
5758
5859
59- def build_store (codec : blosc2 .Codec , clevel : int , use_dict : bool , in_mem : bool ) -> blosc2 .BatchStore | None :
60+ def build_store (
61+ codec : blosc2 .Codec , clevel : int , use_dict : bool , serializer : str , in_mem : bool
62+ ) -> blosc2 .BatchStore | None :
6063 if in_mem :
6164 storage = blosc2 .Storage (mode = "w" )
6265 store = blosc2 .BatchStore (
6366 storage = storage ,
6467 max_blocksize = BLOCKSIZE_MAX ,
68+ serializer = serializer ,
6569 cparams = {
6670 "codec" : codec ,
6771 "clevel" : clevel ,
@@ -79,7 +83,9 @@ def build_store(codec: blosc2.Codec, clevel: int, use_dict: bool, in_mem: bool)
7983 "clevel" : clevel ,
8084 "use_dict" : use_dict and codec in (blosc2 .Codec .ZSTD , blosc2 .Codec .LZ4 , blosc2 .Codec .LZ4HC ),
8185 }
82- with blosc2 .BatchStore (storage = storage , max_blocksize = BLOCKSIZE_MAX , cparams = cparams ) as store :
86+ with blosc2 .BatchStore (
87+ storage = storage , max_blocksize = BLOCKSIZE_MAX , serializer = serializer , cparams = cparams
88+ ) as store :
8389 for batch_index in range (NBATCHES ):
8490 store .append (make_batch (batch_index ))
8591 return None
@@ -114,10 +120,13 @@ def main() -> None:
114120 print (f"Building { article } { mode_label } BatchStore with 1,000,000 RGB dicts and timing 1,000 random scalar reads..." )
115121 print (f" codec: { codec .name } " )
116122 print (f" clevel: { args .clevel } " )
123+ print (f" serializer: { args .serializer } " )
117124 print (f" use_dict: { use_dict } " )
118125 print (f" in_mem: { args .in_mem } " )
119126 t0 = time .perf_counter ()
120- store = build_store (codec = codec , clevel = args .clevel , use_dict = use_dict , in_mem = args .in_mem )
127+ store = build_store (
128+ codec = codec , clevel = args .clevel , use_dict = use_dict , serializer = args .serializer , in_mem = args .in_mem
129+ )
121130 build_time_s = time .perf_counter () - t0
122131 if args .in_mem :
123132 assert store is not None
@@ -127,26 +136,26 @@ def main() -> None:
127136 samples , timings_ns = measure_random_reads (read_store )
128137 t0 = time .perf_counter ()
129138 checksum = 0
130- nobjects = 0
131- for obj in read_store .iter_objects ():
132- checksum += obj ["blue" ]
133- nobjects += 1
139+ nitems = 0
140+ for item in read_store .iter_items ():
141+ checksum += item ["blue" ]
142+ nitems += 1
134143 iter_time_s = time .perf_counter () - t0
135144
136145 print ()
137146 print ("BatchStore benchmark" )
138147 print (f" build time: { build_time_s :.3f} s" )
139148 print (f" batches: { len (read_store )} " )
140- print (f" objects : { TOTAL_OBJECTS } " )
149+ print (f" items : { TOTAL_OBJECTS } " )
141150 print (f" max_blocksize: { read_store .max_blocksize } " )
142151 print ()
143152 print (read_store .info )
144153 print (f"Random scalar reads: { N_RANDOM_READS } " )
145154 print (f" mean: { statistics .fmean (timings_ns ) / 1_000 :.2f} us" )
146155 print (f" max: { max (timings_ns ) / 1_000 :.2f} us" )
147156 print (f" min: { min (timings_ns ) / 1_000 :.2f} us" )
148- print (f"Object iteration via iter_objects (): { iter_time_s :.3f} s" )
149- print (f" per object : { iter_time_s * 1_000_000 / nobjects :.2f} us" )
157+ print (f"Item iteration via iter_items (): { iter_time_s :.3f} s" )
158+ print (f" per item : { iter_time_s * 1_000_000 / nitems :.2f} us" )
150159 print (f" checksum: { checksum } " )
151160 print ("Sample reads:" )
152161 for timing_ns , batch_index , item_index , value in samples [:5 ]:
0 commit comments