Skip to content

Commit 79cd7ab

Browse files
committed
Add a BatchStore.items accessor
1 parent 0ab340d commit 79cd7ab

5 files changed

Lines changed: 285 additions & 44 deletions

File tree

doc/getting_started/tutorials/12.batchstore.ipynb

Lines changed: 146 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,14 @@
1818
"id": "be8591f8f86952e8",
1919
"metadata": {
2020
"ExecuteTime": {
21-
"end_time": "2026-03-20T07:06:05.876287Z",
22-
"start_time": "2026-03-20T07:06:05.661801Z"
21+
"end_time": "2026-03-20T10:24:10.190550Z",
22+
"start_time": "2026-03-20T10:24:10.014859Z"
23+
},
24+
"execution": {
25+
"iopub.execute_input": "2026-03-20T10:23:51.329739Z",
26+
"iopub.status.busy": "2026-03-20T10:23:51.329437Z",
27+
"iopub.status.idle": "2026-03-20T10:23:51.556056Z",
28+
"shell.execute_reply": "2026-03-20T10:23:51.555614Z"
2329
}
2430
},
2531
"outputs": [],
@@ -53,17 +59,23 @@
5359
"id": "f8c8a2b7692e7228",
5460
"metadata": {
5561
"ExecuteTime": {
56-
"end_time": "2026-03-20T07:06:05.904277Z",
57-
"start_time": "2026-03-20T07:06:05.882545Z"
62+
"end_time": "2026-03-20T10:24:10.211954Z",
63+
"start_time": "2026-03-20T10:24:10.191296Z"
64+
},
65+
"execution": {
66+
"iopub.execute_input": "2026-03-20T10:23:51.557338Z",
67+
"iopub.status.busy": "2026-03-20T10:23:51.557245Z",
68+
"iopub.status.idle": "2026-03-20T10:23:51.564920Z",
69+
"shell.execute_reply": "2026-03-20T10:23:51.564578Z"
5870
}
5971
},
6072
"outputs": [
6173
{
6274
"name": "stdout",
6375
"output_type": "stream",
6476
"text": [
65-
"Batches: [[{'name': 'alpha', 'count': 1}, {'name': 'beta', 'count': 2}, {'name': 'gamma', 'count': 3}], [{'name': 'delta', 'count': 4}, {'name': 'epsilon', 'count': 5}], [{'name': 'zeta', 'count': 6}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}]]\n",
66-
"Number of batches: 4\n"
77+
"Batches: [[{'name': 'alpha', 'count': 1}, {'name': 'beta', 'count': 2}, {'name': 'gamma', 'count': 3}], [{'name': 'delta', 'count': 4}, {'name': 'epsilon', 'count': 5}], [{'name': 'zeta', 'count': 6}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n",
78+
"Number of batches: 5\n"
6779
]
6880
}
6981
],
@@ -86,6 +98,11 @@
8698
" [\n",
8799
" [{\"name\": \"zeta\", \"count\": 6}],\n",
88100
" [{\"name\": \"eta\", \"count\": 7}, {\"name\": \"theta\", \"count\": 8}],\n",
101+
" [\n",
102+
" {\"name\": \"iota\", \"count\": 9},\n",
103+
" {\"name\": \"kappa\", \"count\": 10},\n",
104+
" {\"name\": \"lambda\", \"count\": 11},\n",
105+
" ],\n",
89106
" ]\n",
90107
")\n",
91108
"\n",
@@ -109,8 +126,14 @@
109126
"id": "20861d3e348f9df1",
110127
"metadata": {
111128
"ExecuteTime": {
112-
"end_time": "2026-03-20T07:06:05.924634Z",
113-
"start_time": "2026-03-20T07:06:05.905576Z"
129+
"end_time": "2026-03-20T10:24:10.229980Z",
130+
"start_time": "2026-03-20T10:24:10.213198Z"
131+
},
132+
"execution": {
133+
"iopub.execute_input": "2026-03-20T10:23:51.566000Z",
134+
"iopub.status.busy": "2026-03-20T10:23:51.565919Z",
135+
"iopub.status.idle": "2026-03-20T10:23:51.569765Z",
136+
"shell.execute_reply": "2026-03-20T10:23:51.569439Z"
114137
}
115138
},
116139
"outputs": [
@@ -121,7 +144,7 @@
121144
"First batch: [{'name': 'alpha', 'count': 1}, {'name': 'beta', 'count': 2}, {'name': 'gamma', 'count': 3}]\n",
122145
"Second item in first batch: {'name': 'beta', 'count': 2}\n",
123146
"Slice of second batch: [{'name': 'delta', 'count': 4}]\n",
124-
"All items: [{'name': 'alpha', 'count': 1}, {'name': 'beta', 'count': 2}, {'name': 'gamma', 'count': 3}, {'name': 'delta', 'count': 4}, {'name': 'epsilon', 'count': 5}, {'name': 'zeta', 'count': 6}, {'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}]\n"
147+
"All items: [{'name': 'alpha', 'count': 1}, {'name': 'beta', 'count': 2}, {'name': 'gamma', 'count': 3}, {'name': 'delta', 'count': 4}, {'name': 'epsilon', 'count': 5}, {'name': 'zeta', 'count': 6}, {'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}, {'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]\n"
125148
]
126149
}
127150
],
@@ -148,17 +171,23 @@
148171
"id": "df556f6da8adc369",
149172
"metadata": {
150173
"ExecuteTime": {
151-
"end_time": "2026-03-20T07:06:05.945986Z",
152-
"start_time": "2026-03-20T07:06:05.925866Z"
174+
"end_time": "2026-03-20T10:24:10.259055Z",
175+
"start_time": "2026-03-20T10:24:10.231589Z"
176+
},
177+
"execution": {
178+
"iopub.execute_input": "2026-03-20T10:23:51.570823Z",
179+
"iopub.status.busy": "2026-03-20T10:23:51.570763Z",
180+
"iopub.status.idle": "2026-03-20T10:23:51.577607Z",
181+
"shell.execute_reply": "2026-03-20T10:23:51.577269Z"
153182
}
154183
},
155184
"outputs": [
156185
{
157186
"name": "stdout",
158187
"output_type": "stream",
159188
"text": [
160-
"Popped batch: [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}]\n",
161-
"After updates: [[{'name': 'between', 'count': 99}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'zeta', 'count': 6}]]\n"
189+
"Popped batch: [{'name': 'zeta', 'count': 6}]\n",
190+
"After updates: [[{'name': 'alpha*', 'count': 10}, {'name': 'beta*', 'count': 20}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n"
162191
]
163192
}
164193
],
@@ -167,9 +196,10 @@
167196
" {\"name\": \"delta*\", \"count\": 40},\n",
168197
" {\"name\": \"epsilon*\", \"count\": 50},\n",
169198
"]\n",
170-
"store.insert(1, [{\"name\": \"between\", \"count\": 99}])\n",
171-
"removed = store.pop()\n",
199+
"store.insert(2, [{\"name\": \"between-a\", \"count\": 99}, {\"name\": \"between-b\", \"count\": 100}])\n",
200+
"removed = store.pop(3)\n",
172201
"del store[0]\n",
202+
"store.insert(0, [{\"name\": \"alpha*\", \"count\": 10}, {\"name\": \"beta*\", \"count\": 20}])\n",
173203
"\n",
174204
"show(\"Popped batch\", removed)\n",
175205
"show(\"After updates\", [batch[:] for batch in store])"
@@ -191,30 +221,36 @@
191221
"id": "b32d72a68d83673e",
192222
"metadata": {
193223
"ExecuteTime": {
194-
"end_time": "2026-03-20T07:06:05.965086Z",
195-
"start_time": "2026-03-20T07:06:05.947144Z"
224+
"end_time": "2026-03-20T10:24:10.300526Z",
225+
"start_time": "2026-03-20T10:24:10.259712Z"
226+
},
227+
"execution": {
228+
"iopub.execute_input": "2026-03-20T10:23:51.578504Z",
229+
"iopub.status.busy": "2026-03-20T10:23:51.578433Z",
230+
"iopub.status.idle": "2026-03-20T10:23:51.581563Z",
231+
"shell.execute_reply": "2026-03-20T10:23:51.581191Z"
196232
}
197233
},
198234
"outputs": [
199235
{
200236
"name": "stdout",
201237
"output_type": "stream",
202238
"text": [
203-
"Batches via iteration: [[{'name': 'between', 'count': 99}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'zeta', 'count': 6}]]\n",
239+
"Batches via iteration: [[{'name': 'alpha*', 'count': 10}, {'name': 'beta*', 'count': 20}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n",
204240
"type : BatchStore\n",
205241
"serializer : msgpack\n",
206-
"nbatches : 3 (items per batch: mean=1.33, max=2, min=1)\n",
207-
"nblocks : 3 (items per block: mean=1.33, max=2, min=1)\n",
208-
"nitems : 4\n",
209-
"nbytes : 84 (84 B)\n",
210-
"cbytes : 468 (468 B)\n",
211-
"cratio : 0.18\n",
242+
"nbatches : 5 (items per batch: mean=2.20, max=3, min=2)\n",
243+
"nblocks : 6 (items per block: mean=1.83, max=2, min=1)\n",
244+
"nitems : 11\n",
245+
"nbytes : 226 (226 B)\n",
246+
"cbytes : 680 (680 B)\n",
247+
"cratio : 0.33\n",
212248
"cparams : CParams(codec=<Codec.ZSTD: 5>, codec_meta=0, clevel=5, use_dict=False, typesize=1,\n",
213-
" : nthreads=8, blocksize=0, splitmode=<SplitMode.AUTO_SPLIT: 3>,\n",
249+
" : nthreads=12, blocksize=0, splitmode=<SplitMode.AUTO_SPLIT: 3>,\n",
214250
" : filters=[<Filter.NOFILTER: 0>, <Filter.NOFILTER: 0>, <Filter.NOFILTER: 0>,\n",
215251
" : <Filter.NOFILTER: 0>, <Filter.NOFILTER: 0>, <Filter.SHUFFLE: 1>], filters_meta=[0,\n",
216252
" : 0, 0, 0, 0, 0], tuner=<Tuner.STUNE: 0>)\n",
217-
"dparams : DParams(nthreads=8)\n",
253+
"dparams : DParams(nthreads=12)\n",
218254
"\n"
219255
]
220256
}
@@ -240,16 +276,22 @@
240276
"id": "45f878b8f4414a3b",
241277
"metadata": {
242278
"ExecuteTime": {
243-
"end_time": "2026-03-20T07:06:05.990783Z",
244-
"start_time": "2026-03-20T07:06:05.965791Z"
279+
"end_time": "2026-03-20T10:24:10.334099Z",
280+
"start_time": "2026-03-20T10:24:10.301619Z"
281+
},
282+
"execution": {
283+
"iopub.execute_input": "2026-03-20T10:23:51.582437Z",
284+
"iopub.status.busy": "2026-03-20T10:23:51.582372Z",
285+
"iopub.status.idle": "2026-03-20T10:23:51.590494Z",
286+
"shell.execute_reply": "2026-03-20T10:23:51.590186Z"
245287
}
246288
},
247289
"outputs": [
248290
{
249291
"name": "stdout",
250292
"output_type": "stream",
251293
"text": [
252-
"Copied batches: [[{'name': 'between', 'count': 99}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'zeta', 'count': 6}]]\n",
294+
"Copied batches: [[{'name': 'alpha*', 'count': 10}, {'name': 'beta*', 'count': 20}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n",
253295
"Copy serializer: msgpack\n",
254296
"Copy codec: Codec.LZ4\n"
255297
]
@@ -283,8 +325,14 @@
283325
"id": "fd4957093f509bd4",
284326
"metadata": {
285327
"ExecuteTime": {
286-
"end_time": "2026-03-20T07:06:06.025738Z",
287-
"start_time": "2026-03-20T07:06:05.999799Z"
328+
"end_time": "2026-03-20T10:24:10.359063Z",
329+
"start_time": "2026-03-20T10:24:10.343012Z"
330+
},
331+
"execution": {
332+
"iopub.execute_input": "2026-03-20T10:23:51.591475Z",
333+
"iopub.status.busy": "2026-03-20T10:23:51.591415Z",
334+
"iopub.status.idle": "2026-03-20T10:23:51.594839Z",
335+
"shell.execute_reply": "2026-03-20T10:23:51.594553Z"
288336
}
289337
},
290338
"outputs": [
@@ -293,9 +341,9 @@
293341
"output_type": "stream",
294342
"text": [
295343
"from_cframe type: BatchStore\n",
296-
"from_cframe batches: [[{'name': 'between', 'count': 99}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'zeta', 'count': 6}]]\n",
344+
"from_cframe batches: [[{'name': 'alpha*', 'count': 10}, {'name': 'beta*', 'count': 20}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n",
297345
"Reopened type: BatchStore\n",
298-
"Reopened batches: [[{'name': 'between', 'count': 99}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'zeta', 'count': 6}]]\n"
346+
"Reopened batches: [[{'name': 'alpha*', 'count': 10}, {'name': 'beta*', 'count': 20}], [{'name': 'delta*', 'count': 40}, {'name': 'epsilon*', 'count': 50}], [{'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}], [{'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}], [{'name': 'iota', 'count': 9}, {'name': 'kappa', 'count': 10}, {'name': 'lambda', 'count': 11}]]\n"
299347
]
300348
}
301349
],
@@ -326,8 +374,14 @@
326374
"id": "2214b2be1bfb5bc7",
327375
"metadata": {
328376
"ExecuteTime": {
329-
"end_time": "2026-03-20T07:06:06.050975Z",
330-
"start_time": "2026-03-20T07:06:06.034152Z"
377+
"end_time": "2026-03-20T10:24:10.386442Z",
378+
"start_time": "2026-03-20T10:24:10.365740Z"
379+
},
380+
"execution": {
381+
"iopub.execute_input": "2026-03-20T10:23:51.595854Z",
382+
"iopub.status.busy": "2026-03-20T10:23:51.595778Z",
383+
"iopub.status.idle": "2026-03-20T10:23:51.601478Z",
384+
"shell.execute_reply": "2026-03-20T10:23:51.601232Z"
331385
}
332386
},
333387
"outputs": [
@@ -348,24 +402,73 @@
348402
" [{\"name\": \"again\", \"count\": 2}, {\"name\": \"done\", \"count\": 3}],\n",
349403
" ]\n",
350404
")\n",
351-
"show(\"After clear + extend\", [batch[:] for batch in scratch])\n",
405+
"show(\"After clear + extend\", [batch[:] for batch in scratch])"
406+
]
407+
},
408+
{
409+
"cell_type": "markdown",
410+
"id": "8d8f9df58a46c4c1",
411+
"metadata": {},
412+
"source": [
413+
"## Flat item access with `.items`\n",
352414
"\n",
353-
"blosc2.remove_urlpath(urlpath)\n",
354-
"blosc2.remove_urlpath(copy_path)"
415+
"The main `BatchStore` API remains batch-oriented, but the `.items` accessor offers a read-only flat view across all items. Integer indexing returns one item and slicing returns a Python list."
355416
]
356417
},
357418
{
358419
"cell_type": "code",
359-
"execution_count": 8,
360-
"id": "27c47e4fd1332b48",
420+
"execution_count": 9,
421+
"id": "4f5c4e5a1b8f92d4",
422+
"metadata": {
423+
"ExecuteTime": {
424+
"end_time": "2026-03-20T10:24:10.403443Z",
425+
"start_time": "2026-03-20T10:24:10.387808Z"
426+
},
427+
"execution": {
428+
"iopub.execute_input": "2026-03-20T10:23:51.602502Z",
429+
"iopub.status.busy": "2026-03-20T10:23:51.602451Z",
430+
"iopub.status.idle": "2026-03-20T10:23:51.606267Z",
431+
"shell.execute_reply": "2026-03-20T10:23:51.605893Z"
432+
}
433+
},
434+
"outputs": [
435+
{
436+
"name": "stdout",
437+
"output_type": "stream",
438+
"text": [
439+
"Flat item 0: {'name': 'alpha*', 'count': 10}\n",
440+
"Flat item 6: {'name': 'eta', 'count': 7}\n",
441+
"Flat slice 3:8: [{'name': 'epsilon*', 'count': 50}, {'name': 'between-a', 'count': 99}, {'name': 'between-b', 'count': 100}, {'name': 'eta', 'count': 7}, {'name': 'theta', 'count': 8}]\n"
442+
]
443+
}
444+
],
445+
"source": [
446+
"show(\"Flat item 0\", store.items[0])\n",
447+
"show(\"Flat item 6\", store.items[6])\n",
448+
"show(\"Flat slice 3:8\", store.items[3:8])"
449+
]
450+
},
451+
{
452+
"cell_type": "code",
453+
"execution_count": 10,
454+
"id": "2a355a3fc8673692",
361455
"metadata": {
362456
"ExecuteTime": {
363-
"end_time": "2026-03-20T07:06:06.061727Z",
364-
"start_time": "2026-03-20T07:06:06.051400Z"
457+
"end_time": "2026-03-20T10:24:10.420064Z",
458+
"start_time": "2026-03-20T10:24:10.403926Z"
459+
},
460+
"execution": {
461+
"iopub.execute_input": "2026-03-20T10:23:51.607247Z",
462+
"iopub.status.busy": "2026-03-20T10:23:51.607185Z",
463+
"iopub.status.idle": "2026-03-20T10:23:51.608877Z",
464+
"shell.execute_reply": "2026-03-20T10:23:51.608598Z"
365465
}
366466
},
367467
"outputs": [],
368-
"source": []
468+
"source": [
469+
"blosc2.remove_urlpath(urlpath)\n",
470+
"blosc2.remove_urlpath(copy_path)"
471+
]
369472
}
370473
],
371474
"metadata": {

examples/batch_store.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ def main() -> None:
6161
value = reopened[batch_index][item_index]
6262
print(f" reopened[{batch_index}][{item_index}] -> {value}")
6363

64+
print()
65+
print("Flat item reads via .items:")
66+
print(f" reopened.items[0] -> {reopened.items[0]}")
67+
print(f" reopened.items[150:153] -> {reopened.items[150:153]}")
68+
6469
print(f"BatchStore file at: {reopened.urlpath}")
6570

6671

0 commit comments

Comments
 (0)