Skip to content

Commit f7cd02e

Browse files
committed
Update CTable docs, examples, and benchmarks for TreeStore-backed persistence
1 parent 5fc16b7 commit f7cd02e

3 files changed

Lines changed: 48 additions & 19 deletions

File tree

bench/ctable/bench_persistency.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@ def clean() -> None:
5959
# Section 1: bulk creation — extend()
6060
# ---------------------------------------------------------------------------
6161

62-
sep("1. extend() — bulk insert: in-memory vs file-backed")
62+
sep("1. extend() — bulk insert: in-memory vs TreeStore-backed")
6363

6464
SIZES = [1_000, 10_000, 100_000, 1_000_000]
6565

66-
print(f"{'rows':>12} {'in-memory (s)':>16} {'file-backed (s)':>16} {'overhead':>10}")
66+
print(f"{'rows':>12} {'in-memory (s)':>16} {'store-backed (s)':>16} {'overhead':>10}")
6767
print(f"{'----':>12} {'-------------':>16} {'---------------':>16} {'--------':>10}")
6868

6969
for N in SIZES:
@@ -77,6 +77,7 @@ def bench_file(N=N, data=data):
7777
clean()
7878
t = blosc2.CTable(Row, urlpath=TABLE_DIR + "/ext", mode="w", expected_size=N)
7979
t.extend(data, validate=False)
80+
t.close()
8081

8182
t_mem = tmin(bench_mem)
8283
t_file = tmin(bench_file)
@@ -89,16 +90,20 @@ def bench_file(N=N, data=data):
8990

9091
sep("2. open() — time to reopen a persistent table")
9192

92-
print(f"{'rows':>12} {'CTable.open() (s)':>20} {'CTable(..., mode=a) (s)':>24}")
93-
print(f"{'----':>12} {'------------------':>20} {'------------------------':>24}")
93+
print(f"{'rows':>12} {'blosc2.open() (s)':>18} {'CTable.open() (s)':>20} {'CTable(..., mode=a) (s)':>24}")
94+
print(f"{'----':>12} {'----------------':>18} {'------------------':>20} {'------------------------':>24}")
9495

9596
for N in SIZES:
9697
data = [(i, float(i % 100), i % 2 == 0) for i in range(N)]
9798
clean()
9899
path = TABLE_DIR + "/reopen"
99100
t = blosc2.CTable(Row, urlpath=path, mode="w", expected_size=N)
100101
t.extend(data, validate=False)
101-
del t
102+
t.close()
103+
104+
def bench_blosc2_open(path=path):
105+
t2 = blosc2.open(path, mode="r")
106+
_ = len(t2)
102107

103108
def bench_open(path=path):
104109
t2 = blosc2.CTable.open(path, mode="r")
@@ -108,15 +113,16 @@ def bench_ctor(path=path):
108113
t2 = blosc2.CTable(Row, urlpath=path, mode="a")
109114
_ = len(t2)
110115

116+
t_b2_open = tmin(bench_blosc2_open)
111117
t_open = tmin(bench_open)
112118
t_ctor = tmin(bench_ctor)
113-
print(f"{N:>12,} {t_open:>20.4f} {t_ctor:>24.4f}")
119+
print(f"{N:>12,} {t_b2_open:>18.4f} {t_open:>20.4f} {t_ctor:>24.4f}")
114120

115121
# ---------------------------------------------------------------------------
116122
# Section 3: append() — single-row inserts after reopen
117123
# ---------------------------------------------------------------------------
118124

119-
sep("3. append() — 1 000 single-row inserts: in-memory vs file-backed")
125+
sep("3. append() — 1 000 single-row inserts: in-memory vs TreeStore-backed")
120126

121127
APPEND_N = 1_000
122128
PREALLOCATE = 10_000 # avoid resize noise
@@ -146,7 +152,8 @@ def bench_append_file():
146152
# Reset file table before each run
147153
if label == "file-backed":
148154
clean()
149-
blosc2.CTable(Row, urlpath=path, mode="w", expected_size=PREALLOCATE)
155+
t = blosc2.CTable(Row, urlpath=path, mode="w", expected_size=PREALLOCATE)
156+
t.close()
150157
elapsed = tmin(fn)
151158
us_per_row = elapsed / APPEND_N * 1e6
152159
print(f"{label:>14} {elapsed:>12.4f} {us_per_row:>12.1f}")
@@ -155,9 +162,9 @@ def bench_append_file():
155162
# Section 4: column read — to_numpy() after reopen
156163
# ---------------------------------------------------------------------------
157164

158-
sep("4. column read — to_numpy() on 'id': in-memory vs file-backed")
165+
sep("4. column read — to_numpy() on 'id': in-memory vs TreeStore-backed")
159166

160-
print(f"{'rows':>12} {'in-memory (s)':>16} {'file-backed (s)':>16} {'ratio':>8}")
167+
print(f"{'rows':>12} {'in-memory (s)':>16} {'store-backed (s)':>16} {'ratio':>8}")
161168
print(f"{'----':>12} {'-------------':>16} {'---------------':>16} {'-----':>8}")
162169

163170
for N in SIZES:
@@ -170,6 +177,7 @@ def bench_append_file():
170177
path = TABLE_DIR + "/read"
171178
t_file_table = blosc2.CTable(Row, urlpath=path, mode="w", expected_size=N)
172179
t_file_table.extend(data, validate=False)
180+
t_file_table.close()
173181
# Reopen read-only (simulates a real read workload)
174182
t_ro = blosc2.CTable.open(path, mode="r")
175183

examples/ctable/persistence.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# SPDX-License-Identifier: BSD-3-Clause
66
#######################################################################
77

8-
# Persistence: write to disk, open read-only/read-write, save, load.
8+
# Persistence: write to disk, open read-only/read-write, generic open(), save, load.
99

1010
import shutil
1111
import tempfile
@@ -35,9 +35,11 @@ class Measurement:
3535

3636
try:
3737
# -- Create directly on disk (mode="w") ---------------------------------
38+
# Extensionless paths default to a directory-backed TreeStore.
3839
t = blosc2.CTable(Measurement, new_data=data, urlpath=disk_path, mode="w")
3940
print(f"Created on disk: {len(t):,} rows at '{disk_path}'")
4041
t.info()
42+
t.close()
4143

4244
# -- Open read-only (default) -------------------------------------------
4345
ro = blosc2.CTable.open(disk_path) # mode="r" by default
@@ -48,11 +50,18 @@ class Measurement:
4850
ro.append(Measurement(sensor_id=0, temperature=20.0, day=1))
4951
except ValueError as e:
5052
print(f" Write blocked (read-only): {e}")
53+
ro.close()
54+
55+
# -- Generic open() materializes the CTable -----------------------------
56+
opened = blosc2.open(disk_path, mode="r")
57+
print(f"Generic open(): {type(opened).__name__} with {len(opened):,} rows")
58+
opened.close()
5159

5260
# -- Open read-write and mutate -----------------------------------------
5361
rw = blosc2.CTable.open(disk_path, mode="a")
5462
rw.append(Measurement(sensor_id=99, temperature=99.0, day=100))
5563
print(f"\nAfter append (read-write): {len(rw):,} rows")
64+
rw.close()
5665

5766
# -- save(): copy in-memory table to disk -------------------------------
5867
mem = blosc2.CTable(Measurement, new_data=data[:100])
@@ -62,7 +71,8 @@ class Measurement:
6271
# -- load(): pull a disk table fully into RAM ---------------------------
6372
ram = blosc2.CTable.load(disk_path)
6473
print(f"Loaded into RAM: {len(ram):,} rows (cbytes={ram.cbytes:,})")
65-
assert len(ram) == len(rw)
74+
with blosc2.CTable.open(disk_path) as check:
75+
assert len(ram) == len(check)
6676

6777
finally:
6878
shutil.rmtree(tmpdir)

plans/ctable-user-guide.md

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@ t = b2.CTable(Row, cparams={"codec": b2.Codec.ZSTD, "clevel": 5})
9898

9999
### Persistent tables
100100

101-
Pass `urlpath` to store the table on disk. The table root is a directory containing
102-
compressed array files — everything is handled automatically.
101+
Pass `urlpath` to store the table on disk. A persistent `CTable` is backed by a
102+
`TreeStore`, and `blosc2.open(urlpath)` can materialize it directly from the
103+
root `/_meta` manifest.
103104

104105
```python
105106
# Create a new persistent table (overwrites any existing table at that path)
@@ -116,6 +117,9 @@ t = b2.CTable.open("people", mode="r") # explicit
116117

117118
# Open read/write via the classmethod
118119
t = b2.CTable.open("people", mode="a")
120+
121+
# Generic open() also returns a CTable (materialized from the stored manifest)
122+
t = b2.open("people")
119123
```
120124

121125
`mode` values:
@@ -129,12 +133,18 @@ t = b2.CTable.open("people", mode="a")
129133
In-memory tables (`urlpath=None`, the default) behave exactly as before — no
130134
`mode` or path handling is involved.
131135

132-
### Disk layout
136+
Recommended conventions:
137+
138+
- extensionless paths default to directory-backed stores
139+
- `.b2d` and `.b2z` are still valid and useful conventions, but are no longer required
140+
141+
### Store layout
133142

134143
```
135-
people/
136-
_meta.b2frame ← schema JSON, kind marker, version (in vlmeta)
137-
_valid_rows.b2nd ← tombstone mask
144+
people/ ← TreeStore root (extensionless directory-backed example)
145+
embed.b2e ← internal store metadata
146+
_meta.b2f ← SChunk manifest with kind/version/schema in vlmeta
147+
_valid_rows.b2nd ← tombstone mask
138148
_cols/
139149
id.b2nd
140150
score.b2nd
@@ -146,7 +156,8 @@ You can inspect the raw metadata:
146156
```python
147157
import blosc2, json
148158

149-
meta = blosc2.open("people/_meta.b2frame")
159+
store = blosc2.TreeStore("people", mode="r")
160+
meta = store["/_meta"]
150161
print(meta.vlmeta["kind"]) # "ctable"
151162
print(meta.vlmeta["version"]) # 1
152163
schema = json.loads(meta.vlmeta["schema"])

0 commit comments

Comments
 (0)