|
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################

# Benchmark: append() overhead introduced by the new schema pipeline
#
# The new append() path routes every row through:
#   _normalize_row_input → validate_row (Pydantic) → _coerce_row_to_storage
#
# This benchmark isolates how much each step costs, and shows the
# total overhead vs the raw NDArray write speed.
| 16 | +from dataclasses import dataclass |
| 17 | +from time import perf_counter |
| 18 | + |
| 19 | +import numpy as np |
| 20 | + |
| 21 | +import blosc2 |
| 22 | +from blosc2.schema_compiler import compile_schema |
| 23 | +from blosc2.schema_validation import build_validator_model, validate_row |
| 24 | + |
@dataclass
class Row:
    """Benchmark schema: one int64 key, one bounded float64, one bool flag.

    The class body is a declarative schema consumed by ``compile_schema``;
    each ``blosc2.field(...)`` default carries the column's storage type and
    constraint metadata (``ge``/``le`` bounds), not an ordinary value.
    """

    # Required column (no default); ge=0 rejects negative ids at validation.
    id: int = blosc2.field(blosc2.int64(ge=0))
    # Bounded score in [0, 100], defaulting to 0.0 when omitted from a row.
    score: float = blosc2.field(blosc2.float64(ge=0, le=100), default=0.0)
    # Boolean flag column, defaults to True when omitted.
    active: bool = blosc2.field(blosc2.bool(), default=True)
# Synthetic workload: N rows of (id, score, active) generated with a fixed
# RNG seed so every run benchmarks identical data.
N = 5_000
rng = np.random.default_rng(42)
data = [(int(i), float(rng.uniform(0, 100)), bool(i % 2)) for i in range(N)]

schema = compile_schema(Row)
# Build the Pydantic validator model once up front so its one-time
# construction cost does not leak into the timed sections below.
build_validator_model(schema)

print(f"append() pipeline cost breakdown | N = {N:,} rows")
print("=" * 60)
| 45 | + |
# ── 1. Raw NDArray writes (no CTable overhead at all) ────────────────────────
# Pre-allocated column arrays plus a validity mask, mirroring the per-row
# writes a CTable ultimately performs.
ids = np.zeros(N, dtype=np.int64)
scores = np.zeros(N, dtype=np.float64)
flags = np.zeros(N, dtype=np.bool_)
mask = np.zeros(N, dtype=np.bool_)

start = perf_counter()
for pos, (row_id, row_score, row_active) in enumerate(data):
    ids[pos] = row_id
    scores[pos] = row_score
    flags[pos] = row_active
    mask[pos] = True
t_raw = perf_counter() - start
print(f"{'Raw NumPy writes (baseline)':<40} {t_raw:.4f} s")
| 60 | + |
# ── 2. _normalize_row_input only ─────────────────────────────────────────────
# Validation is disabled so the table exists purely to expose the
# normalization helper; only the per-row call is inside the timer.
t_obj = blosc2.CTable(Row, expected_size=N, validate=False)
start = perf_counter()
for record in data:
    t_obj._normalize_row_input(record)
t_normalize = perf_counter() - start
print(f"{'_normalize_row_input only':<40} {t_normalize:.4f} s ({t_normalize/t_raw:.1f}x baseline)")
| 68 | + |
# ── 3. Pydantic validate_row only ────────────────────────────────────────────
# Normalize every row before starting the clock so the timed loop measures
# validate_row() in isolation.
row_dicts = [t_obj._normalize_row_input(record) for record in data]
start = perf_counter()
for normalized in row_dicts:
    validate_row(schema, normalized)
t_validate = perf_counter() - start
print(f"{'validate_row (Pydantic) only':<40} {t_validate:.4f} s ({t_validate/t_raw:.1f}x baseline)")
| 76 | + |
# ── 4. _coerce_row_to_storage only ───────────────────────────────────────────
# Reuses the already-normalized dicts from section 3, so only the coercion
# step is on the clock.
start = perf_counter()
for normalized in row_dicts:
    t_obj._coerce_row_to_storage(normalized)
t_coerce = perf_counter() - start
print(f"{'_coerce_row_to_storage only':<40} {t_coerce:.4f} s ({t_coerce/t_raw:.1f}x baseline)")
| 83 | + |
# ── 5. Full append(), validate=False (3 runs, take minimum) ─────────────────
# A fresh table per run keeps each measurement independent; the minimum of
# the runs is the least-noisy estimate.
RUNS = 3
run_times = []
for _ in range(RUNS):
    table = blosc2.CTable(Row, expected_size=N, validate=False)
    start = perf_counter()
    for record in data:
        table.append(record)
    run_times.append(perf_counter() - start)
t_append_off = min(run_times)
print(f"{'Full append(), validate=False':<40} {t_append_off:.4f} s ({t_append_off/t_raw:.1f}x baseline)")
| 95 | + |
# ── 6. Full append(), validate=True (3 runs, take minimum) ──────────────────
# Same protocol as section 5, but with the Pydantic validation enabled.
run_times_on = []
for _ in range(RUNS):
    table = blosc2.CTable(Row, expected_size=N, validate=True)
    start = perf_counter()
    for record in data:
        table.append(record)
    run_times_on.append(perf_counter() - start)
t_append_on = min(run_times_on)
print(f"{'Full append(), validate=True':<40} {t_append_on:.4f} s ({t_append_on/t_raw:.1f}x baseline)")
| 106 | + |
print()
print("=" * 60)
# The validation cost is the delta between the two full-append timings;
# clamp at zero because run-to-run noise can make it slightly negative.
pydantic_cost = max(t_append_on - t_append_off, 0.0)
summary = [f"{'Pydantic overhead in append()':<40} {pydantic_cost:.4f} s"]
if t_append_on > 0:
    summary.append(f"{'Validation fraction of total':<40} {pydantic_cost/t_append_on*100:.1f}%")
summary.append(f"{'Per-row Pydantic cost (isolated)':<40} {(t_validate/N)*1e6:.2f} µs/row")
print("\n".join(summary))
print()
print(f"Note: append() is dominated by blosc2 I/O ({t_append_off/t_raw:.0f}x raw numpy),")
print("      not by the validation pipeline.")
print("      The main bottleneck is the last_true_pos backward scan per row.")