|
| 1 | +### Matmul performance comparison between Blosc2 and PyTorch with persistent storage |
| 2 | +import numpy as np |
| 3 | +import blosc2 |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import torch |
| 6 | +import pickle |
| 7 | +from time import time |
| 8 | +import h5py |
| 9 | +import hdf5plugin |
| 10 | +from tqdm import tqdm # progress bar (pip install tqdm) |
| 11 | +import os |
# Blosc2 compression parameters: LZ4 at the lowest level with byte
# shuffling — tuned for raw throughput rather than compression ratio.
cparams = dict(
    codec=blosc2.Codec.LZ4,
    filters=[blosc2.Filter.SHUFFLE],
    clevel=1,
)

# Frames handled per I/O batch throughout the script.
batch_size = 32
# When True, (re)build every on-disk dataset from scratch.
CREATE = True
# Element type shared by all arrays in the benchmark.
dtype = np.float32
| 20 | +if CREATE:  # (re)generate the Blosc2/HDF5 datasets on disk; set CREATE=False above to reuse existing files
def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
                               scale=1.0,
                               ripple_amplitude=30.0,
                               ripple_period=400.0,
                               blur_radius=1,
                               row_gain_amplitude=0.15,
                               dtype=np.float32):
    """Build a dense vertical row-warp matrix ``A`` of shape (out_h, in_h).

    Applying ``A @ img`` remaps image rows: output row ``r`` is a small
    triangular-weighted blend of the input rows surrounding a fractional
    source position that ramps linearly from 0 to ``(in_h - 1) * scale``,
    perturbed by a sinusoidal ripple and modulated by a per-row gain.

    Parameters
    ----------
    out_h, in_h : int
        Output and input heights; the returned matrix shape.
    scale : float
        Fraction of the input height spanned by the linear ramp.
    ripple_amplitude : float
        Amplitude (in rows) of the sinusoidal warp.
    ripple_period : float
        Period (in rows) of the sinusoidal warp.
    blur_radius : int
        Half-width of the triangular blending window.
        NOTE(review): with blur_radius=0 the ``1e-6`` denominator below
        collapses the weights to ~0 unless the source position lands exactly
        on an integer row, leaving (near-)empty rows — confirm intended.
    row_gain_amplitude : float
        Amplitude of the cosine per-row gain modulation.
    dtype : numpy dtype, optional
        Element type of the result. Defaults to ``np.float32``, matching the
        previous behavior that read a module-level global.

    Returns
    -------
    numpy.ndarray
        Array of shape (out_h, in_h) with the given dtype.
    """
    A = np.zeros((out_h, in_h), dtype=dtype)
    i = np.arange(out_h, dtype=dtype)
    t = i / max(out_h - 1, 1)  # normalized output-row position in [0, 1]
    linear_src = t * (in_h - 1) * scale
    ripple = ripple_amplitude * np.sin(2.0 * np.pi * i / ripple_period)
    src = linear_src + ripple  # fractional source row per output row
    row_gain = 1.0 + row_gain_amplitude * np.cos(2.0 * np.pi * i / (ripple_period * 0.5))
    for out_r in range(out_h):
        s = src[out_r]
        # Candidate source rows around s (exclusive upper bound), clamped
        # to valid indices; the clamp formula may include one extra index
        # whose triangular weight is always 0, which is harmless.
        k_min = int(np.floor(s)) - blur_radius
        k_max = int(np.floor(s)) + blur_radius + 1
        k_min_clamped = max(k_min, 0)
        k_max_clamped = min(k_max, in_h - 1) + 1
        ks = np.arange(k_min_clamped, k_max_clamped, dtype=np.int32)
        d = np.abs(ks - s)
        # Triangular kernel; 1e-6 guards against division by zero at radius 0.
        w = np.maximum(0.0, 1.0 - d / (blur_radius + 1e-6))
        if w.sum() > 0:
            w = w / w.sum()  # normalize weights to sum to 1 before the gain
        w = w * row_gain[out_r]
        A[out_r, ks] = w.astype(dtype)
    return A
| 52 | + |
NUM_IMAGES = 2000
IN_H, OUT_H, W = 2167, 2000, 2070

# Persist one (OUT_H, IN_H) warp matrix per image in a 3-D Blosc2 array.
out = blosc2.empty(shape=(NUM_IMAGES, OUT_H, IN_H), dtype=dtype,
                   urlpath="transform.b2nd", mode='w', cparams=cparams)

for i in tqdm(range(NUM_IMAGES), desc="Generating and saving transform matrices to Blosc2"):
    # Randomize the warp parameters a little per image.
    ripple_amp = 20 + np.random.uniform(-5, 5)
    ripple_period = 300 + np.random.uniform(-30, 30)
    row_gain_amp = 0.10 + np.random.uniform(-0.05, 0.05)
    blur_r = np.random.choice([0, 1, 2])

    # Build the warp matrix and store it at index i.
    A = build_dense_rowwarp_matrix(out_h=OUT_H, in_h=IN_H,
                                   ripple_amplitude=ripple_amp,
                                   ripple_period=ripple_period,
                                   blur_radius=blur_r,
                                   row_gain_amplitude=row_gain_amp)
    out[i] = A

fname_in = "kevlar.h5"   # input file with the kevlar dataset
fname_out = "my_kevlar.h5"
# The source file holds 1000 frames; duplicate them to fill 2000.
b2im = blosc2.empty(shape=(2000, 2167, 2070), dtype=dtype, cparams=cparams,
                    urlpath="kevlar.b2nd", mode="w")
with h5py.File(fname_in, "r") as fr:
    dset = fr["/entry/data/data"][:]
    b2im[:1000] = dset
    b2im[1000:] = dset
    del dset  # release several GB before the conversion below
print("Saved data to Blosc2.")

# Re-open read-only and mirror both arrays into a Blosc2-compressed HDF5 file.
b2im = blosc2.open(urlpath="kevlar.b2nd", mode="r")
b2im_trans = blosc2.open(urlpath="transform.b2nd", mode="r")
with h5py.File(fname_out, "w") as fw:
    b2comp = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
    dset_out1 = fw.create_dataset(
        "data",
        b2im.shape, b2im.dtype,
        **b2comp,
    )
    dset_out2 = fw.create_dataset(
        "transform",
        b2im_trans.shape, b2im_trans.dtype,
        **b2comp,
    )
    # BUG FIX: the original loop was `range(len(b2im), batch_size)`, i.e.
    # range(2000, 32), which is EMPTY — the HDF5 datasets were never filled.
    # Step through the arrays in batches, and use batch_size (not a
    # hard-coded 32) for the destination slices as well.
    for i in tqdm(range(0, len(b2im), batch_size), desc="Converting transform and data matrices to HDF5"):
        dset_out1[i:i+batch_size] = b2im[i:i+batch_size]
        dset_out2[i:i+batch_size] = b2im_trans[i:i+batch_size]
| 106 | + |
| 107 | + |
# Re-open the persistent Blosc2 arrays read-only.
dset_a = blosc2.open("transform.b2nd", mode="r")
dset_b = blosc2.open("kevlar.b2nd", mode="r")

# BUG FIX: `dtype.itemsize` on the np.float32 *class* yields a descriptor,
# not 4, so the size expression raised a TypeError; go through np.dtype().
# Also, the original single-quoted f-string spanned two physical lines,
# which is a SyntaxError before Python 3.12 — compute the value first.
itemsize = np.dtype(dtype).itemsize
out_shape = dset_a.shape[:-1] + dset_b.shape[-1:]  # shape of A @ B
total_elems = np.prod(dset_a.shape) + np.prod(out_shape) + np.prod(dset_b.shape)
working_set_gb = round(total_elems * itemsize / 2 ** 30, 1)
print(f'Total working set size: {working_set_gb} GB.')

# --- Matmul with Blosc2 (out-of-core, result persisted to disk) ---
t0 = time()
out_blosc = blosc2.matmul(dset_a, dset_b, urlpath='out.b2nd', mode="w", cparams=cparams)
blosc_time = time() - t0
chunks_blosc = [dset_a.chunks, dset_b.chunks]
chunks_blosc_out = out_blosc.chunks
in_shapes = [dset_a.shape, dset_b.shape]
print(f"Blosc2 Performance = {blosc_time:.2f} s")

# --- Matmul with PyTorch, streaming batches through HDF5 ---
h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
# Ensure the output dataset exists. FIX: the original opened this handle
# and never closed it before re-opening the same file below; use `with`.
with h5py.File("my_kevlar.h5", "r+") as f:
    if "out" not in f:
        f.create_dataset("out", shape=out_blosc.shape, dtype=out_blosc.dtype, **h5compressor)

# Time only the batched matmul pass (dataset creation above is excluded,
# matching the original's second `t0 = time()` reset).
t0 = time()
with h5py.File("my_kevlar.h5", "r+") as f:
    dset_a = f["transform"]
    dset_b = f["data"]
    dset_out = f["out"]

    for i in tqdm(range(0, len(dset_a), batch_size), desc="PyTorch Matmul"):
        # h5py slicing decompresses into NumPy; torch wraps it zero-copy.
        batch_a = torch.from_numpy(dset_a[i:i+batch_size])
        batch_b = torch.from_numpy(dset_b[i:i+batch_size])
        dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b)
    hdf5_chunks = [dset_a.chunks, dset_b.chunks]
    hdf5_chunks_out = dset_out.chunks
torch_time = time() - t0
print(f"PyTorch Performance = {torch_time:.2f} s")

# Persist the benchmark results for later plotting/analysis.
results = {'blosc_chunks_out': chunks_blosc_out, 'blosc_chunks': chunks_blosc,
           'hdf5_chunks_out': hdf5_chunks_out, 'hdf5_chunks': hdf5_chunks,
           'ABshape': in_shapes, 'dtype': out_blosc.dtype,
           'PyTorch': torch_time, 'Blosc2': blosc_time}
fname = 'matmul_OOC'
with open(f'{fname}.pkl', 'wb') as f:
    pickle.dump(results, f)
0 commit comments