|
| 1 | +### Matmul performance comparison between Blosc2 and PyTorch with persistent storage |
| 2 | +import numpy as np |
| 3 | +import blosc2 |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import torch |
| 6 | +import pickle |
| 7 | +from time import time |
| 8 | +import h5py |
| 9 | +import hdf5plugin |
| 10 | +from tqdm import tqdm # progress bar (pip install tqdm) |
| 11 | +import os |
# Blosc2 compression parameters: LZ4 at the lowest level with byte
# shuffling — tuned for raw throughput rather than compression ratio.
cparams = dict(
    codec=blosc2.Codec.LZ4,
    filters=[blosc2.Filter.SHUFFLE],
    clevel=1,
)

# Frames handled per I/O batch throughout the script.
batch_size = 32
# When True, (re)build every on-disk dataset from scratch.
CREATE = True
# Element type shared by all arrays in the benchmark.
dtype = np.float32
| 20 | +if CREATE:  # (re)generate the Blosc2/HDF5 datasets on disk; set CREATE=False above to reuse existing files
def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
                               scale=1.0,
                               ripple_amplitude=30.0,
                               ripple_period=400.0,
                               blur_radius=1,
                               row_gain_amplitude=0.15,
                               dtype=np.float32):
    """Build a dense vertical row-warp matrix ``A`` of shape (out_h, in_h).

    Applying ``A @ img`` remaps image rows: output row ``r`` is a small
    triangular-weighted blend of the input rows surrounding a fractional
    source position that ramps linearly from 0 to ``(in_h - 1) * scale``,
    perturbed by a sinusoidal ripple and modulated by a per-row gain.

    Parameters
    ----------
    out_h, in_h : int
        Output and input heights; the returned matrix shape.
    scale : float
        Fraction of the input height spanned by the linear ramp.
    ripple_amplitude : float
        Amplitude (in rows) of the sinusoidal warp.
    ripple_period : float
        Period (in rows) of the sinusoidal warp.
    blur_radius : int
        Half-width of the triangular blending window.
        NOTE(review): with blur_radius=0 the ``1e-6`` denominator below
        collapses the weights to ~0 unless the source position lands exactly
        on an integer row, leaving (near-)empty rows — confirm intended.
    row_gain_amplitude : float
        Amplitude of the cosine per-row gain modulation.
    dtype : numpy dtype, optional
        Element type of the result. Defaults to ``np.float32``, matching the
        previous behavior that read a module-level global.

    Returns
    -------
    numpy.ndarray
        Array of shape (out_h, in_h) with the given dtype.
    """
    A = np.zeros((out_h, in_h), dtype=dtype)
    i = np.arange(out_h, dtype=dtype)
    t = i / max(out_h - 1, 1)  # normalized output-row position in [0, 1]
    linear_src = t * (in_h - 1) * scale
    ripple = ripple_amplitude * np.sin(2.0 * np.pi * i / ripple_period)
    src = linear_src + ripple  # fractional source row per output row
    row_gain = 1.0 + row_gain_amplitude * np.cos(2.0 * np.pi * i / (ripple_period * 0.5))
    for out_r in range(out_h):
        s = src[out_r]
        # Candidate source rows around s (exclusive upper bound), clamped
        # to valid indices; the clamp formula may include one extra index
        # whose triangular weight is always 0, which is harmless.
        k_min = int(np.floor(s)) - blur_radius
        k_max = int(np.floor(s)) + blur_radius + 1
        k_min_clamped = max(k_min, 0)
        k_max_clamped = min(k_max, in_h - 1) + 1
        ks = np.arange(k_min_clamped, k_max_clamped, dtype=np.int32)
        d = np.abs(ks - s)
        # Triangular kernel; 1e-6 guards against division by zero at radius 0.
        w = np.maximum(0.0, 1.0 - d / (blur_radius + 1e-6))
        if w.sum() > 0:
            w = w / w.sum()  # normalize weights to sum to 1 before the gain
        w = w * row_gain[out_r]
        A[out_r, ks] = w.astype(dtype)
    return A
| 52 | + |
NUM_IMAGES = 2000
IN_H, OUT_H, W = 2167, 2000, 2070

# Persist one (OUT_H, IN_H) warp matrix per image in a 3-D Blosc2 array.
out = blosc2.empty(shape=(NUM_IMAGES, OUT_H, IN_H), dtype=dtype,
                   urlpath="transform.b2nd", mode='w', cparams=cparams)

for i in tqdm(range(NUM_IMAGES), desc="Generating and saving transform matrices to Blosc2"):
    # Randomize the warp parameters a little per image.
    ripple_amp = 20 + np.random.uniform(-5, 5)
    ripple_period = 300 + np.random.uniform(-30, 30)
    row_gain_amp = 0.10 + np.random.uniform(-0.05, 0.05)
    blur_r = np.random.choice([0, 1, 2])

    # Build the warp matrix and store it at index i.
    A = build_dense_rowwarp_matrix(out_h=OUT_H, in_h=IN_H,
                                   ripple_amplitude=ripple_amp,
                                   ripple_period=ripple_period,
                                   blur_radius=blur_r,
                                   row_gain_amplitude=row_gain_amp)
    out[i] = A

fname_in = "kevlar.h5"   # input file with the kevlar dataset
fname_out = "my_kevlar.h5"
# The source file holds 1000 frames; duplicate them to fill 2000.
b2im = blosc2.empty(shape=(2000, 2167, 2070), dtype=dtype, cparams=cparams,
                    urlpath="kevlar.b2nd", mode="w")
with h5py.File(fname_in, "r") as fr:
    dset = fr["/entry/data/data"][:]
    b2im[:1000] = dset
    b2im[1000:] = dset
    del dset  # release several GB before the conversion below
print("Saved data to Blosc2.")

# Re-open read-only and mirror both arrays into a Blosc2-compressed HDF5 file.
b2im = blosc2.open(urlpath="kevlar.b2nd", mode="r")
b2im_trans = blosc2.open(urlpath="transform.b2nd", mode="r")
with h5py.File(fname_out, "w") as fw:
    b2comp = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
    dset_out1 = fw.create_dataset(
        "data",
        b2im.shape, b2im.dtype,
        **b2comp,
    )
    dset_out2 = fw.create_dataset(
        "transform",
        b2im_trans.shape, b2im_trans.dtype,
        **b2comp,
    )
    # BUG FIX: the original loop was `range(len(b2im), batch_size)`, i.e.
    # range(2000, 32), which is EMPTY — the HDF5 datasets were never filled.
    # Step through the arrays in batches, and use batch_size (not a
    # hard-coded 32) for the destination slices as well.
    for i in tqdm(range(0, len(b2im), batch_size), desc="Converting transform and data matrices to HDF5"):
        dset_out1[i:i+batch_size] = b2im[i:i+batch_size]
        dset_out2[i:i+batch_size] = b2im_trans[i:i+batch_size]
| 106 | + |
| 107 | + |
# Re-open the persistent Blosc2 arrays read-only.
dset_a = blosc2.open("transform.b2nd", mode="r")
dset_b = blosc2.open("kevlar.b2nd", mode="r")

# BUG FIX: `dtype.itemsize` on the np.float32 *class* yields a descriptor,
# not 4, so the size expression raised a TypeError; go through np.dtype().
# Also, the original single-quoted f-string spanned two physical lines,
# which is a SyntaxError before Python 3.12 — compute the value first.
itemsize = np.dtype(dtype).itemsize
out_shape = dset_a.shape[:-1] + dset_b.shape[-1:]  # shape of A @ B
total_elems = np.prod(dset_a.shape) + np.prod(out_shape) + np.prod(dset_b.shape)
working_set_gb = round(total_elems * itemsize / 2 ** 30, 1)
print(f'Total working set size: {working_set_gb} GB.')

# --- Matmul with Blosc2 (out-of-core, result persisted to disk) ---
t0 = time()
out_blosc = blosc2.matmul(dset_a, dset_b, urlpath='out.b2nd', mode="w", cparams=cparams)
blosc_time = time() - t0
chunks_blosc = [dset_a.chunks, dset_b.chunks]
chunks_blosc_out = out_blosc.chunks
in_shapes = [dset_a.shape, dset_b.shape]
print(f"Blosc2 Performance = {blosc_time:.2f} s")

# --- Matmul with PyTorch, streaming batches through HDF5 ---
h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)
# Ensure the output dataset exists. FIX: the original opened this handle
# and never closed it before re-opening the same file below; use `with`.
with h5py.File("my_kevlar.h5", "r+") as f:
    if "out" not in f:
        f.create_dataset("out", shape=out_blosc.shape, dtype=out_blosc.dtype, **h5compressor)

# Time only the batched matmul pass (dataset creation above is excluded,
# matching the original's second `t0 = time()` reset).
t0 = time()
with h5py.File("my_kevlar.h5", "r+") as f:
    dset_a = f["transform"]
    dset_b = f["data"]
    dset_out = f["out"]

    for i in tqdm(range(0, len(dset_a), batch_size), desc="PyTorch Matmul"):
        # h5py slicing decompresses into NumPy; torch wraps it zero-copy.
        batch_a = torch.from_numpy(dset_a[i:i+batch_size])
        batch_b = torch.from_numpy(dset_b[i:i+batch_size])
        dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b)
    hdf5_chunks = [dset_a.chunks, dset_b.chunks]
    hdf5_chunks_out = dset_out.chunks
torch_time = time() - t0
print(f"PyTorch Performance = {torch_time:.2f} s")

# Persist the benchmark results for later plotting/analysis.
results = {'blosc_chunks_out': chunks_blosc_out, 'blosc_chunks': chunks_blosc,
           'hdf5_chunks_out': hdf5_chunks_out, 'hdf5_chunks': hdf5_chunks,
           'ABshape': in_shapes, 'dtype': out_blosc.dtype,
           'PyTorch': torch_time, 'Blosc2': blosc_time}
fname = 'matmul_OOC'
with open(f'{fname}.pkl', 'wb') as f:
    pickle.dump(results, f)
0 commit comments