### Matmul performance comparison between Blosc2 and PyTorch with persistent storage
# It is important to force numpy to use mkl as it can speed up the
# blosc2 matmul (which uses np.matmul as a backend) by a factor of 2x:
#   conda install numpy mkl
# To download the kevlar.h5 dataset use:
#   curl http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 --output "kevlar.h5"
27import numpy as np
38import blosc2
49import matplotlib .pyplot as plt
@@ -70,21 +75,24 @@ def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
7075 row_gain_amplitude = row_gain_amp )
7176 out [i ] = A
7277
73-
74-
fname_in = "kevlar.h5"  # input file with the kevlar dataset

# Convert the HDF5 dataset to an on-disk Blosc2 array, writing each batch
# twice (original half + duplicate half) so the output holds 2x the frames.
with h5py.File(fname_in, "r") as fr:
    dset = fr["/entry/data/data"]  # lazy h5py dataset; sliced per batch below
    n_frames = len(dset)
    b2im = blosc2.empty(
        shape=(2 * n_frames, 2167, 2070),
        dtype=dtype,
        cparams=cparams,
        urlpath="kevlar.b2nd",
        mode="w",
    )
    for i in tqdm(range(0, n_frames, batch_size), desc="Converting data matrices to Blosc2"):
        end = min(i + batch_size, n_frames)
        res = dset[i:end]
        # For visibility, zero-out pixels brighter than 10
        res = np.where(res > 10, 0, res)
        b2im[i:end] = res
        # BUG FIX: the duplicate copy must use a slice, not a tuple index —
        # `b2im[i + 1000, end + 1000]` selects a single 1-D row and cannot
        # accept a (batch, 2167, 2070) array. Offset by n_frames (not a
        # hard-coded 1000) to stay consistent with the 2 * n_frames shape.
        b2im[i + n_frames:end + n_frames] = res
    del dset
print("Saved data to Blosc2.")
8491
8592 b2im = blosc2 .open (urlpath = "kevlar.b2nd" , mode = "r" )
8693 b2im_trans = blosc2 .open (urlpath = "transform.b2nd" , mode = "r" )
8794 s , d = b2im .shape , b2im .dtype
95+ fname_out = "my_kevlar.h5"
8896 # Write to .h5 file #
8997 with h5py .File (fname_out , "w" ) as fw :
9098 b2comp = hdf5plugin .Blosc2 (cname = 'lz4' , clevel = 1 , filters = hdf5plugin .Blosc2 .SHUFFLE ) # just for identification, no compression algorithm specified
@@ -98,18 +106,15 @@ def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
98106 b2im_trans .shape , b2im_trans .dtype ,
99107 ** b2comp ,
100108 )
101- # Write individual blosc2 chunks directly to hdf5
102- # hdf5 requires a cframe, which is only available via blosc2 schunks (not chunks)
103- for i in tqdm (range (len (b2im ), batch_size ), desc = "Converting transform and data matrices to HDF5" ):
104- dset_out1 [i :i + 32 ] = b2im [i :i + batch_size ]
105- dset_out2 [i :i + 32 ] = b2im_trans [i :i + batch_size ]
109+ for i in tqdm (range (0 , len (b2im ), batch_size ), desc = "Converting transform and data matrices to HDF5" ):
110+ dset_out1 [i :i + batch_size ] = b2im [i :i + batch_size ]
111+ dset_out2 [i :i + batch_size ] = b2im_trans [i :i + batch_size ]
106112
107113
# Re-open the arrays
dset_a = blosc2.open("transform.b2nd", mode="r")
dset_b = blosc2.open("kevlar.b2nd", mode="r")
# Working set = transform + result (transform rows x data cols) + data,
# each converted to GiB before summing, then scaled by the element size.
_gib = 2 ** 30
_elems_out = np.prod(dset_a.shape[:-1] + dset_b.shape[-1:])
_total_gib = (
    np.prod(dset_a.shape) / _gib
    + _elems_out / _gib
    + np.prod(dset_b.shape) / _gib
) * dset_b.dtype.itemsize
print(f'Total working set size: {round(_total_gib, 1)} GB.')
113118
114119# --- Matmul Blosc2 ---
115120t0 = time ()
@@ -132,7 +137,7 @@ def build_dense_rowwarp_matrix(out_h=2000, in_h=2167,
132137 dset_b = f ["data" ]
133138 dset_out = f ["out" ]
134139
135- for i in tqdm (range (0 , len (dset_a ), batch_size ), desc = "PyTorch Matmul" ): # batch of 32
140+ for i in tqdm (range (0 , len (dset_out ), batch_size ), desc = "PyTorch Matmul" ): # batch of 32
136141 batch_a = torch .from_numpy (dset_a [i :i + batch_size ]) # NumPy array slice
137142 batch_b = torch .from_numpy (dset_b [i :i + batch_size ]) # NumPy array slice
138143 dset_out [i :i + batch_size ] = torch .matmul (batch_a , batch_b )
0 commit comments