Skip to content

Commit f630eaa

Browse files
committed
Require chunks to be equal for Blosc2, Zarr and HDF5
1 parent 6cb1be2 commit f630eaa

2 files changed

Lines changed: 32 additions & 17 deletions

File tree

bench/ndarray/fancy_index.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ def genarray(r, ndims=2, verbose=True):
3737
chunks = (d // 4,) * ndims
3838
blocks = (max(d // 10, 1),) * ndims
3939
t = time.time()
40-
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=f'linspace{r}{ndims}D.b2nd', mode='w')
40+
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64,
41+
urlpath=f'linspace{r}{ndims}D.b2nd', mode='w')
4142
t = time.time() - t
4243
arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30
4344
if verbose:
@@ -48,6 +49,7 @@ def genarray(r, ndims=2, verbose=True):
4849

4950

5051
target_sizes = np.int64(np.array([1, 2, 4, 8, 16, 24, 32]))
52+
# target_sizes = np.int64(np.array([1, 2, 4, 8])) # for quick testing
5153
rng = np.random.default_rng()
5254
blosctimes = []
5355
nptimes = []
@@ -97,23 +99,29 @@ def timer(arr):
9799
if NUMPY:
98100
nptimes += [timer(nparr)]
99101
if ZARR:
100-
z_test = zarr.create_array(store='data/example.zarr', shape=nparr.shape, dtype=nparr.dtype, overwrite=True)
102+
z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, chunks=arr.chunks,
103+
dtype=nparr.dtype, overwrite=True)
101104
z_test[:] = nparr
102105
zarrtimes += [timer(z_test)]
103106
if HDF5:
104107
with h5py.File('my_hdf5_file.h5', 'w') as f:
105-
dset = f.create_dataset("init", data=nparr)
108+
dset = f.create_dataset("init", data=nparr, chunks=arr.chunks)
106109
h5pytimes += [timer(dset)]
107110

108111
blosctimes = np.array(blosctimes)
109112
nptimes = np.array(nptimes)
110113
zarrtimes = np.array(zarrtimes)
111114
h5pytimes = np.array(h5pytimes)
112115
labs=''
113-
result_tuple = (["Numpy",nptimes,-2*width],["Blosc2",blosctimes, -width],["Zarr",zarrtimes, 0],["HDF5",h5pytimes, width])
116+
width = 0.2
117+
result_tuple = (
118+
["Numpy", nptimes, -2 * width],
119+
["Blosc2", blosctimes, -width],
120+
["Zarr", zarrtimes, 0],
121+
["HDF5", h5pytimes, width]
122+
)
114123

115124
x = np.arange(len(genuine_sizes))
116-
width = 0.2
117125
# Create barplot for Numpy vs Blosc vs Zarr vs H5py
118126
for i, r in enumerate(result_tuple):
119127
if r[1].shape != (0,):
@@ -125,7 +133,7 @@ def timer(arr):
125133
error_kw=dict(lw=2, capthick=2, ecolor='k'))
126134
labs+=label
127135

128-
filename = "results{labs}{NDIMS}D"
136+
filename = f"results{labs}{NDIMS}D"
129137

130138
with open(f"{filename}.pkl", 'wb') as f:
131139
pickle.dump(result_tuple, f)
@@ -134,7 +142,7 @@ def timer(arr):
134142
plt.legend()
135143
plt.xticks(x-width, np.round(genuine_sizes, 2))
136144
plt.ylabel("Time (s)")
137-
plt.title('Fancy indexing performance comparison, {NDIMS}D')
145+
plt.title(f"Fancy indexing performance comparison, {NDIMS}D")
138146
plt.gca().set_yscale('log')
139147
plt.savefig(f'plots/fancyIdx{filename}.png', format="png")
140148
plt.show()

bench/ndarray/fancy_index1D.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import h5py
1818
import pickle
1919
import os
20+
2021
plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16})
2122
plt.rcParams['figure.dpi'] = 300
2223
plt.rcParams['savefig.dpi'] = 300
@@ -26,7 +27,7 @@
2627

2728
NUMPY = True
2829
BLOSC = True
29-
ZARR = False
30+
ZARR = True
3031
HDF5 = True
3132
SPARSE = True
3233

@@ -75,35 +76,41 @@ def timer(arr):
7576

7677
nparr = arr[:]
7778
if BLOSC:
78-
blosctimes += [timer(arr, row=idx, col=idx)]
79+
blosctimes += [timer(arr)]
7980
if NUMPY:
80-
nptimes += [timer(nparr, row=idx, col=idx)]
81+
nptimes += [timer(nparr)]
8182
if ZARR:
82-
z_test = zarr.create_array(store='data/example.zarr', shape=nparr.shape, dtype=nparr.dtype, overwrite=True)
83+
z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, chunks=arr.chunks,
84+
dtype=nparr.dtype, overwrite=True)
8385
z_test[:] = nparr
84-
zarrtimes += [timer(z_test, row=idx, col=idx)]
86+
zarrtimes += [timer(z_test)]
8587
if HDF5:
8688
with h5py.File('my_hdf5_file.h5', 'w') as f:
87-
dset = f.create_dataset("init", data=nparr)
89+
dset = f.create_dataset("init", data=nparr, chunks=arr.chunks)
8890
h5pytimes += [timer(dset)]
8991

9092
blosctimes = np.array(blosctimes)
9193
nptimes = np.array(nptimes)
9294
zarrtimes = np.array(zarrtimes)
9395
h5pytimes = np.array(h5pytimes)
9496
labs=''
95-
result_tuple = (["Numpy",nptimes,-2*width],["Blosc2",blosctimes, -width],["Zarr",zarrtimes, 0],["HDF5",h5pytimes, width])
97+
width = 0.2
98+
result_tuple = (
99+
["Numpy", nptimes, -2 * width],
100+
["Blosc2", blosctimes, -width],
101+
["Zarr", zarrtimes, 0],
102+
["HDF5", h5pytimes, width]
103+
)
96104

97105
x = np.arange(len(genuine_sizes))
98-
width = 0.2
99106
# Create barplot for Numpy vs Blosc vs Zarr vs H5py
100107
for i, r in enumerate(result_tuple):
101108
if r[1].shape != (0,):
102109
label, times, w = r
103110
c = ['b', 'r', 'g', 'm'][i]
104111
mean = times.mean(axis=1)
105112
err = (mean - times.min(axis=1), times.max(axis=1)-mean)
106-
plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
113+
plt.bar(x + w, mean, width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
107114
error_kw=dict(lw=2, capthick=2, ecolor='k'))
108115
labs+=label
109116

@@ -115,7 +122,7 @@ def timer(arr):
115122
plt.legend()
116123
plt.xticks(x-width, np.round(genuine_sizes, 2))
117124
plt.ylabel("Time (s)")
118-
plt.title('Fancy indexing performance comparison, 1D' + {" sparse" if SPARSE else ""})
125+
plt.title(f"Fancy indexing performance comparison, 1D {' sparse' if SPARSE else ''}")
119126
plt.gca().set_yscale('log')
120127
plt.savefig(f'plots/{filename}.png', format="png")
121128
plt.show()

0 commit comments

Comments
 (0)