Skip to content

Commit 0c9af6a

Browse files
committed
Remove optimisation and finish benchmark
1 parent 6c15f8e commit 0c9af6a

2 files changed

Lines changed: 21 additions & 18 deletions

File tree

bench/ndarray/fancy_index.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,28 @@
1616
import zarr
1717
import h5py
1818
import pickle
19+
import os
1920
plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16})
2021
plt.rcParams['figure.dpi'] = 1000
2122
plt.rcParams['savefig.dpi'] = 1000
2223
plt.rc('text', usetex=False)
2324
plt.rc('font',**{'serif':['cm']})
2425
plt.style.use('seaborn-v0_8-paper')
2526

26-
NUMPY_BLOSC = True
27-
NUMPY_BLOSC_ZARR = False
27+
NUMPY_BLOSC = True # activate NUMPY and BLOSC tests
28+
NUMPY_BLOSC_ZARR = False # activate NUMPY, BLOSC and Zarr tests
29+
# If both flags are False, the default is to run the tests for NumPy, Blosc2, Zarr and HDF5
2830

2931
def genarray(r, ndims=1, verbose=True):
3032
d = int((r*2**30/8)**(1/ndims))
3133
shape = (d,) * ndims
3234
chunks = (d // 4,) * ndims
3335
blocks = (max(d // 10, 1),) * ndims
3436
t = time.time()
35-
arr = blosc2.ones(shape=shape, dtype=np.int64)
37+
if os.path.exists(f'linspace{r}.b2nd'):
38+
arr = blosc2.open(urlpath=f'linspace{r}.b2nd')
39+
else:
40+
arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=f'linspace{r}.b2nd', mode='w')
3641
t = time.time() - t
3742
if verbose:
3843
print(f"Array shape: {arr.shape}")
@@ -41,15 +46,15 @@ def genarray(r, ndims=1, verbose=True):
4146
return arr
4247

4348

44-
sizes = np.int64(np.array([1, 2, 4, 8, 16]))
49+
sizes = np.int64(np.array([1, 2, 4, 8, 16, 24]))
4550
rng = np.random.default_rng()
4651
blosctimes = []
4752
nptimes = []
4853
zarrtimes = []
4954
h5pytimes = []
5055
x = np.arange(len(sizes))
5156
width = 0.2
52-
labs = 'NumpyBlosc2' if NUMPY_BLOSC else 'NumpyBlosc2ZarrHDF5'
57+
labs = 'NumpyBlosc2' if NUMPY_BLOSC else 'NumpyBlosc2ZarrHDF5'
5358
labs = 'NumpyBlosc2Zarr' if NUMPY_BLOSC_ZARR else labs
5459
try:
5560
with open(f"results{labs}.pkl", 'rb') as f:
@@ -118,8 +123,8 @@ def timer(arr, row=row, col=col):
118123
z_test[:] = arr
119124
zarrtimes += [timer(z_test)]
120125
with h5py.File('my_hdf5_file.h5', 'w') as f:
121-
dset = f.create_dataset("init", data=arr)
122-
h5pytimes += [timer(dset)]
126+
dset = f.create_dataset("init", data=arr)
127+
h5pytimes += [timer(dset)]
123128

124129
blosctimes = np.array(blosctimes)
125130
nptimes = np.array(nptimes)
@@ -135,8 +140,8 @@ def timer(arr, row=row, col=col):
135140
c = ['b', 'r', 'g', 'm'][i]
136141
mean = times.mean(axis=1)
137142
err = (mean - times.min(axis=1), times.max(axis=1)-mean)
138-
plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
139-
error_kw=dict(lw=2, capthick=2, ecolor='k'))
143+
plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
144+
error_kw=dict(lw=2, capthick=2, ecolor='k'))
140145
labs+=label
141146

142147
with open(f"results{labs}.pkl", 'wb') as f:
@@ -147,7 +152,8 @@ def timer(arr, row=row, col=col):
147152
plt.xticks(x-width, np.round(sizes, 2))
148153
plt.ylabel("Time (s)")
149154
plt.title('Fancy indexing performance comparison')
150-
plt.ylim([0,10])
155+
# plt.ylim([0,10])
156+
plt.gca().set_yscale('log')
151157
plt.savefig(f'plots/fancyIdx{labs}.png', format="png")
152158
plt.show()
153159

src/blosc2/ndarray.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,9 +1442,6 @@ def T(self):
14421442
def get_fselection_numpy(self, key):
14431443
# TODO: Make this faster for broadcasted keys
14441444
shape = self.shape
1445-
if math.prod(shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
1446-
# just load whole array into memory and do numpy indexing
1447-
return self[:][key]
14481445
chunks = self.chunks
14491446
_slice = ndindex.ndindex(key).expand(shape)
14501447
chunk_size = ndindex.ChunkSize(chunks)
@@ -1466,20 +1463,20 @@ def get_fselection_numpy(self, key):
14661463
return out
14671464

14681465
def get_oselection_numpy(self, key):
1469-
'''
1466+
"""
14701467
Select independently from self along the axes specified in key. Key must have the same length as self.shape.
14711468
See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing.
1472-
'''
1469+
"""
14731470
shape = tuple(len(k) for k in key) + self.shape[len(key) :]
14741471
# Create the array to store the result
14751472
arr = np.empty(shape, dtype=self.dtype)
14761473
return super().get_oindex_numpy(arr, key)
14771474

14781475
def set_oselection_numpy(self, key, arr: np.ndarray):
1479-
'''
1476+
"""
14801477
Select independently from self along the axes specified in key and set the selected entries to the values in arr. Key must have the same length as self.shape.
14811478
See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing.
1482-
'''
1479+
"""
14831480
return super().set_oindex_numpy(key, arr)
14841481

14851482
def __getitem__( # noqa: C901
@@ -4409,7 +4406,7 @@ def __init__(self, array: NDArray):
44094406

44104407
# TODO: implement __getitem__/__setitem__; the stubs below return (rather than raise) NotImplementedError
44114408
def __getitem__(self, selection) -> np.ndarray:
4412-
return NotImplementedError
4409+
return NotImplementedError
44134410

44144411
def __setitem__(self, selection, input) -> np.ndarray:
44154412
return NotImplementedError

0 commit comments

Comments
 (0)