Skip to content

Commit c76e4a4

Browse files
committed
Add a plot for the different concatenate methods
1 parent d26ba2b commit c76e4a4

1 file changed

Lines changed: 144 additions & 13 deletions

File tree

bench/ndarray/concatenate.py

Lines changed: 144 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import numpy as np
1010
import blosc2
1111
import time
12+
import matplotlib.pyplot as plt
13+
import os
14+
from matplotlib.ticker import ScalarFormatter
1215

1316

1417
def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
@@ -20,6 +23,11 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
2023
- size: Base size for array dimensions
2124
- aligned_chunks: Whether to use aligned chunk shapes
2225
- axis: Axis along which to concatenate (0 or 1)
26+
27+
Returns:
28+
- duration: Time taken in seconds
29+
- result_shape: Shape of the resulting array
30+
- data_size_gb: Size of data processed in GB
2331
"""
2432
if axis == 0:
2533
# For concatenating along axis 0, the second dimension must be consistent
@@ -46,12 +54,16 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
4654
)
4755
arrays.append(arr)
4856

57+
# Calculate total data size in GB (4 bytes per int32)
58+
total_elements = sum(np.prod(shape) for shape in shapes)
59+
data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB
60+
4961
# Time the concatenation
5062
start_time = time.time()
5163
result = blosc2.concatenate(arrays, axis=axis)
5264
duration = time.time() - start_time
5365

54-
return duration, result.shape
66+
return duration, result.shape, data_size_gb
5567

5668

5769
def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
@@ -62,6 +74,11 @@ def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
6274
- num_arrays: Number of arrays to concatenate
6375
- size: Base size for array dimensions
6476
- axis: Axis along which to concatenate (0 or 1)
77+
78+
Returns:
79+
- duration: Time taken in seconds
80+
- result_shape: Shape of the resulting array
81+
- data_size_gb: Size of data processed in GB
6582
"""
6683
if axis == 0:
6784
# For concatenating along axis 0, the second dimension must be consistent
@@ -83,12 +100,96 @@ def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
83100
).reshape(shape)
84101
numpy_arrays.append(arr)
85102

103+
# Calculate total data size in GB (4 bytes per int32)
104+
total_elements = sum(np.prod(shape) for shape in shapes)
105+
data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB
106+
86107
# Time the concatenation
87108
start_time = time.time()
88109
result = np.concatenate(numpy_arrays, axis=axis)
89110
duration = time.time() - start_time
90111

91-
return duration, result.shape
112+
return duration, result.shape, data_size_gb
113+
114+
115+
def create_combined_plot(sizes, numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0,
                         numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1, output_dir="plots"):
    """
    Create a figure with two side-by-side bar plots comparing the performance for both axes.

    The left subplot shows axis 0 concatenation and the right subplot shows
    axis 1. Each subplot has one group of three bars (NumPy, Blosc2 unaligned,
    Blosc2 aligned) per entry in ``sizes``, and every bar is annotated with its
    value. The figure is written to
    ``<output_dir>/concatenate_benchmark_combined.png`` at 300 dpi, displayed
    with ``plt.show()``, and then closed.

    Parameters:
    - sizes: List of array sizes; used as the x-axis tick labels
    - *_speeds_axis0: Lists of speeds (GB/s) for axis 0 concatenation
    - *_speeds_axis1: Lists of speeds (GB/s) for axis 1 concatenation
    - output_dir: Directory to save the plot (created if it doesn't exist)

    Returns:
    - None. The saved file path is printed to stdout.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    # Build the path once so the saved file and the printed message cannot drift apart
    output_path = os.path.join(output_dir, 'concatenate_benchmark_combined.png')

    # Set up the figure with two subplots side by side (shared y scale so the
    # two axes are directly comparable)
    fig, axes = plt.subplots(1, 2, figsize=(20, 8), sharey=True)

    # Convert sizes to strings for the x-axis
    x_labels = [str(size) for size in sizes]
    x = np.arange(len(sizes))
    width = 0.25

    # One entry per method: (legend label, bar colour, offset in bar widths
    # from the group centre). The order must match the order of the speed
    # lists in ``speeds_per_axis`` below.
    methods = (
        ('NumPy', '#1f77b4', -1),
        ('Blosc2 Unaligned', '#ff7f0e', 0),
        ('Blosc2 Aligned', '#2ca02c', 1),
    )
    speeds_per_axis = (
        (numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0),
        (numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1),
    )

    def autolabel(rects, ax):
        """Write each bar's height, rotated 90 degrees, just above the bar."""
        for rect in rects:
            height = rect.get_height()
            ax.annotate(f'{height:.2f} GB/s',
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom', rotation=90, fontsize=8)

    for axis, (ax, speeds) in enumerate(zip(axes, speeds_per_axis)):
        # Draw the three bars of every size group for this concatenation axis
        bar_groups = [
            ax.bar(x + offset * width, values, width, label=label, color=color)
            for (label, color, offset), values in zip(methods, speeds)
        ]

        # Add labels and titles
        ax.set_xlabel('Array Size (N for NxN array)', fontsize=12)
        ax.set_title(f'Concatenation Performance (axis={axis})', fontsize=14)
        ax.set_xticks(x)
        ax.set_xticklabels(x_labels)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        ax.yaxis.set_major_formatter(ScalarFormatter(useOffset=False))

        # Add legend inside each plot
        ax.legend(title="Concatenation Methods",
                  loc='upper left',
                  fontsize=12,
                  frameon=True,
                  facecolor='white',
                  edgecolor='black',
                  framealpha=0.8)

        # Add value labels on top of the bars
        for rects in bar_groups:
            autolabel(rects, ax)

    # Add y-label only to the left subplot
    axes[0].set_ylabel('Throughput (GB/s)', fontsize=12)

    # Save the plot; operate on ``fig`` explicitly instead of pyplot's implicit
    # "current figure" so other open figures cannot be affected
    fig.tight_layout()
    fig.savefig(output_path, dpi=300)
    plt.show()
    plt.close(fig)

    print(f"Combined plot saved to {output_path}")
92193

93194

94195
def main():
@@ -100,24 +201,47 @@ def main():
100201
sizes = [500, 1000, 2000, 4000] # must be divisible by 4 for aligned chunks
101202
num_arrays = 10
102203

204+
# Lists to store results for both axes
205+
numpy_speeds_axis0 = []
206+
unaligned_speeds_axis0 = []
207+
aligned_speeds_axis0 = []
208+
numpy_speeds_axis1 = []
209+
unaligned_speeds_axis1 = []
210+
aligned_speeds_axis1 = []
211+
103212
for axis in [0, 1]:
104213
print(f"\nConcatenating {num_arrays} arrays along axis {axis}")
105-
print(f"{'Size':<10} {'NumPy':<10} {'Unaligned':<10} {'Aligned':<10} {'Alig vs Unalig':<16} {'Speedup vs NumPy':<16}")
106-
print(f"{'-' * 80}")
214+
print(f"{'Size':<10} {'NumPy (GB/s)':<14} {'Unaligned (GB/s)':<18} {'Aligned (GB/s)':<16} {'Alig vs Unalig':<16} {'Aligned vs NumPy':<16}")
215+
print(f"{'-' * 90}")
107216

108217
for size in sizes:
109218
# Run the benchmarks
110-
numpy_time, numpy_shape = run_numpy_benchmark(num_arrays, size, axis=axis)
111-
unaligned_time, shape1 = run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis)
112-
aligned_time, shape2 = run_benchmark(num_arrays, size, aligned_chunks=True, axis=axis)
219+
numpy_time, numpy_shape, data_size_gb = run_numpy_benchmark(num_arrays, size, axis=axis)
220+
unaligned_time, shape1, _ = run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis)
221+
aligned_time, shape2, _ = run_benchmark(num_arrays, size, aligned_chunks=True, axis=axis)
113222

114-
# Calculate speedups
115-
numpy_vs_aligned = numpy_time / aligned_time if aligned_time > 0 else float("inf")
116-
aligned_vs_unaligned = unaligned_time / aligned_time if aligned_time > 0 else float("inf")
223+
# Calculate throughputs in GB/s
224+
numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float("inf")
225+
unaligned_speed = data_size_gb / unaligned_time if unaligned_time > 0 else float("inf")
226+
aligned_speed = data_size_gb / aligned_time if aligned_time > 0 else float("inf")
227+
228+
# Store speeds in the appropriate list
229+
if axis == 0:
230+
numpy_speeds_axis0.append(numpy_speed)
231+
unaligned_speeds_axis0.append(unaligned_speed)
232+
aligned_speeds_axis0.append(aligned_speed)
233+
else:
234+
numpy_speeds_axis1.append(numpy_speed)
235+
unaligned_speeds_axis1.append(unaligned_speed)
236+
aligned_speeds_axis1.append(aligned_speed)
237+
238+
# Calculate speedup ratios
239+
aligned_vs_unaligned = aligned_speed / unaligned_speed if unaligned_speed > 0 else float("inf")
240+
aligned_vs_numpy = aligned_speed / numpy_speed if numpy_speed > 0 else float("inf")
117241

118242
# Print results
119-
print(f"{size:<10} {numpy_time:<10.4f} {unaligned_time:<10.4f} {aligned_time:<10.4f} "
120-
f"{aligned_vs_unaligned:>10.2f}x {numpy_vs_aligned:>10.2f}x")
243+
print(f"{size:<10} {numpy_speed:<14.2f} {unaligned_speed:<18.2f} {aligned_speed:<16.2f} "
244+
f"{aligned_vs_unaligned:>10.2f}x {aligned_vs_numpy:>10.2f}x")
121245

122246
# Quick verification of result shape
123247
if axis == 0:
@@ -132,7 +256,14 @@ def main():
132256
if shapes[i] != expected_shape:
133257
print(f"Warning: {shape_name} shape {shapes[i]} does not match expected {expected_shape}")
134258

135-
print(f"{'=' * 80}")
259+
print(f"{'=' * 90}")
260+
261+
# Create the combined plot with both axes
262+
create_combined_plot(
263+
sizes,
264+
numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0,
265+
numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1
266+
)
136267

137268

138269
if __name__ == "__main__":

0 commit comments

Comments
 (0)