99import numpy as np
1010import blosc2
1111import time
12+ import matplotlib .pyplot as plt
13+ import os
14+ from matplotlib .ticker import ScalarFormatter
1215
1316
1417def run_benchmark (num_arrays = 10 , size = 500 , aligned_chunks = False , axis = 0 ):
@@ -20,6 +23,11 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
2023 - size: Base size for array dimensions
2124 - aligned_chunks: Whether to use aligned chunk shapes
2225 - axis: Axis along which to concatenate (0 or 1)
26+
27+ Returns:
28+ - duration: Time taken in seconds
29+ - result_shape: Shape of the resulting array
30+ - data_size_gb: Size of data processed in GB
2331 """
2432 if axis == 0 :
2533 # For concatenating along axis 0, the second dimension must be consistent
@@ -46,12 +54,16 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
4654 )
4755 arrays .append (arr )
4856
57+ # Calculate total data size in GB (4 bytes per int32)
58+ total_elements = sum (np .prod (shape ) for shape in shapes )
59+ data_size_gb = total_elements * 4 / (1024 ** 3 ) # Convert bytes to GB
60+
4961 # Time the concatenation
5062 start_time = time .time ()
5163 result = blosc2 .concatenate (arrays , axis = axis )
5264 duration = time .time () - start_time
5365
54- return duration , result .shape
66+ return duration , result .shape , data_size_gb
5567
5668
5769def run_numpy_benchmark (num_arrays = 10 , size = 500 , axis = 0 ):
@@ -62,6 +74,11 @@ def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
6274 - num_arrays: Number of arrays to concatenate
6375 - size: Base size for array dimensions
6476 - axis: Axis along which to concatenate (0 or 1)
77+
78+ Returns:
79+ - duration: Time taken in seconds
80+ - result_shape: Shape of the resulting array
81+ - data_size_gb: Size of data processed in GB
6582 """
6683 if axis == 0 :
6784 # For concatenating along axis 0, the second dimension must be consistent
@@ -83,12 +100,96 @@ def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
83100 ).reshape (shape )
84101 numpy_arrays .append (arr )
85102
103+ # Calculate total data size in GB (4 bytes per int32)
104+ total_elements = sum (np .prod (shape ) for shape in shapes )
105+ data_size_gb = total_elements * 4 / (1024 ** 3 ) # Convert bytes to GB
106+
86107 # Time the concatenation
87108 start_time = time .time ()
88109 result = np .concatenate (numpy_arrays , axis = axis )
89110 duration = time .time () - start_time
90111
91- return duration , result .shape
112+ return duration , result .shape , data_size_gb
113+
114+
def create_combined_plot(sizes, numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0,
                         numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1, output_dir="plots"):
    """
    Render the axis-0 and axis-1 results as two grouped bar charts in one figure.

    Parameters:
    - sizes: Array sizes; each entry becomes one group of three bars
    - numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0:
      Bar heights for the left (axis=0) subplot, one value per entry in sizes
    - numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1:
      Bar heights for the right (axis=1) subplot, one value per entry in sizes
    - output_dir: Directory that receives the PNG (created if missing)

    The figure is saved as 'concatenate_benchmark_combined.png' inside
    output_dir, displayed with plt.show() and then closed. Nothing is returned.
    """
    os.makedirs(output_dir, exist_ok=True)
    plot_path = os.path.join(output_dir, 'concatenate_benchmark_combined.png')

    # Both subplots share the y axis so bar heights are directly comparable
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 8), sharey=True)

    tick_labels = [str(size) for size in sizes]
    x = np.arange(len(sizes))
    width = 0.25

    # One row per method: (legend label, bar colour, shift from the group centre)
    series_style = (
        ('NumPy', '#1f77b4', -width),
        ('Blosc2 Unaligned', '#ff7f0e', 0),
        ('Blosc2 Aligned', '#2ca02c', width),
    )
    # One row per subplot: (axes, concatenation axis, heights in series_style order)
    panels = (
        (ax0, 0, (numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0)),
        (ax1, 1, (numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1)),
    )

    # Pass 1: draw the bars, keeping the containers for the value labels below
    drawn = []
    for ax, _, speeds in panels:
        containers = []
        for (label, color, shift), heights in zip(series_style, speeds):
            containers.append(ax.bar(x + shift, heights, width, label=label, color=color))
        drawn.append((ax, containers))

    # Pass 2: titles, ticks, grid and legend for each subplot
    for ax, axis, _ in panels:
        ax.set_xlabel('Array Size (N for NxN array)', fontsize=12)
        ax.set_title(f'Concatenation Performance (axis={axis})', fontsize=14)
        ax.set_xticks(x)
        ax.set_xticklabels(tick_labels)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        ax.yaxis.set_major_formatter(ScalarFormatter(useOffset=False))
        ax.legend(
            title="Concatenation Methods",
            loc='upper left',
            fontsize=12,
            frameon=True,
            facecolor='white',
            edgecolor='black',
            framealpha=0.8,
        )

    # The y axis is shared, so only the left subplot carries the label
    ax0.set_ylabel('Throughput (GB/s)', fontsize=12)

    # Pass 3: write each bar's value just above its top edge
    for ax, containers in drawn:
        for container in containers:
            for bar in container:
                top = bar.get_height()
                centre = bar.get_x() + bar.get_width() / 2
                ax.annotate(
                    f'{top:.2f} GB/s',
                    xy=(centre, top),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center',
                    va='bottom',
                    rotation=90,
                    fontsize=8,
                )

    plt.tight_layout()
    plt.savefig(plot_path, dpi=300)
    plt.show()
    plt.close()

    print(f"Combined plot saved to {plot_path}")
92193
93194
94195def main ():
@@ -100,24 +201,47 @@ def main():
100201 sizes = [500 , 1000 , 2000 , 4000 ] # must be divisible by 4 for aligned chunks
101202 num_arrays = 10
102203
204+ # Lists to store results for both axes
205+ numpy_speeds_axis0 = []
206+ unaligned_speeds_axis0 = []
207+ aligned_speeds_axis0 = []
208+ numpy_speeds_axis1 = []
209+ unaligned_speeds_axis1 = []
210+ aligned_speeds_axis1 = []
211+
103212 for axis in [0 , 1 ]:
104213 print (f"\n Concatenating { num_arrays } arrays along axis { axis } " )
105- print (f"{ 'Size' :<10} { 'NumPy' :<10 } { 'Unaligned' :<10 } { 'Aligned' :<10 } { 'Alig vs Unalig' :<16} { 'Speedup vs NumPy' :<16} " )
106- print (f"{ '-' * 80 } " )
214+ print (f"{ 'Size' :<10} { 'NumPy (GB/s) ' :<14 } { 'Unaligned (GB/s) ' :<18 } { 'Aligned (GB/s) ' :<16 } { 'Alig vs Unalig' :<16} { 'Aligned vs NumPy' :<16} " )
215+ print (f"{ '-' * 90 } " )
107216
108217 for size in sizes :
109218 # Run the benchmarks
110- numpy_time , numpy_shape = run_numpy_benchmark (num_arrays , size , axis = axis )
111- unaligned_time , shape1 = run_benchmark (num_arrays , size , aligned_chunks = False , axis = axis )
112- aligned_time , shape2 = run_benchmark (num_arrays , size , aligned_chunks = True , axis = axis )
219+ numpy_time , numpy_shape , data_size_gb = run_numpy_benchmark (num_arrays , size , axis = axis )
220+ unaligned_time , shape1 , _ = run_benchmark (num_arrays , size , aligned_chunks = False , axis = axis )
221+ aligned_time , shape2 , _ = run_benchmark (num_arrays , size , aligned_chunks = True , axis = axis )
113222
114- # Calculate speedups
115- numpy_vs_aligned = numpy_time / aligned_time if aligned_time > 0 else float ("inf" )
116- aligned_vs_unaligned = unaligned_time / aligned_time if aligned_time > 0 else float ("inf" )
223+ # Calculate throughputs in GB/s
224+ numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float ("inf" )
225+ unaligned_speed = data_size_gb / unaligned_time if unaligned_time > 0 else float ("inf" )
226+ aligned_speed = data_size_gb / aligned_time if aligned_time > 0 else float ("inf" )
227+
228+ # Store speeds in the appropriate list
229+ if axis == 0 :
230+ numpy_speeds_axis0 .append (numpy_speed )
231+ unaligned_speeds_axis0 .append (unaligned_speed )
232+ aligned_speeds_axis0 .append (aligned_speed )
233+ else :
234+ numpy_speeds_axis1 .append (numpy_speed )
235+ unaligned_speeds_axis1 .append (unaligned_speed )
236+ aligned_speeds_axis1 .append (aligned_speed )
237+
238+ # Calculate speedup ratios
239+ aligned_vs_unaligned = aligned_speed / unaligned_speed if unaligned_speed > 0 else float ("inf" )
240+ aligned_vs_numpy = aligned_speed / numpy_speed if numpy_speed > 0 else float ("inf" )
117241
118242 # Print results
119- print (f"{ size :<10} { numpy_time :<10.4f } { unaligned_time :<10.4f } { aligned_time :<10.4f } "
120- f"{ aligned_vs_unaligned :>10.2f} x { numpy_vs_aligned :>10.2f} x" )
243+ print (f"{ size :<10} { numpy_speed :<14.2f } { unaligned_speed :<18.2f } { aligned_speed :<16.2f } "
244+ f"{ aligned_vs_unaligned :>10.2f} x { aligned_vs_numpy :>10.2f} x" )
121245
122246 # Quick verification of result shape
123247 if axis == 0 :
@@ -132,7 +256,14 @@ def main():
132256 if shapes [i ] != expected_shape :
133257 print (f"Warning: { shape_name } shape { shapes [i ]} does not match expected { expected_shape } " )
134258
135- print (f"{ '=' * 80 } " )
259+ print (f"{ '=' * 90 } " )
260+
261+ # Create the combined plot with both axes
262+ create_combined_plot (
263+ sizes ,
264+ numpy_speeds_axis0 , unaligned_speeds_axis0 , aligned_speeds_axis0 ,
265+ numpy_speeds_axis1 , unaligned_speeds_axis1 , aligned_speeds_axis1
266+ )
136267
137268
138269if __name__ == "__main__" :
0 commit comments