@@ -54,46 +54,85 @@ def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0):
5454 return duration , result .shape
5555
5656
def run_numpy_benchmark(num_arrays=10, size=500, axis=0):
    """
    Benchmark numpy.concatenate performance for comparison.

    Builds ``num_arrays`` equally shaped int32 arrays filled with consecutive
    integers, concatenates them along ``axis`` and times only the
    concatenation step (array creation is excluded from the measurement).

    Parameters:
    - num_arrays: Number of arrays to concatenate (must be at least 1)
    - size: Base size for array dimensions; each array spans
      ``size // num_arrays`` along the concatenation axis
    - axis: Axis along which to concatenate (0 or 1)

    Returns:
    - (duration, shape): elapsed seconds spent in numpy.concatenate and the
      shape of the concatenated result

    Raises:
    - ValueError: if axis is not 0 or 1, or if num_arrays is smaller than 1
    """
    if axis not in (0, 1):
        raise ValueError("Only axis 0 and 1 are supported")
    if num_arrays < 1:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("num_arrays must be at least 1")

    # The dimension along the concatenation axis is split between the arrays;
    # the other dimension must be identical for all of them.
    part = size // num_arrays
    shape = (part, size) if axis == 0 else (size, part)
    count = part * size  # number of elements per array

    # Create arrays: array i holds the values [i * count, (i + 1) * count)
    numpy_arrays = [
        np.arange(i * count, (i + 1) * count, 1, dtype="i4").reshape(shape)
        for i in range(num_arrays)
    ]

    # Time the concatenation with a monotonic, high-resolution clock
    start_time = time.perf_counter()
    result = np.concatenate(numpy_arrays, axis=axis)
    duration = time.perf_counter() - start_time

    return duration, result.shape
92+
93+
def main():
    """
    Run the concatenation benchmarks and print a comparison table.

    For each axis (0 and 1) and each size, this times run_numpy_benchmark and
    run_benchmark with aligned_chunks=False and aligned_chunks=True, prints
    the three timings plus two ratios (unaligned/aligned and NumPy/aligned),
    and warns when a reported result shape differs from (size, size).
    """
    banner = "=" * 60
    print(banner)
    print("Blosc2 vs NumPy concatenation benchmark")
    print(banner)

    # Parameters
    sizes = [500, 1000, 2000, 4000]  # must be divisible by 4 for aligned chunks
    num_arrays = 10

    for axis in [0, 1]:
        print(f"\nConcatenating {num_arrays} arrays along axis {axis}")
        print(
            f"{'Size':<10} {'NumPy':<10} {'Unaligned':<10} {'Aligned':<10} "
            f"{'Alig vs Unalig':<16} {'Speedup vs NumPy':<16}"
        )
        print("-" * 80)

        for size in sizes:
            # Run the benchmarks
            numpy_time, numpy_shape = run_numpy_benchmark(num_arrays, size, axis=axis)
            unaligned_time, shape1 = run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis)
            aligned_time, shape2 = run_benchmark(num_arrays, size, aligned_chunks=True, axis=axis)

            # Calculate speedups; a zero aligned time would divide by zero,
            # so both ratios are reported as infinite in that case.
            if aligned_time > 0:
                numpy_vs_aligned = numpy_time / aligned_time
                aligned_vs_unaligned = unaligned_time / aligned_time
            else:
                numpy_vs_aligned = aligned_vs_unaligned = float("inf")

            # Print results
            print(
                f"{size:<10} {numpy_time:<10.4f} {unaligned_time:<10.4f} {aligned_time:<10.4f} "
                f"{aligned_vs_unaligned:>10.2f}x {numpy_vs_aligned:>10.2f}x"
            )

            # Quick verification of result shape. The total is (size, size)
            # for either axis; for the NumPy run this holds when size is a
            # multiple of num_arrays, which is true for the sizes above.
            expected_shape = (size, size)
            reported = (
                ("NumPy", numpy_shape),
                ("Blosc2 unaligned", shape1),
                ("Blosc2 aligned", shape2),
            )
            for shape_name, shape in reported:
                if shape != expected_shape:
                    print(f"Warning: {shape_name} shape {shape} does not match expected {expected_shape}")

    print("=" * 80)
97136
98137
99138if __name__ == "__main__" :
0 commit comments