Add new fp_accuracy param for LazyArray.compute()

FrancescAlted · FrancescAlted · commit 90565a87c9b9 · 2026-01-21T19:03:01.000+01:00
diff --git a/doc/reference/classes.rst b/doc/reference/classes.rst
@@ -54,3 +54,4 @@ Other Classes
     Storage
     Tuner
     URLPath
+    FPAccuracy
diff --git a/doc/reference/misc.rst b/doc/reference/misc.rst
@@ -57,6 +57,7 @@ This page documents the miscellaneous members of the ``blosc2`` module that do n
         SpecialValue,
         SplitMode,
         Tuner,
+        FPAccuracy,
         compute_chunks_blocks,
         get_slice_nchunks,
         remove_urlpath,
diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py
@@ -113,6 +113,21 @@ class Tuner(Enum):
     BTUNE = 32
 
 
+class FPAccuracy(Enum):
+    """
+    Floating point accuracy modes for Blosc2 computing with lazy expressions.
+
+    This is only relevant when using floating point dtypes with miniexpr.
+    """
+
+    #: Request 1.0 ULP (Unit in the Last Place) accuracy for floating point functions
+    HIGH = 1
+    #: Request 3.5 ULPs (Units in the Last Place) accuracy for floating point functions
+    LOW = 2
+    #: Default accuracy. An Enum alias of LOW, which is enough for most applications.
+    DEFAULT = LOW
+
+
 from .blosc2_ext import (
     DEFINED_CODECS_STOP,
     EXTENDED_HEADER_LENGTH,
diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx
@@ -578,7 +578,7 @@ cdef extern from "miniexpr.h":
                       const int64_t *shape, const int32_t *chunkshape,
                       const int32_t *blockshape, int *error, me_expr **out)
 
-    cdef enum me_compile_status:
+    ctypedef enum me_compile_status:
         ME_COMPILE_SUCCESS
         ME_COMPILE_ERR_OOM
         ME_COMPILE_ERR_PARSE
@@ -590,7 +590,7 @@ cdef extern from "miniexpr.h":
         ME_COMPILE_ERR_INVALID_ARG_TYPE
         ME_COMPILE_ERR_MIXED_TYPE_NESTED
 
-    cdef enum me_simd_ulp_mode:
+    ctypedef enum me_simd_ulp_mode:
         ME_SIMD_ULP_DEFAULT
         ME_SIMD_ULP_1
         ME_SIMD_ULP_3_5
@@ -647,7 +647,8 @@ ctypedef struct udf_udata:
 ctypedef struct me_udata:
     b2nd_array_t** inputs
     int ninputs
-    b2nd_array_t *array
+    me_eval_params* eval_params
+    b2nd_array_t* array
     void* aux_reduc_ptr
     int64_t chunks_in_array[B2ND_MAX_DIM]
     int64_t blocks_in_chunk[B2ND_MAX_DIM]
@@ -1819,6 +1820,8 @@ cdef class SChunk:
                         free(me_data.inputs)
                     if me_data.miniexpr_handle != NULL:  # XXX do we really need the conditional?
                         me_free(me_data.miniexpr_handle)
+                    if me_data.eval_params != NULL:
+                        free(me_data.eval_params)
                     free(me_data)
         elif self.schunk.storage.cparams.prefilter != NULL:
             # From Python the preparams->udata with always have the field py_func
@@ -2015,7 +2018,7 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
         # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes.
         aux_reduc_ptr = <void *> (<uintptr_t> udata.aux_reduc_ptr + offset_bytes)
     rc = me_eval_nd(miniexpr_handle, <const void**> input_buffers, udata.ninputs,
-                    aux_reduc_ptr, blocknitems, nchunk, nblock, NULL)
+                    aux_reduc_ptr, blocknitems, nchunk, nblock, udata.eval_params)
     if rc != 0:
         raise RuntimeError(f"miniexpr: issues during evaluation; error code: {rc}")
 
@@ -2916,7 +2919,7 @@ cdef class NDArray:
 
         return udata
 
-    cdef me_udata *_fill_me_udata(self, inputs, aux_reduc):
+    cdef me_udata *_fill_me_udata(self, inputs, fp_accuracy, aux_reduc):
         cdef me_udata *udata = <me_udata *> malloc(sizeof(me_udata))
         operands = list(inputs.values())
         ninputs = len(operands)
@@ -2927,6 +2930,10 @@ cdef class NDArray:
             inputs_[i].chunk_cache.data = NULL
         udata.inputs = inputs_
         udata.ninputs = ninputs
+        cdef me_eval_params* eval_params = <me_eval_params*> malloc(sizeof(me_eval_params))
+        eval_params.disable_simd = False
+        eval_params.simd_ulp_mode = ME_SIMD_ULP_3_5 if fp_accuracy == blosc2.FPAccuracy.LOW else ME_SIMD_ULP_1
+        udata.eval_params = eval_params
         udata.array = self.array
         cdef void* aux_reduc_ptr = NULL
         if aux_reduc is not None:
@@ -2941,12 +2948,12 @@ cdef class NDArray:
 
         return udata
 
-    def _set_pref_expr(self, expression, inputs, aux_reduc=None):
+    def _set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None):
         # Set prefilter for miniexpr
         cdef blosc2_cparams* cparams = self.array.sc.storage.cparams
         cparams.prefilter = <blosc2_prefilter_fn> miniexpr_prefilter
 
-        cdef me_udata* udata = self._fill_me_udata(inputs, aux_reduc)
+        cdef me_udata* udata = self._fill_me_udata(inputs, fp_accuracy, aux_reduc)
 
         # Get the compiled expression handle for multi-threading
         cdef Py_ssize_t n = len(inputs)
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
@@ -302,7 +302,12 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
         pass
 
     @abstractmethod
-    def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blosc2.NDArray:
+    def compute(
+        self,
+        item: slice | list[slice] | None = None,
+        fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT,
+        **kwargs: Any,
+    ) -> blosc2.NDArray:
         """
         Return a :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`.
 
@@ -313,9 +318,14 @@ def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blo
             the evaluated result. This difference between slicing operands and slicing the final expression
             is important when reductions or a where clause are used in the expression.
 
+        fp_accuracy: :ref:`blosc2.FPAccuracy`, optional
+            Specifies the floating-point accuracy to be used during computation.
+            By default, :ref:`blosc2.FPAccuracy.DEFAULT` is used.
+
         kwargs: Any, optional
             Keyword arguments that are supported by the :func:`empty` constructor.
             These arguments will be set in the resulting :ref:`NDArray`.
+            Additionally, the following special kwargs are supported:
 
         Returns
         -------
@@ -1296,10 +1306,11 @@ def fast_eval(  # noqa: C901
     if use_miniexpr:
         cparams = kwargs.pop("cparams", blosc2.CParams())
         # All values will be overwritten, so we can use an uninitialized array
+        fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT)
         res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs)
         try:
             print("expr->miniexpr:", expression)
-            res_eval._set_pref_expr(expression, operands)
+            res_eval._set_pref_expr(expression, operands, fp_accuracy=fp_accuracy)
             # Data to compress is fetched from operands, so it can be uninitialized here
             data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8)
             # Exercise prefilter for each chunk
@@ -2001,6 +2012,7 @@ def reduce_slices(  # noqa: C901
     if use_miniexpr:
         # Experiments say that not splitting is best (at least on Apple Silicon M4 Pro)
         cparams = kwargs.pop("cparams", blosc2.CParams(splitmode=blosc2.SplitMode.NEVER_SPLIT))
+        fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT)
         # Create a fake NDArray just to drive the miniexpr evaluation (values won't be used)
         res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs)
         # Compute the number of blocks in the result
@@ -2027,7 +2039,7 @@ def reduce_slices(  # noqa: C901
         try:
             print("expr->miniexpr:", expression, reduce_op)
             expression = f"{reduce_op_str}({expression})"
-            res_eval._set_pref_expr(expression, operands, aux_reduc)
+            res_eval._set_pref_expr(expression, operands, fp_accuracy, aux_reduc)
             # Data won't even try to be compressed, so buffers can be unitialized and reused
             data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8)
             chunk_data = np.empty(res_eval.schunk.chunksize + blosc2.MAX_OVERHEAD, dtype=np.uint8)
@@ -3142,7 +3154,9 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
             lazy_expr._order = order
         return lazy_expr
 
-    def compute(self, item=(), **kwargs) -> blosc2.NDArray:
+    def compute(
+        self, item=(), fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, **kwargs
+    ) -> blosc2.NDArray:
         # When NumPy ufuncs are called, the user may add an `out` parameter to kwargs
         if "out" in kwargs:  # use provided out preferentially
             kwargs["_output"] = kwargs.pop("out")
@@ -3452,7 +3466,7 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
             lazy_expr._order = order
         return lazy_expr
 
-    def compute(self, item=(), **kwargs):
+    def compute(self, item=(), fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, **kwargs):
         # Get kwargs
         if kwargs is None:
             kwargs = {}
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
@@ -278,6 +278,20 @@ def test_expression_with_constants(array_fixture):
         np.testing.assert_allclose(res[:], nres)
 
 
+@pytest.mark.parametrize("accuracy", [blosc2.FPAccuracy.LOW, blosc2.FPAccuracy.HIGH])
+def test_fp_precision(array_fixture, accuracy):
+    a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
+    # Test with operands with same chunks and blocks
+    expr = blosc2.sin(a1) ** 2 - blosc2.cos(a2) ** 2 + blosc2.sqrt(a3)
+    # Both accuracy modes should be accurate enough to pass the tolerances checked below
+    res = expr.compute(fp_accuracy=accuracy)
+    nres = ne_evaluate("sin(na1) ** 2 - cos(na2) ** 2 + sqrt(na3)")
+    if na1.dtype == np.float32:
+        np.testing.assert_allclose(res[:], nres, rtol=1e-6, atol=1e-6)
+    else:
+        np.testing.assert_allclose(res[:], nres)
+
+
 @pytest.mark.parametrize("compare_expressions", [True, False])
 @pytest.mark.parametrize("comparison_operator", ["==", "!=", ">=", ">", "<=", "<"])
 def test_comparison_operators(dtype_fixture, compare_expressions, comparison_operator):

-Original file line number
+Diff line change
     Storage
     Tuner
     URLPath
 +    FPAccuracy