Skip to content

Commit a921a02

Browse files
committed
[mx] RCEIL rounding for non-CUDA backends
The USE_PTX flag in MXFP8 Triton kernels was gated by `not IS_ROCM`, which assumed that only CUDA and ROCm backends exist. This would cause errors on other backends, such as XPU.

Signed-off-by: Ula Golowicz <urszula.golowicz@intel.com>
1 parent 62212e4 commit a921a02

2 files changed

Lines changed: 16 additions & 5 deletions

File tree

torchao/prototype/mx_formats/kernels.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
is_MI350,
2525
is_ROCM,
2626
is_sm_at_least_100,
27+
is_XPU,
2728
torch_version_at_least,
2829
)
2930

@@ -458,9 +459,14 @@ def triton_mxfp8_dequant_dim0(
458459
_triton_kernels_available = (
459460
torch_version_at_least("2.7.0")
460461
and has_triton()
461-
and torch.cuda.is_available()
462-
and (is_sm_at_least_100() and is_cuda_version_at_least(12, 8))
463-
or (is_ROCM() and is_MI350())
462+
and (
463+
(
464+
torch.cuda.is_available()
465+
and (is_sm_at_least_100() and is_cuda_version_at_least(12, 8))
466+
)
467+
or (is_ROCM() and is_MI350())
468+
or is_XPU()
469+
)
464470
)
465471

466472
if _triton_kernels_available:
@@ -469,6 +475,7 @@ def triton_mxfp8_dequant_dim0(
469475
from torch.library import triton_op, wrap_triton
470476

471477
IS_ROCM = tl.constexpr(is_ROCM())
478+
IS_XPU = tl.constexpr(is_XPU())
472479

473480
@triton.jit
474481
def _triton_calculate_scale_rceil(x, axis, USE_PTX: tl.constexpr):
@@ -686,7 +693,7 @@ def to_mxfp8_dim1_kernel(
686693
col_scale_r, col_scale_e8m0_r = _triton_calculate_scale_rceil(
687694
x_block_abs_t_r,
688695
axis=1,
689-
USE_PTX=not IS_ROCM,
696+
USE_PTX=(not IS_ROCM and not IS_XPU),
690697
)
691698
else:
692699
tl.static_assert(SCALING_MODE == "floor")
@@ -796,7 +803,7 @@ def to_mxfp8_dim0_kernel(
796803
scale_fp32_r, scale_e8m0_r = _triton_calculate_scale_rceil(
797804
x_block_abs_r,
798805
axis=1,
799-
USE_PTX=not IS_ROCM,
806+
USE_PTX=(not IS_ROCM and not IS_XPU),
800807
)
801808
else:
802809
tl.static_assert(SCALING_MODE == "floor")

torchao/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,10 @@ def check_cpu_version(device, version="2.6.0"):
12451245
return device == "cpu" and torch_version_at_least(version)
12461246

12471247

1248+
def is_XPU():
1249+
return hasattr(torch, "xpu") and torch.xpu.is_available()
1250+
1251+
12481252
def check_xpu_version(device, version="2.8.0"):
12491253
if isinstance(device, torch.device):
12501254
device = device.type

0 commit comments

Comments (0)