From 9795e686417f4127ffb2c086844d84e3080c7ac9 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <katharine.hyatt@ugent.be>
Date: Thu, 25 Jun 2026 15:34:09 +0200
Subject: [PATCH 1/4] Add in planar tests for ROCTensorMap

---
 test/amd/planar.jl | 302 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 302 insertions(+)
 create mode 100644 test/amd/planar.jl

diff --git a/test/amd/planar.jl b/test/amd/planar.jl
new file mode 100644
index 000000000..339373ac2
--- /dev/null
+++ b/test/amd/planar.jl
@@ -0,0 +1,302 @@
+using Test, TestExtras
+using Adapt
+using TensorOperations, AMDGPU 
+using TensorKit
+using TensorKit: type_repr
+using TensorKit: PlanarTrivial, ℙ
+using TensorKit: planaradd!, planartrace!, planarcontract!
+
+const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt)
+@assert !isnothing(AMDGPUExt)
+const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap)
+
+spacelist = default_spacelist(fast_tests)
+
+for V in spacelist
+    I = sectortype(first(V))
+    Istr = type_repr(I)
+    BraidingStyle(I) isa NoBraiding && continue
+    @timedtestset "Braiding tensor + AMDGPU with symmetry: $Istr" verbose = true begin
+        W = V[1] ⊗ V[2] ← V[2] ⊗ V[1]
+        T = isreal(sectortype(W)) ? Float64 : ComplexF64
+        t1 = @constinferred BraidingTensor{T, spacetype(V[2]), ROCVector{T, AMDGPU.Mem.HIPBuffer}}(W)
+        @test space(t1) == W
+        @test codomain(t1) == codomain(W)
+        @test domain(t1) == domain(W)
+        @test scalartype(t1) == (isreal(sectortype(W)) ? Float64 : ComplexF64)
+        @test storagetype(t1) == ROCVector{scalartype(t1), AMDGPU.Mem.HIPBuffer}
+        t2 = @constinferred BraidingTensor{ComplexF64, spacetype(V[2]), ROCVector{ComplexF64, AMDGPU.Mem.HIPBuffer}}(W)
+        @test scalartype(t2) == ComplexF64
+        @test storagetype(t2) == ROCVector{ComplexF64, AMDGPU.Mem.HIPBuffer}
+        t3 = @testinferred adapt(storagetype(t2), t1)
+        @test storagetype(t3) == storagetype(t2)
+        t4 = @testinferred adapt(scalartype(t2), t1)
+        @test storagetype(t3) == storagetype(t2)
+        # allowscalar needed for the StridedView comparison
+        @test t3 ≈ t2
+
+        W2 = reverse(codomain(W)) ← domain(W)
+        @test_throws SpaceMismatch BraidingTensor(W2)
+
+        @test adjoint(t1) isa BraidingTensor
+        @test complex(t1) isa BraidingTensor
+        @test scalartype(complex(t1)) <: Complex
+
+        t3 = @inferred TensorMap(t2)
+        @test storagetype(t3) == ROCVector{ComplexF64, AMDGPU.Mem.HIPBuffer}
+        t4 = braid(adapt(ROCArray, id(scalartype(t2), domain(t2))), ((2, 1), (3, 4)), (1, 2, 3, 4))
+        @test t1 ≈ t4
+        for (c, b) in blocks(t1)
+            @test block(t1, c) ≈ b ≈ block(t3, c)
+        end
+
+        AMDGPU.@allowscalar begin
+            for (f1, f2) in fusiontrees(t1)
+                @test t1[f1, f2] ≈ t3[f1, f2]
+            end
+        end
+
+        t5 = @inferred TensorMap(t2')
+        @test storagetype(t5) == ROCVector{ComplexF64, AMDGPU.Mem.HIPBuffer}
+        t6 = braid(adapt(ROCArray, id(scalartype(t2), domain(t2'))), ((2, 1), (3, 4)), (4, 3, 2, 1))
+        AMDGPU.@allowscalar begin
+            @test t5 ≈ t6
+            for (c, b) in blocks(t1')
+                @test block(t1', c) ≈ b ≈ block(t5, c)
+            end
+            for (f1, f2) in fusiontrees(t1')
+                # needed here for broadcasting the - in isapprox
+                @test t1'[f1, f2] ≈ t5[f1, f2]
+            end
+        end
+    end
+end
+
+@testset "planar methods" verbose = true begin
+    @testset "planaradd" begin
+        A = AMDGPU.randn(ℂ^2 ⊗ ℂ^3 ← ℂ^6 ⊗ ℂ^5 ⊗ ℂ^4)
+        C = AMDGPU.randn((ℂ^5)' ⊗ (ℂ^6)' ← ℂ^4 ⊗ (ℂ^3)' ⊗ (ℂ^2)')
+        A′ = force_planar(A)
+        C′ = force_planar(C)
+        p = ((4, 3), (5, 2, 1))
+
+        @test force_planar(tensoradd!(C, A, p, false, true, true)) ≈
+            planaradd!(C′, A′, p, true, true)
+    end
+
+    @testset "planartrace" begin
+        A = AMDGPU.randn(ℂ^2 ⊗ ℂ^3 ← ℂ^2 ⊗ ℂ^5 ⊗ ℂ^4)
+        C = AMDGPU.randn((ℂ^5)' ⊗ ℂ^3 ← ℂ^4)
+        A′ = force_planar(A)
+        C′ = force_planar(C)
+        p = ((4, 2), (5,))
+        q = ((1,), (3,))
+
+        @test force_planar(tensortrace!(C, A, p, q, false, true, true)) ≈
+            planartrace!(C′, A′, p, q, true, true)
+    end
+
+    @testset "planarcontract" begin
+        A = AMDGPU.randn(ℂ^2 ⊗ ℂ^3 ← ℂ^2 ⊗ ℂ^5 ⊗ ℂ^4)
+        B = AMDGPU.randn(ℂ^2 ⊗ ℂ^4 ← ℂ^4 ⊗ ℂ^3)
+        C = AMDGPU.randn((ℂ^5)' ⊗ (ℂ^2)' ⊗ ℂ^2 ← (ℂ^2)' ⊗ ℂ^4)
+
+        A′ = force_planar(A)
+        B′ = force_planar(B)
+        C′ = force_planar(C)
+
+        pA = ((1, 3, 4), (5, 2))
+        pB = ((2, 4), (1, 3))
+        pAB = ((3, 2, 1), (4, 5))
+
+        @test force_planar(tensorcontract!(C, A, pA, false, B, pB, false, pAB, true, true)) ≈
+            planarcontract!(C′, A′, pA, B′, pB, pAB, true, true)
+    end
+end
+
+@testset "@planar" verbose = true begin
+    T = ComplexF64
+
+    @testset "contractcheck" begin
+        V = ℂ^2
+        A = AMDGPU.rand(T, V ⊗ V ← V)
+        B = AMDGPU.rand(T, V ⊗ V ← V')
+        @tensor C1[i j; k l] := A[i j; m] * B[k l; m]
+        @tensor contractcheck = true C2[i j; k l] := A[i j; m] * B[k l; m]
+        @test C1 ≈ C2
+        B2 = AMDGPU.rand(T, V ⊗ V ← V) # wrong duality for third space
+        @test_throws SpaceMismatch("incompatible spaces for m: $V ≠ $(V')") begin
+            @tensor contractcheck = true C3[i j; k l] := A[i j; m] * B2[k l; m]
+        end
+
+        #= # TODO NEEDS UPDATES TO planar/preprocessors
+        A = AMDGPU.rand(T, V ← V ⊗ V)
+        B = AMDGPU.rand(T, V ⊗ V ← V)
+        @planar C1[i; j] := A[i; k l] * τ[k l; m n] * B[m n; j]
+        @planar contractcheck = true C2[i; j] := A[i; k l] * τ[k l; m n] * B[m n; j]
+        @test C1 ≈ C2
+        @test_throws SpaceMismatch("incompatible spaces for m: $V ≠ $(V')") begin
+            @planar contractcheck = true C3[i; j] := A[i; k l] * τ[k l; m n] * B[n j; m]
+        end=#
+    end
+
+    @testset "MPS networks" begin
+        P = ℂ^2
+        Vmps = ℂ^12
+        Vmpo = ℂ^4
+
+        # ∂AC
+        # -------
+        x = AMDGPU.randn(T, Vmps ⊗ P ← Vmps)
+        O = AMDGPU.randn(T, Vmpo ⊗ P ← P ⊗ Vmpo)
+        GL = AMDGPU.randn(T, Vmps ⊗ Vmpo' ← Vmps)
+        GR = AMDGPU.randn(T, Vmps ⊗ Vmpo ← Vmps)
+
+        x′ = force_planar(x)
+        O′ = force_planar(O)
+        GL′ = force_planar(GL)
+        GR′ = force_planar(GR)
+
+        for alloc in
+            (TensorOperations.DefaultAllocator(),)
+            @tensor allocator = alloc y[-1 -2; -3] := GL[-1 2; 1] * x[1 3; 4] *
+                O[2 -2; 3 5] * GR[4 5; -3]
+            @planar allocator = alloc y′[-1 -2; -3] := GL′[-1 2; 1] * x′[1 3; 4] *
+                O′[2 -2; 3 5] * GR′[4 5; -3]
+            @test force_planar(y) ≈ y′
+        end
+
+        # ∂AC2
+        # -------
+        x2 = AMDGPU.randn(T, Vmps ⊗ P ← Vmps ⊗ P')
+        x2′ = force_planar(x2)
+        @tensor contractcheck = true y2[-1 -2; -3 -4] := GL[-1 7; 6] * x2[6 5; 1 3] *
+            O[7 -2; 5 4] * O[4 -4; 3 2] *
+            GR[1 2; -3]
+        @planar y2′[-1 -2; -3 -4] := GL′[-1 7; 6] * x2′[6 5; 1 3] * O′[7 -2; 5 4] *
+            O′[4 -4; 3 2] * GR′[1 2; -3]
+        @test force_planar(y2) ≈ y2′
+
+        # transfer matrix
+        # ----------------
+        v = AMDGPU.randn(T, Vmps ← Vmps)
+        v′ = force_planar(v)
+        @tensor ρ[-1; -2] := x[-1 2; 1] * conj(x[-2 2; 3]) * v[1; 3]
+        @planar ρ′[-1; -2] := x′[-1 2; 1] * conj(x′[-2 2; 3]) * v′[1; 3]
+        @test force_planar(ρ) ≈ ρ′
+
+        @tensor ρ2[-1 -2; -3] := GL[1 -2; 3] * x[3 2; -3] * conj(x[1 2; -1])
+        @plansor ρ3[-1 -2; -3] := GL[1 2; 4] * x[4 5; -3] * τ[2 3; 5 -2] * conj(x[1 3; -1])
+        #@planar ρ2′[-1 -2; -3] := GL′[1 2; 4] * x′[4 5; -3] * τ[2 3; 5 -2] *
+        #    conj(x′[1 3; -1])
+        #@test force_planar(ρ2) ≈ ρ2′
+        @test ρ2 ≈ ρ3
+
+        # Periodic boundary conditions
+        # ----------------------------
+        f1 = isomorphism(storagetype(O), fuse(Vmpo^3), Vmpo ⊗ Vmpo' ⊗ Vmpo)
+        f2 = isomorphism(storagetype(O), fuse(Vmpo^3), Vmpo ⊗ Vmpo' ⊗ Vmpo)
+        f1′ = force_planar(f1)
+        f2′ = force_planar(f2)
+        @tensor O_periodic1[-1 -2; -3 -4] := O[1 -2; -3 2] * f1[-1; 1 3 4] *
+            conj(f2[-4; 2 3 4])
+        @plansor O_periodic2[-1 -2; -3 -4] := O[1 2; -3 6] * f1[-1; 1 3 5] *
+            conj(f2[-4; 6 7 8]) * τ[2 3; 7 4] *
+            τ[4 5; 8 -2]
+        #=@planar O_periodic′[-1 -2; -3 -4] := O′[1 2; -3 6] * f1′[-1; 1 3 5] *
+            conj(f2′[-4; 6 7 8]) * τ[2 3; 7 4] *
+            τ[4 5; 8 -2]=#
+        @test O_periodic1 ≈ O_periodic2
+        #@test force_planar(O_periodic1) ≈ O_periodic′
+    end
+
+    @testset "MERA networks" begin
+        Vmera = ℂ^2
+
+        u = AMDGPU.randn(T, Vmera ⊗ Vmera ← Vmera ⊗ Vmera)
+        w = AMDGPU.randn(T, Vmera ⊗ Vmera ← Vmera)
+        ρ = AMDGPU.randn(T, Vmera ⊗ Vmera ⊗ Vmera ← Vmera ⊗ Vmera ⊗ Vmera)
+        h = AMDGPU.randn(T, Vmera ⊗ Vmera ⊗ Vmera ← Vmera ⊗ Vmera ⊗ Vmera)
+
+        u′ = force_planar(u)
+        w′ = force_planar(w)
+        ρ′ = force_planar(ρ)
+        h′ = force_planar(h)
+
+        for alloc in
+            (TensorOperations.DefaultAllocator(),)
+            @tensor allocator = alloc begin
+                C = (
+                    (
+                        (
+                            (
+                                (
+                                    ((h[9 3 4; 5 1 2] * u[1 2; 7 12]) * conj(u[3 4; 11 13])) *
+                                        (u[8 5; 15 6] * w[6 7; 19])
+                                ) *
+                                    (conj(u[8 9; 17 10]) * conj(w[10 11; 22]))
+                            ) *
+                                ((w[12 14; 20] * conj(w[13 14; 23])) * ρ[18 19 20; 21 22 23])
+                        ) *
+                            w[16 15; 18]
+                    ) * conj(w[16 17; 21])
+                )
+            end
+            @planar allocator = alloc begin
+                C′ = (
+                    (
+                        (
+                            (
+                                (
+                                    ((h′[9 3 4; 5 1 2] * u′[1 2; 7 12]) * conj(u′[3 4; 11 13])) *
+                                        (u′[8 5; 15 6] * w′[6 7; 19])
+                                ) *
+                                    (conj(u′[8 9; 17 10]) * conj(w′[10 11; 22]))
+                            ) *
+                                ((w′[12 14; 20] * conj(w′[13 14; 23])) * ρ′[18 19 20; 21 22 23])
+                        ) *
+                            w′[16 15; 18]
+                    ) * conj(w′[16 17; 21])
+                )
+            end
+            @test C ≈ C′
+        end
+    end
+
+    @testset "Issue 93" begin
+        T = Float64
+        V1 = ℂ^2
+        V2 = ℂ^3
+        t1 = AMDGPU.randn(T, V1 ← V2)
+        t2 = AMDGPU.randn(T, V2 ← V1)
+
+        tr1 = @planar opt = true t1[a; b] * t2[b; a] / 2
+        tr2 = @planar opt = true t1[d; a] * t2[b; c] * 1 / 2 * τ[c b; a d]
+        tr3 = @planar opt = true t1[d; a] * t2[b; c] * τ[a c; d b] / 2
+        tr4 = @planar opt = true t1[f; a] * 1 / 2 * t2[c; d] * τ[d b; c e] * τ[e b; a f]
+        tr5 = @planar opt = true t1[f; a] * t2[c; d] / 2 * τ[d b; c e] * τ[a e; f b]
+        tr6 = @planar opt = true t1[f; a] * t2[c; d] * τ[c d; e b] / 2 * τ[e b; a f]
+        tr7 = @planar opt = true t1[f; a] * t2[c; d] * (τ[c d; e b] * τ[a e; f b] / 2)
+
+        @test tr1 ≈ tr2 ≈ tr3 ≈ tr4 ≈ tr5 ≈ tr6 ≈ tr7
+
+        tr1 = @plansor opt = true t1[a; b] * t2[b; a] / 2
+        tr2 = @plansor opt = true t1[d; a] * t2[b; c] * 1 / 2 * τ[c b; a d]
+        tr3 = @plansor opt = true t1[d; a] * t2[b; c] * τ[a c; d b] / 2
+        tr4 = @plansor opt = true t1[f; a] * 1 / 2 * t2[c; d] * τ[d b; c e] * τ[e b; a f]
+        tr5 = @plansor opt = true t1[f; a] * t2[c; d] / 2 * τ[d b; c e] * τ[a e; f b]
+        tr6 = @plansor opt = true t1[f; a] * t2[c; d] * τ[c d; e b] / 2 * τ[e b; a f]
+        tr7 = @plansor opt = true t1[f; a] * t2[c; d] * (τ[c d; e b] * τ[a e; f b] / 2)
+
+        @test tr1 ≈ tr2 ≈ tr3 ≈ tr4 ≈ tr5 ≈ tr6 ≈ tr7
+    end
+    @testset "Issue 262" begin
+        V = ℂ^2
+        A = AMDGPU.randn(T, V ← V)
+        B = AMDGPU.randn(T, V ← V')
+        C = AMDGPU.randn(T, V' ← V)
+        @planar D1[i; j] := A[i; j] + B[i; k] * C[k; j]
+        @planar D2[i; j] := B[i; k] * C[k; j] + A[i; j]
+        @test D1 ≈ D2
+    end
+end

From 3db6336f1be1fbca64ee3357673e11db0f608d5b Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <katharine.hyatt@ugent.be>
Date: Thu, 25 Jun 2026 15:45:57 +0200
Subject: [PATCH 2/4] Formatter

---
 test/amd/planar.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/amd/planar.jl b/test/amd/planar.jl
index 339373ac2..e2c74b9ee 100644
--- a/test/amd/planar.jl
+++ b/test/amd/planar.jl
@@ -1,6 +1,6 @@
 using Test, TestExtras
 using Adapt
-using TensorOperations, AMDGPU 
+using TensorOperations, AMDGPU
 using TensorKit
 using TensorKit: type_repr
 using TensorKit: PlanarTrivial, ℙ

From 3160e769e1147c45a9f0552fce9b323587060ccb Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <katharine.hyatt@ugent.be>
Date: Thu, 25 Jun 2026 17:01:03 +0200
Subject: [PATCH 3/4] Add ROCTensorMap factorizations tests

---
 test/amd/factorizations.jl | 622 +++++++++++++++++++++++++++++++++++++
 1 file changed, 622 insertions(+)
 create mode 100644 test/amd/factorizations.jl

diff --git a/test/amd/factorizations.jl b/test/amd/factorizations.jl
new file mode 100644
index 000000000..b94f8fc60
--- /dev/null
+++ b/test/amd/factorizations.jl
@@ -0,0 +1,622 @@
+using Adapt, AMDGPU
+using Test, TestExtras
+using TensorKit
+using LinearAlgebra: LinearAlgebra
+using MatrixAlgebraKit: DefaultAlgorithm, diagview
+const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt)
+@assert !isnothing(AMDGPUExt) "Failed to load TensorKit - AMDGPU extension"
+const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap)
+
+using AMDGPU.rocBLAS
+
+spacelist = factorization_spacelist(fast_tests)
+eltypes = (Float32, ComplexF64)
+
+for V in spacelist
+    I = sectortype(first(V))
+    Istr = TensorKit.type_repr(I)
+    println("---------------------------------------")
+    println("Factorizations with symmetry: $Istr")
+    println("---------------------------------------")
+    @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin
+        V1, V2, V3, V4, V5 = V
+        W = V1 ⊗ V2
+
+        @testset "QR decomposition" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W), AMDGPU.rand(T, W, W)',
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)')',
+                        AMDGPU.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5)), AMDGPU.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                Q, R = @constinferred qr_full(t)
+                @test Q * R ≈ t
+                @test isunitary(Q)
+
+                Q, R = @constinferred qr_full(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isunitary(Q)
+
+                Q, R = @constinferred qr_compact(t)
+                @test Q * R ≈ t
+                @test isisometric(Q)
+
+                Q, R = @constinferred qr_compact(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isisometric(Q)
+
+                Q, R = @constinferred left_orth(t)
+                @test Q * R ≈ t
+                @test isisometric(Q)
+
+                Q, R = @constinferred left_orth(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isisometric(Q)
+
+                N = @constinferred qr_null(t)
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                N = @constinferred qr_null(t, DefaultAlgorithm())
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                N = @constinferred left_null(t)
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                N = @constinferred left_null(t, DefaultAlgorithm())
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+            end
+
+            # empty tensor
+            for T in eltypes
+                t = AMDGPU.rand(T, V1 ⊗ V2, zerospace(V1))
+
+                Q, R = @constinferred qr_full(t)
+                @test Q * R ≈ t
+                @test isunitary(Q)
+                @test dim(R) == dim(t) == 0
+
+                Q, R = @constinferred qr_full(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isunitary(Q)
+                @test dim(R) == dim(t) == 0
+
+                Q, R = @constinferred qr_compact(t)
+                @test Q * R ≈ t
+                @test isisometric(Q)
+                @test dim(Q) == dim(R) == dim(t)
+
+                Q, R = @constinferred qr_compact(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isisometric(Q)
+                @test dim(Q) == dim(R) == dim(t)
+
+                Q, R = @constinferred left_orth(t)
+                @test Q * R ≈ t
+                @test isisometric(Q)
+                @test dim(Q) == dim(R) == dim(t)
+
+                Q, R = @constinferred left_orth(t, DefaultAlgorithm())
+                @test Q * R ≈ t
+                @test isisometric(Q)
+                @test dim(Q) == dim(R) == dim(t)
+
+                N = @constinferred qr_null(t)
+                @test isunitary(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                N = @constinferred qr_null(t, DefaultAlgorithm())
+                @test isunitary(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+            end
+        end
+
+        @testset "LQ decomposition" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W), AMDGPU.rand(T, W, W)',
+                        AMDGPU.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)')',
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5)), AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                L, Q = @constinferred lq_full(t)
+                @test L * Q ≈ t
+                @test isunitary(Q)
+
+                L, Q = @constinferred lq_full(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isunitary(Q)
+
+                L, Q = @constinferred lq_compact(t)
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+
+                L, Q = @constinferred lq_compact(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+
+                L, Q = @constinferred right_orth(t)
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+
+                L, Q = @constinferred right_orth(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+
+                Nᴴ = @constinferred lq_null(t)
+                @test isisometric(Nᴴ; side = :right)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+
+                Nᴴ = @constinferred lq_null(t, DefaultAlgorithm())
+                @test isisometric(Nᴴ; side = :right)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+            end
+
+            for T in eltypes
+                # empty tensor
+                t = AMDGPU.rand(T, zerospace(V1), V1 ⊗ V2)
+
+                L, Q = @constinferred lq_full(t)
+                @test L * Q ≈ t
+                @test isunitary(Q)
+                @test dim(L) == dim(t) == 0
+
+                L, Q = @constinferred lq_full(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isunitary(Q)
+                @test dim(L) == dim(t) == 0
+
+                L, Q = @constinferred lq_compact(t)
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+                @test dim(Q) == dim(L) == dim(t)
+
+                L, Q = @constinferred lq_compact(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+                @test dim(Q) == dim(L) == dim(t)
+
+                L, Q = @constinferred right_orth(t)
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+                @test dim(Q) == dim(L) == dim(t)
+
+                L, Q = @constinferred right_orth(t, DefaultAlgorithm())
+                @test L * Q ≈ t
+                @test isisometric(Q; side = :right)
+                @test dim(Q) == dim(L) == dim(t)
+
+                Nᴴ = @constinferred lq_null(t)
+                @test isunitary(Nᴴ)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+
+                Nᴴ = @constinferred lq_null(t, DefaultAlgorithm())
+                @test isunitary(Nᴴ)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+            end
+        end
+
+        @testset "Polar decomposition" begin
+            @testset for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W),
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'),
+                        AMDGPU.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                @assert domain(t) ≾ codomain(t)
+                w, p = @constinferred left_polar(t)
+                @test w * p ≈ t
+                @test isisometric(w)
+                @test isposdef(p)
+
+                w, p = @constinferred left_polar(t, DefaultAlgorithm())
+                @test w * p ≈ t
+                @test isisometric(w)
+                @test isposdef(p)
+
+                w, p = @constinferred left_orth(t; alg = :polar)
+                @test w * p ≈ t
+                @test isisometric(w)
+            end
+
+            @testset for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W),
+                        AMDGPU.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'),
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                @assert codomain(t) ≾ domain(t)
+                p, wᴴ = @constinferred right_polar(t)
+                @test p * wᴴ ≈ t
+                @test isisometric(wᴴ; side = :right)
+                @test isposdef(p)
+
+                p, wᴴ = @constinferred right_polar(t, DefaultAlgorithm())
+                @test p * wᴴ ≈ t
+                @test isisometric(wᴴ; side = :right)
+                @test isposdef(p)
+
+                p, wᴴ = @constinferred right_orth(t; alg = :polar)
+                @test p * wᴴ ≈ t
+                @test isisometric(wᴴ; side = :right)
+            end
+        end
+
+        @testset "SVD" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W), AMDGPU.rand(T, W, W)',
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                        AMDGPU.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                u, s, vᴴ = @constinferred svd_full(t)
+                @test u * s * vᴴ ≈ t
+                @test isunitary(u)
+                @test isunitary(vᴴ)
+
+                u, s, vᴴ = @constinferred svd_full(t, DefaultAlgorithm())
+                @test u * s * vᴴ ≈ t
+                @test isunitary(u)
+                @test isunitary(vᴴ)
+
+                u, s, vᴴ = @constinferred svd_compact(t)
+                @test u * s * vᴴ ≈ t
+                @test isisometric(u)
+                @test isposdef(s)
+                @test isisometric(vᴴ; side = :right)
+
+                u, s, vᴴ = @constinferred svd_compact(t, DefaultAlgorithm())
+                @test u * s * vᴴ ≈ t
+                @test isisometric(u)
+                @test isposdef(s)
+                @test isisometric(vᴴ; side = :right)
+
+                s′ = @constinferred svd_vals(t)
+                @test parent(s′) ≈ parent(diagview(s))
+                @test s′ isa TensorKit.SectorVector
+
+                s′ = @constinferred svd_vals(t, DefaultAlgorithm())
+                @test parent(s′) ≈ parent(diagview(s))
+                @test s′ isa TensorKit.SectorVector
+
+                s2 = @constinferred DiagonalTensorMap(s′)
+                @test s2 ≈ s
+
+                v, c = @constinferred left_orth(t; alg = :svd)
+                @test v * c ≈ t
+                @test isisometric(v)
+
+                c, vᴴ = @constinferred right_orth(t; alg = :svd)
+                @test c * vᴴ ≈ t
+                @test isisometric(vᴴ; side = :right)
+
+                N = @constinferred left_null(t; alg = :svd)
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                N = @constinferred left_null(t; trunc = (; atol = 100 * eps(norm(t))))
+                @test isisometric(N)
+                @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))
+
+                Nᴴ = @constinferred right_null(t; alg = :svd)
+                @test isisometric(Nᴴ; side = :right)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+
+                Nᴴ = @constinferred right_null(t; trunc = (; atol = 100 * eps(norm(t))))
+                @test isisometric(Nᴴ; side = :right)
+                @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))
+            end
+
+            # empty tensor
+            for T in eltypes, t in (
+                        AMDGPU.rand(T, W, zerospace(V1)),
+                        AMDGPU.rand(T, zerospace(V1), W),
+                    )
+                U, S, Vᴴ = @constinferred svd_full(t)
+                @test U * S * Vᴴ ≈ t
+                @test isunitary(U)
+                @test isunitary(Vᴴ)
+
+                U, S, Vᴴ = @constinferred svd_full(t, DefaultAlgorithm())
+                @test U * S * Vᴴ ≈ t
+                @test isunitary(U)
+                @test isunitary(Vᴴ)
+
+                U, S, Vᴴ = @constinferred svd_compact(t)
+                @test U * S * Vᴴ ≈ t
+                @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0
+
+                U, S, Vᴴ = @constinferred svd_compact(t, DefaultAlgorithm())
+                @test U * S * Vᴴ ≈ t
+                @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0
+            end
+        end
+
+        @testset "truncated SVD" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.randn(T, W, W), AMDGPU.randn(T, W, W)',
+                        AMDGPU.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), AMDGPU.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                        AMDGPU.randn(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), AMDGPU.randn(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))',
+                        #DiagonalTensorMap(AMDGPU.randn(T, reduceddim(V1)), V1),
+                    )
+
+                @constinferred normalize!(t)
+
+                U, S, Vᴴ, ϵ = @constinferred svd_trunc(t; trunc = notrunc())
+                @test U * S * Vᴴ ≈ t
+                @test ϵ ≈ 0
+                @test isisometric(U)
+                @test isisometric(Vᴴ; side = :right)
+
+                U, S, Vᴴ, ϵ = @constinferred svd_trunc(t, DefaultAlgorithm(; trunc = notrunc()))
+                @test U * S * Vᴴ ≈ t
+                @test ϵ ≈ 0
+                @test isisometric(U)
+                @test isisometric(Vᴴ; side = :right)
+
+                # dimension of S is a float for IsingBimodule
+                nvals = round(Int, dim(domain(S)) / 2)
+                trunc = truncrank(nvals)
+                U1, S1, Vᴴ1, ϵ1 = @constinferred svd_trunc(t; trunc)
+                @test t * Vᴴ1' ≈ U1 * S1
+                @test isisometric(U1)
+                @test isisometric(Vᴴ1; side = :right)
+                @test norm(t - U1 * S1 * Vᴴ1) ≈ ϵ1 atol = eps(real(T))^(4 / 5)
+                @test abs(dim(domain(S1)) - nvals) ≤ maximum(c -> blockdim(domain(t), c), blocksectors(t); init = 1)
+
+                λ = minimum(diagview(S1))
+                trunc = trunctol(; atol = λ - 10eps(λ))
+                U2, S2, Vᴴ2, ϵ2 = @constinferred svd_trunc(t; trunc)
+                @test t * Vᴴ2' ≈ U2 * S2
+                @test isisometric(U2)
+                @test isisometric(Vᴴ2; side = :right)
+                @test norm(t - U2 * S2 * Vᴴ2) ≈ ϵ2 atol = eps(real(T))^(4 / 5)
+                @test minimum(diagview(S1)) >= λ
+                @test U2 ≈ U1
+                @test S2 ≈ S1
+                @test Vᴴ2 ≈ Vᴴ1
+                @test ϵ1 ≈ ϵ2
+
+                trunc = truncspace(space(S2, 1))
+                U3, S3, Vᴴ3, ϵ3 = @constinferred svd_trunc(t; trunc)
+                @test t * Vᴴ3' ≈ U3 * S3
+                @test isisometric(U3)
+                @test isisometric(Vᴴ3; side = :right)
+                @test norm(t - U3 * S3 * Vᴴ3) ≈ ϵ3 atol = eps(real(T))^(4 / 5)
+                @test space(S3, 1) ≾ space(S2, 1)
+
+                for trunc in (truncerror(; atol = ϵ2), truncerror(; rtol = ϵ2 / norm(t)))
+                    U4, S4, Vᴴ4, ϵ4 = @constinferred svd_trunc(t; trunc)
+                    @test t * Vᴴ4' ≈ U4 * S4
+                    @test isisometric(U4)
+                    @test isisometric(Vᴴ4; side = :right)
+                    @test norm(t - U4 * S4 * Vᴴ4) ≈ ϵ4 atol = eps(real(T))^(4 / 5)
+                    @test ϵ4 ≤ ϵ2
+                end
+
+                trunc = truncrank(nvals) & trunctol(; atol = λ - 10eps(λ))
+                U5, S5, Vᴴ5, ϵ5 = @constinferred svd_trunc(t; trunc)
+                @test t * Vᴴ5' ≈ U5 * S5
+                @test isisometric(U5)
+                @test isisometric(Vᴴ5; side = :right)
+                @test norm(t - U5 * S5 * Vᴴ5) ≈ ϵ5 atol = eps(real(T))^(4 / 5)
+                @test minimum(diagview(S5)) >= λ
+                @test abs(dim(domain(S5)) - nvals) ≤ maximum(c -> blockdim(domain(t), c), blocksectors(t); init = 1)
+            end
+        end
+
+        @testset "Eigenvalue decomposition" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, V1, V1),
+                        AMDGPU.rand(T, W, W),
+                        AMDGPU.rand(T, W, W)',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                # ROCSOLVER does not provide geev
+                nvals = round(Int, dim(domain(t)) / 2)
+                #=d, v = @constinferred eig_full(t)
+                @test t * v ≈ v * d
+
+                d, v = @constinferred eig_full(t, DefaultAlgorithm())
+                @test t * v ≈ v * d
+
+                d′ = @constinferred eig_vals(t)
+                @test parent(d′) ≈ parent(diagview(d))
+                @test d′ isa TensorKit.SectorVector
+
+                d′ = @constinferred eig_vals(t, DefaultAlgorithm())
+                @test parent(d′) ≈ parent(diagview(d))
+                @test d′ isa TensorKit.SectorVector
+
+                d2 = @constinferred DiagonalTensorMap(d′)
+                @test d2 ≈ d
+
+                vdv = project_hermitian!(v' * v)
+                @test @constinferred isposdef(vdv)
+                t isa DiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map
+
+                d, v = @constinferred eig_trunc(t; trunc = truncrank(nvals))
+                @test t * v ≈ v * d
+                #test_dim_isapprox(domain(d), nvals)
+
+                d, v = @constinferred eig_trunc(t, DefaultAlgorithm(; trunc = truncrank(nvals)))
+                @test t * v ≈ v * d
+                #test_dim_isapprox(domain(d), nvals)=#
+
+                t2 = @constinferred project_hermitian(t)
+                D, V = eigen(t2)
+                @test isisometric(V)
+                D̃, Ṽ = @constinferred eigh_full(t2)
+                @test D ≈ D̃
+                @test V ≈ Ṽ
+
+                D̃, Ṽ = @constinferred eigh_full(t2, DefaultAlgorithm())
+                @test D ≈ D̃
+                @test V ≈ Ṽ
+
+                λ = minimum(real, parent(diagview(D)))
+                @test cond(Ṽ) ≈ one(real(T))
+                @test isposdef(t2) == isposdef(λ)
+                @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2))
+                @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2))
+
+                d, v = @constinferred eigh_full(t2)
+                @test t2 * v ≈ v * d
+                @test isunitary(v)
+
+                d′ = @constinferred eigh_vals(t2)
+                @test parent(d′) ≈ parent(diagview(d))
+                @test d′ isa TensorKit.SectorVector
+
+                d′ = @constinferred eigh_vals(t2, DefaultAlgorithm())
+                @test parent(d′) ≈ parent(diagview(d))
+                @test d′ isa TensorKit.SectorVector
+
+                λ = minimum(real, parent(diagview(d)))
+                @test cond(v) ≈ one(real(T))
+                @test isposdef(t2) == isposdef(λ)
+                @test isposdef(t2 - λ * one(t) + 0.1 * one(t2))
+                @test !isposdef(t2 - λ * one(t) - 0.1 * one(t2))
+
+                d, v = @constinferred eigh_trunc(t2; trunc = truncrank(nvals))
+                @test t2 * v ≈ v * d
+                #test_dim_isapprox(domain(d), nvals)
+
+                d, v = @constinferred eigh_trunc(t2, DefaultAlgorithm(; trunc = truncrank(nvals)))
+                @test t2 * v ≈ v * d
+                #test_dim_isapprox(domain(d), nvals)
+            end
+        end
+
+        @testset "Condition number and rank" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, W, W), AMDGPU.rand(T, W, W)',
+                        AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                        AMDGPU.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), AMDGPU.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+
+                d1, d2 = dim(codomain(t)), dim(domain(t))
+                r = rank(t)
+                @test r ≈ min(d1, d2)
+                @test typeof(r) == typeof(d1)
+                M = left_null(t)
+                @test @constinferred(rank(M)) + r ≈ d1
+                Mᴴ = right_null(t)
+                @test rank(Mᴴ) + r ≈ d2
+            end
+            for T in eltypes
+                u = unitary(T, V1 ⊗ V2, V1 ⊗ V2)
+                @test @constinferred(cond(u)) ≈ one(real(T))
+                @test @constinferred(rank(u)) == dim(V1 ⊗ V2)
+
+                t = AMDGPU.rand(T, zerospace(V1), W)
+                @test rank(t) == 0
+                t2 = AMDGPU.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2))
+                @test rank(t2) == 0
+                @test cond(t2) == 0.0
+            end
+            #=for T in eltypes, t in (
+                        AMDGPU.rand(T, W, W),
+                        AMDGPU.rand(T, W, W)',
+                    )
+                project_hermitian!(t)
+                vals = @constinferred LinearAlgebra.eigvals(t)
+                λmax = maximum(abs, parent(vals))
+                λmin = minimum(abs, parent(vals))
+                @test cond(t) ≈ λmax / λmin
+            end=# # ROCSOLVER doesn't provide geev
+        end
+
+        @testset "Hermitian projections" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.rand(T, V1, V1),
+                        AMDGPU.rand(T, W, W),
+                        AMDGPU.rand(T, W, W)',
+                        DiagonalTensorMap(AMDGPU.rand(T, reduceddim(V1)), V1),
+                    )
+                normalize!(t)
+                noisefactor = eps(real(T))^(3 / 4)
+
+                th = (t + t') / 2
+                ta = (t - t') / 2
+                tc = copy(t)
+
+                th′ = @constinferred project_hermitian(t)
+                @test ishermitian(th′)
+                @test th′ ≈ th
+
+                th′ = @constinferred project_hermitian(t, DefaultAlgorithm())
+                @test ishermitian(th′)
+                @test th′ ≈ th
+
+                @test t == tc
+                th_approx = th + noisefactor * ta
+                @test !ishermitian(th_approx) || (T <: Real && t isa DiagonalTensorMap)
+                @test ishermitian(th_approx; atol = 10 * noisefactor)
+
+                ta′ = project_antihermitian(t)
+                @test isantihermitian(ta′)
+                @test ta′ ≈ ta
+
+                ta′ = @constinferred project_antihermitian(t, DefaultAlgorithm())
+                @test isantihermitian(ta′)
+                @test ta′ ≈ ta
+
+                @test t == tc
+                ta_approx = ta + noisefactor * th
+                @test !isantihermitian(ta_approx)
+                @test isantihermitian(ta_approx; atol = 10 * noisefactor) || (T <: Real && t isa DiagonalTensorMap)
+            end
+        end
+
+        @testset "Isometric projections" begin
+            for T in eltypes,
+                    t in (
+                        AMDGPU.randn(T, W, W),
+                        AMDGPU.randn(T, W, W)',
+                        AMDGPU.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'),
+                        AMDGPU.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))',
+                    )
+                t2 = project_isometric(t)
+                @test isisometric(t2)
+                t2′ = @constinferred project_isometric(t, DefaultAlgorithm())
+                @test isisometric(t2′)
+                @test t2′ * ((t2′)' * t) ≈ t
+
+                t3 = project_isometric(t2)
+                @test t3 ≈ t2 # stability of the projection
+                @test t2 * (t2' * t) ≈ t
+
+                tc = similar(t)
+                t3 = @constinferred project_isometric!(copy!(tc, t), t2)
+                @test t3 === t2
+                @test isisometric(t2)
+
+                # test that t2 is closer to A then any other isometry
+                for k in 1:10
+                    δt = AMDGPU.randn!(similar(t))
+                    t3 = project_isometric(t + δt / 100)
+                    @test norm(t - t3) > norm(t - t2)
+                end
+            end
+        end
+    end
+end

From 992164701c8b453f5ad0403cb795260cdf00c317 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <katharine.hyatt@ugent.be>
Date: Fri, 26 Jun 2026 21:21:37 +0200
Subject: [PATCH 4/4] Try Float64

---
 test/amd/factorizations.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/amd/factorizations.jl b/test/amd/factorizations.jl
index b94f8fc60..a4c907d63 100644
--- a/test/amd/factorizations.jl
+++ b/test/amd/factorizations.jl
@@ -10,7 +10,7 @@ const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap)
 using AMDGPU.rocBLAS
 
 spacelist = factorization_spacelist(fast_tests)
-eltypes = (Float32, ComplexF64)
+eltypes = (Float64, ComplexF64)
 
 for V in spacelist
     I = sectortype(first(V))