From f7a3913a4bea0471ecb1755e0b5d7efa2ff7248a Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 26 Mar 2026 08:48:37 -0400 Subject: [PATCH 1/8] More tests for CUDA + MPS --- .buildkite/pipeline.yml | 4 +- ext/MPSKitAdaptExt.jl | 2 + src/operators/abstractmpo.jl | 10 +- src/operators/jordanmpotensor.jl | 5 +- src/operators/mpo.jl | 2 +- src/operators/mpohamiltonian.jl | 30 +++-- src/operators/ortho.jl | 21 +++- test/gpu/cuda/operators.jl | 187 +++++++++++++++++++++++++++++++ test/gpu/cuda/states.jl | 43 ++++++- 9 files changed, 276 insertions(+), 28 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0fb57f719..31348c888 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -15,7 +15,7 @@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 - label: "Julia LTS -- CUDA" plugins: @@ -30,4 +30,4 @@ steps: queue: "juliagpu" cuda: "*" if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 30 + timeout_in_minutes: 60 diff --git a/ext/MPSKitAdaptExt.jl b/ext/MPSKitAdaptExt.jl index 6f0b0c7f2..78f4b41af 100644 --- a/ext/MPSKitAdaptExt.jl +++ b/ext/MPSKitAdaptExt.jl @@ -35,5 +35,7 @@ end MPSKit.JordanMPOTensor(space(W), adapt(to, W.A), adapt(to, W.B), adapt(to, W.C), adapt(to, W.D)) @inline Adapt.adapt_structure(to, mpo::MPOHamiltonian) = MPOHamiltonian(map(x -> adapt(to, x), mpo.W)) +@inline Adapt.adapt_structure(to, ml::MPSKit.Multiline) = MPSKit.Multiline(map(adapt(to), ml.data)) +@inline Adapt.adapt_structure(to, pa::MPSKit.PeriodicArray) = MPSKit.PeriodicArray(map(adapt(to), pa.data)) end diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index 41e5ea576..fb4c57e43 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -99,7 +99,7 @@ Base.:\(α::Number, mpo::AbstractMPO) = scale(mpo, inv(α)) function VectorInterface.scale(mpo::AbstractMPO, α::Number) T = VectorInterface.promote_scale(scalartype(mpo), scalartype(α)) - dst = similar(mpo, T) + dst = similar(mpo, TensorKit.similarstoragetype(storagetype(mpo), T)) return scale!(dst, mpo, α) end @@ -153,7 +153,11 @@ Compute the mpo tensor that arises from multiplying MPOs. """ function fuse_mul_mpo(O1, O2) TT = promote_type(scalartype(O1), scalartype(O2)) - T = TensorKit.similarstoragetype(storagetype(O1), TT) + T = if O1 isa BraidingTensor + TensorKit.similarstoragetype(storagetype(O2), TT) + else + TensorKit.similarstoragetype(storagetype(O1), TT) + end F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1)) F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1)) return _fuse_mpo_mpo(O1, O2, F_left, F_right) @@ -190,7 +194,7 @@ end function add_physical_charge(O::MPOTensor, charge::Sector) sectortype(O) === typeof(charge) || throw(SectorMismatch()) auxspace = Vect[typeof(charge)](charge => 1)' - F = fuser(scalartype(O), physicalspace(O), auxspace) + F = fuser(storagetype(O), physicalspace(O), auxspace) @plansor O_charged[-1 -2; -3 -4] := F[-2; 1 2] * O[-1 1; 4 3] * τ[3 2; 5 -4] * conj(F[-3; 4 5]) return O_charged diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl index e4f77d8d7..1fb13fecf 100644 --- a/src/operators/jordanmpotensor.jl +++ b/src/operators/jordanmpotensor.jl @@ -205,7 +205,8 @@ end for f in (:real, :complex) @eval function Base.$f(W::JordanMPOTensor) E = $f(scalartype(W)) - W′ = JordanMPOTensor{E}(undef, space(W)) + TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), E) + W′ = similar(W, TE) for (I, v) in nonzero_pairs(W.A) W′.A[I] = $f(v) end @@ -242,7 +243,7 @@ end elseif 1 < i < size(W, 1) && 1 < j < size(W, 4) return W.A[i - 1, 1, 1, j - 1] else - return zeros(scalartype(W), eachspace(W)[i, 1, 1, j]) + return zeros(storagetype(W), eachspace(W)[i, 1, 1, j]) end end diff --git a/src/operators/mpo.jl b/src/operators/mpo.jl index 8630a0d56..7bf0d16ca 100644 --- a/src/operators/mpo.jl +++ b/src/operators/mpo.jl @@ -62,7 +62,7 @@ eachsite(mpo::InfiniteMPO) = PeriodicArray(eachindex(mpo)) function Base.similar(mpo::MPO{<:MPOTensor}, ::Type{O}, L::Int) where {O <: MPOTensor} return MPO(similar(parent(mpo), O, L)) end -function Base.similar(mpo::MPO, ::Type{T}) where {T <: Number} +function Base.similar(mpo::MPO, ::Type{T}) where {T <: Union{Number, DenseVector}} return MPO(similar.(parent(mpo), T)) end diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl index a2e39848f..c4b4ccd99 100644 --- a/src/operators/mpohamiltonian.jl +++ b/src/operators/mpohamiltonian.jl @@ -59,19 +59,19 @@ function InfiniteMPOHamiltonian(Ws::AbstractVector{O}) where {O <: MPOTensor} end """ - FiniteMPOHamiltonian(Ws::Vector{<:Matrix}) + FiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix}) Create a `FiniteMPOHamiltonian` from a vector of matrices, such that `Ws[i][j, k]` represents the operator at site `i`, left level `j` and right level `k`. Here, the entries can be either `MPOTensor`, `Missing` or `Number`. """ -function FiniteMPOHamiltonian(Ws::Vector{<:Matrix}) +function FiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix}) T = promote_type(_split_mpoham_types.(Ws)...) W = jordanmpotensortype(T) return FiniteMPOHamiltonian{W}(Ws) end -function FiniteMPOHamiltonian{O}(W_mats::Vector{<:Matrix}) where {O <: JordanMPOTensor} - T = scalartype(O) +function FiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: JordanMPOTensor} + T = storagetype(O) L = length(W_mats) # initialize sumspaces S = spacetype(O) @@ -157,12 +157,12 @@ Create a `InfiniteMPOHamiltonian` from a vector of matrices, such that `Ws[i][j, the the operator at site `i`, left level `j` and right level `k`. Here, the entries can be either `MPOTensor`, `Missing` or `Number`. """ -function InfiniteMPOHamiltonian(Ws::Vector{<:Matrix}) +function InfiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix}) T = promote_type(_split_mpoham_types.(Ws)...) TW = jordanmpotensortype(T) return InfiniteMPOHamiltonian{TW}(Ws) end -function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:Matrix}) where {O <: MPOTensor} +function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: MPOTensor} # InfiniteMPOHamiltonian only works for square matrices: for W_mat in W_mats size(W_mat, 1) == size(W_mat, 2) || @@ -172,7 +172,7 @@ function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:Matrix}) where {O <: MPOTens throw(ArgumentError("matrices should have the same size")) nlvls = size(W_mats[1], 1) - T = scalartype(O) + T = storagetype(O) L = length(W_mats) # initialize sumspaces S = spacetype(O) @@ -504,10 +504,8 @@ function InfiniteMPOHamiltonian(lattice′::AbstractArray{<:VectorSpace}, local_ end # partial sort by interaction range - local_mpos = sort!( - map(Base.Fix1(instantiate_operator, lattice), collect(local_operators)); - by = x -> length(x[1]) - ) + unsorted_mpos = map(Base.Fix1(instantiate_operator, lattice), [local_operators...]) + local_mpos = sort!(unsorted_mpos; by = x -> length(x[1])) for (sites, local_mpo) in local_mpos local key_R # trick to define key_R before the first iteration @@ -703,8 +701,8 @@ end function Base.similar(H::MPOHamiltonian, ::Type{O}, L::Int) where {O <: MPOTensor} return MPOHamiltonian(similar(parent(H), O, L)) end -function Base.similar(H::MPOHamiltonian, ::Type{T}) where {T <: Number} - return MPOHamiltonian(similar.(parent(H), T)) +function Base.similar(H::MPOHamiltonian, ::Type{TorA}) where {TorA <: Union{Number, DenseVector}} + return MPOHamiltonian(similar.(parent(H), TorA)) end # Linear Algebra @@ -729,7 +727,7 @@ function Base.:+( ⊞(Vtriv, right_virtualspace(A), Vtriv) V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright - H[i] = eltype(H)(V, A, B, C, D) + H[i] = JordanMPOTensor(V, A, B, C, D) end return FiniteMPOHamiltonian(H) end @@ -751,7 +749,7 @@ function Base.:+( Vright = ⊞(Vtriv, right_virtualspace(A), Vtriv) V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright - H[i] = eltype(H)(V, A, B, C, D) + H[i] = JordanMPOTensor(V, A, B, C, D) end return InfiniteMPOHamiltonian(H) end @@ -821,7 +819,7 @@ function Base.:*(H::FiniteMPOHamiltonian, mps::FiniteMPS) A, tensormaptype( spacetype(mps), numout(eltype(mps)), numin(eltype(mps)), - promote_type(scalartype(H), scalartype(mps)) + promote_type(storagetype(H), storagetype(mps)) ) ) # left to middle diff --git a/src/operators/ortho.jl b/src/operators/ortho.jl index b48d12ef1..b5c5ef180 100644 --- a/src/operators/ortho.jl +++ b/src/operators/ortho.jl @@ -11,7 +11,14 @@ function left_canonicalize!( d = sqrt(dim(P)) # orthogonalize second column against first - WI = removeunit(W[1, 1, 1, 1], 1) + Wi = W[1, 1, 1, 1] + if Wi isa BraidingTensor + Wi′ = removeunit(Wi, 1) + WI = similar(Wi′, TensorKit.storagetype(H)) + copy!(WI, Wi′) + else + WI = removeunit(Wi, 1) + end @plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r] # TODO: the following is currently broken due to a TensorKit bug # @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r] @@ -99,7 +106,14 @@ function right_canonicalize!( d = sqrt(dim(P)) # orthogonalize second row against last - WI = removeunit(W[end, 1, 1, end], 4) + Wi = W[end, 1, 1, end] + if Wi isa BraidingTensor + Wi′ = removeunit(Wi, 4) + WI = similar(Wi′, TensorKit.storagetype(H)) + copy!(WI, Wi′) + else + WI = removeunit(Wi, 4) + end @plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p'] # TODO: the following is currently broken due to a TensorKit bug # @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r] @@ -124,7 +138,8 @@ function right_canonicalize!( end H[i] = JordanMPOTensor(V ⊗ P ← domain(W), Q1, Q2, W.C, W.D) else - tmp = transpose(cat(insertleftunit(B′, 4), W.A; dims = 4), ((1,), (3, 4, 2))) + B′′ = insertleftunit(B′, 4) + tmp = transpose(cat(B′′, W.A; dims = 4), ((1,), (3, 4, 2))) R, Q = right_orth!(tmp; alg) if dim(R) == 0 V = _rightunit ⊞ _rightunit diff --git a/test/gpu/cuda/operators.jl b/test/gpu/cuda/operators.jl index cf42a4e7b..f8331104d 100644 --- a/test/gpu/cuda/operators.jl +++ b/test/gpu/cuda/operators.jl @@ -83,3 +83,190 @@ using Adapt, CUDA, cuTENSOR @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁) end + +@testset "Finite CuMPOHamiltonian T ($T), V ($(spacetype(V)))" for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1)) + L = 3 + lattice = fill(V, L) + O₁ = randn(T, V, V) + O₁ += O₁' + E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁)) + O₂ = randn(T, V^2 ← V^2) + O₂ += O₂' + + dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁) + dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂) + hH1 = FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L) + hH2 = FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1)) + hH3 = FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂) + H1 = adapt(CuVector{T, CUDA.DeviceMemory}, hH1) + H2 = adapt(CuVector{T, CUDA.DeviceMemory}, hH2) + H3 = adapt(CuVector{T, CUDA.DeviceMemory}, hH3) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory} + + @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T + if !(T <: Complex) + for H in (H1, H2, H3) + Hc = @constinferred complex(H) + @test scalartype(Hc) == complex(T) + @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory} + end + end + + # check if constructor works by converting back to tensormap + hH1_tm = convert(TensorMap, hH1) + H1_tm = convert(TensorMap, H1) + @test TensorKit.storagetype(H1_tm) == CuVector{T, CUDA.DeviceMemory} + operators = vcat(fill(E, L - 1), dO₁) + @test H1_tm ≈ mapreduce(+, 1:L) do i + return reduce(⊗, circshift(operators, i)) + end + operators = vcat(fill(E, L - 2), dO₂) + @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i + return reduce(⊗, circshift(operators, i)) + end + @test convert(TensorMap, H3) ≈ + dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) + + # check if adding terms on the same site works + single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2)) + H4 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, single_terms)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + @test H4 ≈ H1 atol = 1.0e-6 + double_terms = Iterators.flatten( + Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2) + ) + H5 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, double_terms)) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + @test H5 ≈ H2 atol = 1.0e-6 + + # test linear algebra + @test H1 ≈ + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 2 => O₁)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 3 => O₁)) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + + H1′a = 0.8 * H1 + @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory} + H1′b = 0.2 * H1 + @test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory} + H1′ = H1′a + H1′b + @test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory} + @test H1′ ≈ H1 atol = 1.0e-6 + @test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 + H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0))) + @test H1_trunc ≈ H1 + @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1)) + # test dot and application + hstate = rand(T, prod(lattice)) + state = adapt(CuVector{T, CUDA.DeviceMemory}, hstate) + @test TensorKit.storagetype(state) == CuVector{T, CUDA.DeviceMemory} + hmps = FiniteMPS(hstate) + mps = adapt(CuVector{T, CUDA.DeviceMemory}, hmps) + @test TensorKit.storagetype(mps) == CuVector{T, CUDA.DeviceMemory} + @test norm(mps) ≈ norm(state) + @test norm(H1) ≈ norm(H1_tm) + + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + hH1mps = hH1 * hmps + H1mps = H1 * mps + @test TensorKit.storagetype(H1mps) == CuVector{T, CUDA.DeviceMemory} + hH1tmstate = hH1_tm * hstate + H1tmstate = H1_tm * state + @test TensorKit.storagetype(H1tmstate) == CuVector{T, CUDA.DeviceMemory} + @test norm(H1mps) ≈ norm(H1tmstate) + + H1mpstm = convert(TensorMap, H1mps) + @test TensorKit.storagetype(H1mpstm) == CuVector{T, CUDA.DeviceMemory} + @test H1mpstm ≈ H1tmstate + @test H1 * state ≈ H1tmstate + + H2mps = H2 * mps + hH2mps = hH2 * hmps + @test TensorKit.storagetype(H2mps) == CuVector{T, CUDA.DeviceMemory} + @test dot(mps, H2, mps) ≈ dot(mps, H2mps) + @test dot(hmps, hH2mps) ≈ dot(mps, H2mps) + @test dot(mps, H2, mps) ≈ dot(hmps, hH2mps) + # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys + grid = square = fill(V, 3, 3) + + local_operators = Dict((I,) => O₁ for I in eachindex(grid)) + I_vertical = CartesianIndex(1, 0) + vertical_operators = Dict( + (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1) + ) + I_horizontal = CartesianIndex(0, 1) + horizontal_operators = Dict( + (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1) + ) + operators = merge(local_operators, vertical_operators, horizontal_operators) + H4 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, operators)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + @test H4 ≈ + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + H4′ = H4 / 3 + 2H4 / 3 + @test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} + H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + # needed here to avoid real * complex multiplication in cuTENSOR, which isn't supported + psi = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPS(T, physicalspace(H5), V ⊕ rightunitspace(V))) + @test expectation_value(psi, H4) ≈ expectation_value(psi, H5) +end + +pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1)) +vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1)) + +@testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces) + # generate a 1-2-3 body interaction + operators = ntuple(3) do i + O = rand(ComplexF64, pspace^i, pspace^i) + return O += O' + end + + H1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[1])) + H2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[2])) + H3 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, repeat(InfiniteMPOHamiltonian(operators[3]), 2)) + + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(H2) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(H3) == CuVector{ComplexF64, CUDA.DeviceMemory} + # make a teststate to measure expectation values for + ψ1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace], [Dspace])) + ψ2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace, pspace], [Dspace, Dspace])) + @test TensorKit.storagetype(ψ1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(ψ2) == CuVector{ComplexF64, CUDA.DeviceMemory} + + e1 = expectation_value(ψ1, H1) + e2 = expectation_value(ψ1, H2) + + H1 = 2 * H1 + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + H1 -= [1] + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 + + H1 = H1 + H2 + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 + + H1 = repeat(H1, 2) + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + + e1 = expectation_value(ψ2, H1) + e3 = expectation_value(ψ2, H3) + + @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10 + + H4 = H1 + H3 + @test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory} + h4 = H4 * H4 + @test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test real(expectation_value(ψ2, H4)) >= 0 +end diff --git a/test/gpu/cuda/states.jl b/test/gpu/cuda/states.jl index 39627ac41..a37712209 100644 --- a/test/gpu/cuda/states.jl +++ b/test/gpu/cuda/states.jl @@ -4,7 +4,7 @@ using MPSKit: GeometryStyle, InfiniteChainStyle, TransferMatrix using TensorKit using TensorKit: ℙ using Adapt, CUDA, cuTENSOR - +#= @testset "CuMPS ($(sectortype(D)), $elt)" for (D, d, elt) in [(ℙ^10, ℙ^2, ComplexF64), (Rep[U₁](1 => 3), Rep[U₁](0 => 1), ComplexF64)] tol = Float64(eps(real(elt)) * 100) @@ -50,3 +50,44 @@ using Adapt, CUDA, cuTENSOR @test norm(2 * ψ + ψ - 3 * ψ) ≈ 0.0 atol = sqrt(eps(real(ComplexF64))) end + +@testset "CuMultilineMPS ($(sectortype(D)), $elt)" for (D, d, elt) in + [(ℙ^10, ℙ^2, ComplexF64), (Rep[U₁](1 => 3), Rep[U₁](0 => 1), ComplexF32)] + tol = Float64(eps(real(elt)) * 100) + ψ = adapt( + CuVector{elt, CUDA.DeviceMemory}, MultilineMPS( + [ + rand(elt, D * d, D) rand(elt, D * d, D) + rand(elt, D * d, D) rand(elt, D * d, D) + ]; tol + ) + ) + + @test GeometryStyle(typeof(ψ)) == InfiniteChainStyle() + @test GeometryStyle(ψ) == InfiniteChainStyle() + @test TensorKit.storagetype(ψ) == CuVector{elt, CUDA.DeviceMemory} + @test TensorKit.sectortype(ψ) == sectortype(D) + + @test !isfinite(typeof(ψ)) + + @test physicalspace(ψ) == fill(d, 2, 2) + @test all(x -> x ≾ D, left_virtualspace(ψ)) + @test all(x -> x ≾ D, right_virtualspace(ψ)) + + for i in 1:size(ψ, 1), j in 1:size(ψ, 2) + @plansor difference[-1 -2; -3] := ψ.AL[i, j][-1 -2; 1] * ψ.C[i, j][1; -3] - + ψ.C[i, j - 1][-1; 1] * ψ.AR[i, j][1 -2; -3] + @test norm(difference, Inf) < tol * 10 + + @test l_LL(ψ, i, j) * TransferMatrix(ψ.AL[i, j], ψ.AL[i, j]) ≈ l_LL(ψ, i, j + 1) + @test l_LR(ψ, i, j) * TransferMatrix(ψ.AL[i, j], ψ.AR[i, j]) ≈ l_LR(ψ, i, j + 1) + @test l_RL(ψ, i, j) * TransferMatrix(ψ.AR[i, j], ψ.AL[i, j]) ≈ l_RL(ψ, i, j + 1) + @test l_RR(ψ, i, j) * TransferMatrix(ψ.AR[i, j], ψ.AR[i, j]) ≈ l_RR(ψ, i, j + 1) + + @test TransferMatrix(ψ.AL[i, j], ψ.AL[i, j]) * r_LL(ψ, i, j) ≈ r_LL(ψ, i, j + 1) + @test TransferMatrix(ψ.AL[i, j], ψ.AR[i, j]) * r_LR(ψ, i, j) ≈ r_LR(ψ, i, j + 1) + @test TransferMatrix(ψ.AR[i, j], ψ.AL[i, j]) * r_RL(ψ, i, j) ≈ r_RL(ψ, i, j + 1) + @test TransferMatrix(ψ.AR[i, j], ψ.AR[i, j]) * r_RR(ψ, i, j) ≈ r_RR(ψ, i, j + 1) + end +end +=# From d90faf650ea47462897a700d72dfa64badce798e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 10 Jun 2026 09:38:36 +0200 Subject: [PATCH 2/8] Try to fix failing test --- src/operators/mpohamiltonian.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl index c4b4ccd99..f929d489d 100644 --- a/src/operators/mpohamiltonian.jl +++ b/src/operators/mpohamiltonian.jl @@ -71,7 +71,7 @@ function FiniteMPOHamiltonian(Ws::Vector{<:AbstractMatrix}) return FiniteMPOHamiltonian{W}(Ws) end function FiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: JordanMPOTensor} - T = storagetype(O) + T = scalartype(O) L = length(W_mats) # initialize sumspaces S = spacetype(O) From dcf7dc6bbb496c3a52d7994af3f62e49ad16ca3b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 10 Jun 2026 10:38:18 +0200 Subject: [PATCH 3/8] Another one --- src/operators/mpohamiltonian.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl index f929d489d..401064493 100644 --- a/src/operators/mpohamiltonian.jl +++ b/src/operators/mpohamiltonian.jl @@ -172,7 +172,7 @@ function InfiniteMPOHamiltonian{O}(W_mats::Vector{<:AbstractMatrix}) where {O <: throw(ArgumentError("matrices should have the same size")) nlvls = size(W_mats[1], 1) - T = storagetype(O) + T = scalartype(O) L = length(W_mats) # initialize sumspaces S = spacetype(O) From e3ac68cf057aa3f8904494c933fea21cfb1067f4 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 10 Jun 2026 15:16:11 +0200 Subject: [PATCH 4/8] Try restoring abstractmpo --- src/operators/abstractmpo.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index fb4c57e43..d5b02957d 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -153,11 +153,7 @@ Compute the mpo tensor that arises from multiplying MPOs. """ function fuse_mul_mpo(O1, O2) TT = promote_type(scalartype(O1), scalartype(O2)) - T = if O1 isa BraidingTensor - TensorKit.similarstoragetype(storagetype(O2), TT) - else - TensorKit.similarstoragetype(storagetype(O1), TT) - end + T = TensorKit.promote_storagetype(TT, O1, O2) F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1)) F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1)) return _fuse_mpo_mpo(O1, O2, F_left, F_right) From b21fdff93ab67039a51919862f512614975694fa Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 11 Jun 2026 10:31:17 +0200 Subject: [PATCH 5/8] Simplify similar calls --- src/operators/abstractmpo.jl | 4 ++-- src/operators/jordanmpotensor.jl | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index d5b02957d..f66b3cf02 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -99,7 +99,7 @@ Base.:\(α::Number, mpo::AbstractMPO) = scale(mpo, inv(α)) function VectorInterface.scale(mpo::AbstractMPO, α::Number) T = VectorInterface.promote_scale(scalartype(mpo), scalartype(α)) - dst = similar(mpo, TensorKit.similarstoragetype(storagetype(mpo), T)) + dst = similar(mpo, T) return scale!(dst, mpo, α) end @@ -153,7 +153,7 @@ Compute the mpo tensor that arises from multiplying MPOs. """ function fuse_mul_mpo(O1, O2) TT = promote_type(scalartype(O1), scalartype(O2)) - T = TensorKit.promote_storagetype(TT, O1, O2) + T = TensorKit.promote_storagetype(TT, O1, O2) F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1)) F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1)) return _fuse_mpo_mpo(O1, O2, F_left, F_right) diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl index 1fb13fecf..5159bbcd1 100644 --- a/src/operators/jordanmpotensor.jl +++ b/src/operators/jordanmpotensor.jl @@ -205,8 +205,7 @@ end for f in (:real, :complex) @eval function Base.$f(W::JordanMPOTensor) E = $f(scalartype(W)) - TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), E) - W′ = similar(W, TE) + W′ = similar(W, E) for (I, v) in nonzero_pairs(W.A) W′.A[I] = $f(v) end From 63c88422d34e0579a113d9e071499dc8cf628d58 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 11 Jun 2026 08:57:50 -0400 Subject: [PATCH 6/8] Fix JordanMPOTensor similar --- src/operators/jordanmpotensor.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl index 5159bbcd1..af95282cc 100644 --- a/src/operators/jordanmpotensor.jl +++ b/src/operators/jordanmpotensor.jl @@ -133,8 +133,10 @@ end function jordanmpotensortype(::Type{O}) where {O <: AbstractTensorMap} return jordanmpotensortype(spacetype(O), storagetype(O)) end -Base.similar(W::JordanMPOTensor, ::Type{TorA}) where {TorA} = - jordanmpotensortype(spacetype(W), TorA)(undef, space(W)) +function Base.similar(W::JordanMPOTensor, ::Type{T}) where {T <: Number} + TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), T) + return jordanmpotensortype(spacetype(W), TE)(undef, space(W)) +end # Properties # ---------- From 768710ad4b77879256eaf1e7a4519ad4477a89dc Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 11 Jun 2026 11:28:13 -0400 Subject: [PATCH 7/8] More cleanup --- src/operators/ortho.jl | 16 ++-------------- test/gpu/cuda/operators.jl | 6 ++---- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/operators/ortho.jl b/src/operators/ortho.jl index b5c5ef180..a91442213 100644 --- a/src/operators/ortho.jl +++ b/src/operators/ortho.jl @@ -12,13 +12,7 @@ function left_canonicalize!( # orthogonalize second column against first Wi = W[1, 1, 1, 1] - if Wi isa BraidingTensor - Wi′ = removeunit(Wi, 1) - WI = similar(Wi′, TensorKit.storagetype(H)) - copy!(WI, Wi′) - else - WI = removeunit(Wi, 1) - end + WI = removeunit(Wi, 1) @plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r] # TODO: the following is currently broken due to a TensorKit bug # @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r] @@ -107,13 +101,7 @@ function right_canonicalize!( # orthogonalize second row against last Wi = W[end, 1, 1, end] - if Wi isa BraidingTensor - Wi′ = removeunit(Wi, 4) - WI = similar(Wi′, TensorKit.storagetype(H)) - copy!(WI, Wi′) - else - WI = removeunit(Wi, 4) - end + WI = removeunit(Wi, 4) @plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p'] # TODO: the following is currently broken due to a TensorKit bug # @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r] diff --git a/test/gpu/cuda/operators.jl b/test/gpu/cuda/operators.jl index f8331104d..ca7d6ed86 100644 --- a/test/gpu/cuda/operators.jl +++ b/test/gpu/cuda/operators.jl @@ -87,11 +87,9 @@ end @testset "Finite CuMPOHamiltonian T ($T), V ($(spacetype(V)))" for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1)) L = 3 lattice = fill(V, L) - O₁ = randn(T, V, V) - O₁ += O₁' + O₁ = project_hermitian!(randn(T, V ← V)) E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁)) - O₂ = randn(T, V^2 ← V^2) - O₂ += O₂' + O₂ = project_hermitian!(randn(T, V^2 ← V^2)) dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁) dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂) From a9786e4242ab5e66d59bbaf0c772d7bb38b7a090 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 12 Jun 2026 10:10:10 +0200 Subject: [PATCH 8/8] Update pipeline.yml New JuliaGPU agents --- .buildkite/pipeline.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 31348c888..4424686e6 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -12,8 +12,7 @@ steps: - src - ext agents: - queue: "juliagpu" - cuda: "*" + queue: "cuda" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 60 @@ -27,7 +26,6 @@ steps: - src - ext agents: - queue: "juliagpu" - cuda: "*" + queue: "cuda" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 60