Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
EnzymeTestUtils = "12d8515a-0907-448a-8884-5fe00fdf1c5a"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"

[extensions]
Expand All @@ -35,6 +36,7 @@ TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitEnzymeExt = "Enzyme"
TensorKitEnzymeTestUtilsExt = "EnzymeTestUtils"
TensorKitFiniteDifferencesExt = "FiniteDifferences"
TensorKitGPUArraysExt = "GPUArrays"
TensorKitMooncakeExt = "Mooncake"

[workspace]
Expand All @@ -49,6 +51,7 @@ Dictionaries = "0.4"
Enzyme = "0.13.157"
EnzymeTestUtils = "0.2.8"
FiniteDifferences = "0.12"
GPUArrays = "11.4.1"
LRUCache = "1.6"
LinearAlgebra = "1"
MatrixAlgebraKit = "0.6.8"
Expand Down
3 changes: 1 addition & 2 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
module TensorKitAMDGPUExt

using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra
using AMDGPU: @allowscalar
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using Strided
using MatrixAlgebraKit
using MatrixAlgebraKit: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype
import TensorKit: randisometry
using Base: rand, randn

Expand Down
17 changes: 0 additions & 17 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,6 @@ function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂
return ROCTensorMap{T, S, N₁, N₂}(ROCArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
# Host-side fallback for ROCm storage: the symmetric projection runs on a
# `Vector{T}`-backed tensor, the projected result is compared against the input
# (absolute tolerance `tol`), and the projected data is wrapped in storage type `A`.
# Throws `ArgumentError` when the input differs from its projection by more than `tol`.
function TensorKit.project_symmetric_and_check(
        ::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace;
        tol = sqrt(eps(real(float(eltype(data)))))
    ) where {T, A <: ROCVector{T}}
    host_tensor = TensorKit.project_symmetric!(
        TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V), Array(data)
    )
    # the input, reshaped to the tensor's dimensions, must survive the projection
    reshaped_input = Array(reshape(data, dims(host_tensor)))
    if !isapprox(reshaped_input, convert(Array, host_tensor); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    return TensorKit.TensorMapWithStorage{T, A}(A(host_tensor.data), V)
end

for (fname, felt) in ((:zeros, :zero), (:ones, :one))
@eval begin
function AMDGPU.$fname(
Expand Down Expand Up @@ -92,13 +82,6 @@ for randfun in (:rocrand, :rocrandn)
end
end

# Scalar implementation
#-----------------------
# Scalar value of a tensor with no codomain and no domain indices: zero of the
# scalar type when `t.data` holds no non-zero entry, otherwise the single
# non-zero entry (`only` throws if there is more than one).
function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S}
    nonzero_positions = findall(!iszero, t.data)
    isempty(nonzero_positions) && return zero(scalartype(t))
    # both the `only` call and the element lookup stay inside `@allowscalar`
    return @allowscalar @inbounds t.data[only(nonzero_positions)]
end

function Base.convert(
TT::Type{ROCTensorMap{T, S, N₁, N₂}},
t::AbstractTensorMap{<:Any, S, N₁, N₂}
Expand Down
17 changes: 0 additions & 17 deletions ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
module TensorKitCUDAExt

using CUDA, CUDA.cuBLAS, CUDA.cuSOLVER, CUDA.cuRAND, LinearAlgebra
using CUDA: @allowscalar
import CUDA.cuRAND: rand as curand, rand! as curand!, randn as curandn, randn! as curandn!
using Strided: StridedViews
using CUDA.CUDACore.KernelAbstractions: @kernel, @index, get_backend

using Adapt: Adapt

Expand All @@ -20,20 +18,5 @@ using TensorKit: MatrixAlgebraKit
using Random

include("cutensormap.jl")
include("truncation.jl")

# Fill `data` in place through a KernelAbstractions kernel and return it.
# Every Cartesian index `idx` of `data` is written: positions with
# `idx[1] == idx[4]` and `idx[2] == idx[3]` receive `val`, all others `zero(val)`.
# NOTE(review): the kernel reads four index components; confirm how the
# `CuMatrix` branch of the signature is expected to be used.
function TensorKit.fill_braidingsubblock!(data::TD, val) where {T, TD <: Union{<:CuMatrix{T}, <:StridedViews.StridedView{T, 4, <:CuArray{T}}}}
# COV_EXCL_START
# kernels are not reachable by coverage
@kernel function fill_subblock_kernel!(subblock, val)
idx = @index(Global, Cartesian)
# `val` only where the 1st index matches the 4th and the 2nd matches the 3rd
idx_val = idx[1] == idx[4] && idx[2] == idx[3] ? val : zero(val)
@inbounds subblock[idx] = idx_val
end
# COV_EXCL_STOP
# instantiate the kernel for the backend reported by `data`, then run it over `size(data)`
kernel = fill_subblock_kernel!(get_backend(data))
kernel(data, val; ndrange = size(data))
return data
end

end
17 changes: 0 additions & 17 deletions ext/TensorKitCUDAExt/cutensormap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,6 @@ function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂,
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
# Host-side fallback for CUDA storage: the symmetric projection runs on a
# `Vector{T}`-backed tensor, the projected result is compared against the input
# (absolute tolerance `tol`), and the projected data is wrapped in storage type `A`.
# Throws `ArgumentError` when the input differs from its projection by more than `tol`.
function TensorKit.project_symmetric_and_check(
        ::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace;
        tol = sqrt(eps(real(float(eltype(data)))))
    ) where {T, A <: CuVector{T}}
    host_tensor = TensorKit.project_symmetric!(
        TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V), Array(data)
    )
    # the input, reshaped to the tensor's dimensions, must survive the projection
    reshaped_input = Array(reshape(data, dims(host_tensor)))
    if !isapprox(reshaped_input, convert(Array, host_tensor); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    return TensorKit.TensorMapWithStorage{T, A}(A(host_tensor.data), V)
end

for (fname, felt) in ((:zeros, :zero), (:ones, :one))
@eval begin
function CUDA.$fname(
Expand Down Expand Up @@ -94,13 +84,6 @@ for randfun in (:curand, :curandn)
end
end

# Scalar implementation
#-----------------------
# Scalar value of a tensor with no codomain and no domain indices: zero of the
# scalar type when `t.data` holds no non-zero entry, otherwise the single
# non-zero entry (`only` throws if there is more than one).
function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
    nonzero_positions = findall(!iszero, t.data)
    isempty(nonzero_positions) && return zero(scalartype(t))
    # both the `only` call and the element lookup stay inside `@allowscalar`
    return @allowscalar @inbounds t.data[only(nonzero_positions)]
end

function LinearAlgebra.isposdef(t::CuTensorMap)
domain(t) == codomain(t) ||
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
Expand Down
69 changes: 0 additions & 69 deletions ext/TensorKitCUDAExt/truncation.jl

This file was deleted.

117 changes: 117 additions & 0 deletions ext/TensorKitGPUArraysExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
module TensorKitGPUArraysExt

using GPUArrays
using GPUArrays: @allowscalar
using GPUArrays.KernelAbstractions: @kernel, @index, get_backend

using Strided: StridedViews
using MatrixAlgebraKit, Adapt
using TensorKit
using TensorKit.Factorizations
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn, fill_braidingsubblock!

# Fill `data` in place through a KernelAbstractions kernel and return it.
# Every Cartesian index `idx` of `data` is written: positions with
# `idx[1] == idx[4]` and `idx[2] == idx[3]` receive `val`, all others `zero(val)`.
# NOTE(review): the kernel reads four index components; confirm how the
# `AnyGPUMatrix` branch of the signature is expected to be used.
function TensorKit.fill_braidingsubblock!(data::TD, val) where {T, TD <: Union{<:AnyGPUMatrix{T}, <:StridedViews.StridedView{T, 4, <:AnyGPUArray{T}}}}
# COV_EXCL_START
# kernels are not reachable by coverage
@kernel function fill_subblock_kernel!(subblock, val)
idx = @index(Global, Cartesian)
# `val` only where the 1st index matches the 4th and the 2nd matches the 3rd
idx_val = idx[1] == idx[4] && idx[2] == idx[3] ? val : zero(val)
@inbounds subblock[idx] = idx_val
end
# COV_EXCL_STOP
# instantiate the kernel for the backend reported by `data`, then run it over `size(data)`
kernel = fill_subblock_kernel!(get_backend(data))
kernel(data, val; ndrange = size(data))
return data
end

# Alias for a `TensorKit.SectorVector` with element type `T` and sector type `I`
# whose third type parameter is a GPU vector type (`AnyGPUVector{T}`); the
# truncation methods in this module dispatch on it.
const GPUSectorVector{T, I} = TensorKit.SectorVector{T, I, <:AnyGPUVector{T}}

# Order-based truncation for GPU-backed sector vectors.
# Returns a `Bool` sector vector (`similar(values, Bool)`) marking the entries to
# keep: values are sorted with `strategy.by` / `strategy.rev`, each entry is
# weighted by the `dim` of its sector, and an entry is kept while the running
# total of weights does not exceed `strategy.howmany`.
function MatrixAlgebraKit.findtruncated(
        values::GPUSectorVector, strategy::MatrixAlgebraKit.TruncationByOrder
    )
    # per-entry weights: every entry of sector `sector` is assigned `dim(sector)`
    weights = similar(values, Base.promote_op(dim, sectortype(values)))
    for (sector, block) in pairs(weights)
        fill!(block, dim(sector))
    end

    keep = similar(values, Bool)
    isempty(parent(values)) && return keep

    order = sortperm(parent(values); by = strategy.by, rev = strategy.rev)
    # reorder the weights in place so they follow the sorted values, then accumulate
    sorted_weights = Base.permute!(parent(weights), order)
    running_total = cumsum(sorted_weights)

    # scatter the decisions back to the original positions
    parent(keep)[order] .= running_total .<= strategy.howmany
    return keep
end

# Error-based truncation for GPU-backed sector vectors.
# Returns a `Bool` sector vector (`similar(values, Bool)`) marking the entries to
# keep: entries are visited in order of increasing absolute value and are
# discarded for as long as the accumulated `|v|^p` (weighted by the sector `dim`
# unless the fusion style is `UniqueFusion`) stays at or below the threshold.
# Throws `ArgumentError` unless `strategy.p` is finite and positive.
function MatrixAlgebraKit.findtruncated(
        values::GPUSectorVector, strategy::MatrixAlgebraKit.TruncationByError
    )
    p = strategy.p
    if !(isfinite(p) && p > 0)
        throw(ArgumentError(lazy"p-norm with p = $(strategy.p) is currently not supported."))
    end

    # NOTE(review): the relative part multiplies `rtol^p` by `norm(values, p)`
    # itself rather than by its p-th power — confirm this is the intended threshold.
    threshold = max(strategy.atol^p, strategy.rtol^p * norm(values, p))
    contributions = similar(values, typeof(threshold))

    if FusionStyle(sectortype(values)) isa UniqueFusion
        # dimensions are all 1 so no need to account for weight
        parent(contributions) .= abs.(parent(values)) .^ p
    else
        for (sector, block) in pairs(values)
            target = contributions[sector]
            target .= abs.(block) .^ p .* dim(sector)
        end
    end

    keep = similar(values, Bool)
    isempty(parent(values)) && return keep

    # smallest magnitudes first, so the running sum is the error of dropping them
    order = sortperm(parent(values); by = abs, rev = false)
    running_error = cumsum(Base.permute!(parent(contributions), order))

    # scatter the decisions back to the original positions
    parent(keep)[order] .= running_error .> threshold
    return keep
end

# Generic SVD truncation entry point for GPU-backed sector vectors: compute the
# mask with `findtruncated` and adapt it to `Vector` storage before returning.
function MatrixAlgebraKit.findtruncated_svd(
        values::GPUSectorVector, strategy::S
    ) where {S <: MatrixAlgebraKit.TruncationStrategy}
    # returning a GPUSectorVector wrecks things in truncate_{co}domain
    # because of scalar indexing
    device_mask = MatrixAlgebraKit.findtruncated(values, strategy)
    return Adapt.adapt(Vector, device_mask)
end

# Generate one `findtruncated_svd` method per listed strategy type, each with the
# same body as the generic GPU method.
# NOTE(review): presumably these exist to resolve dispatch ambiguities with
# strategy-specific methods defined elsewhere — confirm.
for strategy_type in (
        :(MatrixAlgebraKit.TruncationByOrder),
        :(MatrixAlgebraKit.TruncationByError),
        :(MatrixAlgebraKit.TruncationIntersection),
        :(TensorKit.Factorizations.TruncationSpace),
    )
    @eval function MatrixAlgebraKit.findtruncated_svd(
            values::GPUSectorVector, strategy::$strategy_type
        )
        # returning a GPUSectorVector wrecks things in truncate_{co}domain
        # because of scalar indexing
        device_mask = MatrixAlgebraKit.findtruncated(values, strategy)
        return Adapt.adapt(Vector, device_mask)
    end
end

# Value-based SVD truncation for GPU-backed sector vectors.
# The tolerances are first collapsed into one absolute tolerance via
# `rtol_to_atol`; each sector's values are then truncated separately with the
# resulting `trunctol` strategy. Returns a `SectorDict` mapping each sector to its
# result adapted to `Vector` storage.
function MatrixAlgebraKit.findtruncated_svd(
        values::GPUSectorVector, strategy::MatrixAlgebraKit.TruncationByValue
    )
    abs_tol = TensorKit.Factorizations.rtol_to_atol(
        values, strategy.p, strategy.atol, strategy.rtol
    )
    blockwise = trunctol(; atol = abs_tol, by = strategy.by, keep_below = strategy.keep_below)
    return SectorDict(
        sector => Adapt.adapt(Vector, MatrixAlgebraKit.findtruncated_svd(block, blockwise))
            for (sector, block) in pairs(values)
    )
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
# Host-side fallback for GPU storage: the symmetric projection runs on a
# `Vector{T}`-backed tensor, the projected result is compared against the input
# (absolute tolerance `tol`), and the projected data is wrapped in storage type `A`.
# Throws `ArgumentError` when the input differs from its projection by more than `tol`.
function TensorKit.project_symmetric_and_check(
        ::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace;
        tol = sqrt(eps(real(float(eltype(data)))))
    ) where {T, A <: AnyGPUVector{T}}
    host_tensor = TensorKit.project_symmetric!(
        TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V), Array(data)
    )
    # the input, reshaped to the tensor's dimensions, must survive the projection
    reshaped_input = Array(reshape(data, dims(host_tensor)))
    if !isapprox(reshaped_input, convert(Array, host_tensor); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    return TensorKit.TensorMapWithStorage{T, A}(A(host_tensor.data), V)
end

# Scalar implementation
#-----------------------
# Scalar value of a GPU-backed tensor with no codomain and no domain indices:
# zero of the scalar type when `t.data` holds no non-zero entry, otherwise the
# single non-zero entry (`only` throws if there is more than one).
function TensorKit.scalar(t::TensorMap{T, S, 0, 0, <:AnyGPUArray}) where {T, S}
    nonzero_positions = findall(!iszero, t.data)
    isempty(nonzero_positions) && return zero(scalartype(t))
    # both the `only` call and the element lookup stay inside `@allowscalar`
    return @allowscalar @inbounds t.data[only(nonzero_positions)]
end


end
1 change: 0 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ AllocCheck = "0.2"
ChainRulesTestUtils = "1"
Combinatorics = "1"
cuTENSOR = "6"
GPUArrays = "11.3.1"
JET = "0.9, 0.10, 0.11"
ParallelTestRunner = "2"
Test = "1"
Expand Down
Loading