Implementation of LU decomposition using data flow tasks #65

Draft · wants to merge 5 commits into base: main
Changes from 4 commits
1 change: 1 addition & 0 deletions Project.toml
@@ -3,6 +3,7 @@ uuid = "8646bddf-ab1c-4fa7-9c51-ba187d647618"
version = "0.2.8"

[deps]
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
51 changes: 51 additions & 0 deletions src/hmatrix.jl
@@ -1,3 +1,5 @@
using DataFlowTasks

"""
mutable struct HMatrix{R,T} <: AbstractMatrix{T}

@@ -156,6 +158,7 @@
@printf(io, "\n\t min number of elements per leaf: %i", minimum(points_per_leaf))
@printf(io, "\n\t max number of elements per leaf: %i", maximum(points_per_leaf))
depth_per_leaf = map(depth, leaves_)
# TODO: the depth of the tree is always zero because all leaves are roots
@printf(io, "\n\t depth of tree: %i", maximum(depth_per_leaf))
@printf(io, "\n\t compression ratio: %f\n", compression_ratio(hmat))
return io
@@ -467,6 +470,54 @@
return H
end

"""
ancestors(H::HMatrix)

Return all ancestors of `H`.
"""
function ancestors(H::HMatrix)
ancestors = []
W = H
while W !== parentnode(W)
W = parentnode(W)
push!(ancestors, W)
end
return ancestors
end

"""
issubmatrix(A::HMatrix, B::HMatrix)

Return `true` if `B` is a submatrix of `A`.
"""
function issubmatrix(A::HMatrix, B::HMatrix)
for ancB in ancestors(B)
if A === ancB
return true
end
end
return false
end

function DataFlowTasks.memory_overlap(A::HMatrix, B::HMatrix)
# TODO: compare leaves in a more efficient way.
if A === B
return true
elseif issubmatrix(A, B) || issubmatrix(B, A)
return true
end
chdA = leaves(A)
chdB = leaves(B)
for i in eachindex(chdA)
for j in eachindex(chdB)
if data(chdA[i]) === data(chdB[j])
return true

Codecov (codecov/patch) check warning on src/hmatrix.jl#L514: added line was not covered by tests.
Comment on lines +502 to +514

Member:
I think for most use cases, we can simply:

  1. Check if A and B have a common root
  2. Check the intersection of the row and column ranges of A and B

When they don't have a common root, we can (probably?) assume they don't overlap? (See the sketch after this thread.)

Collaborator Author:
Even if they don't have a common root, it is not necessarily true that they don't overlap. For example, we can have this situation:

    M = Matrix(...)
    H = HMatrix(M, ...)
    H1 = HMatrix(M, ...)

Then even if we check the intersection of H and H1, we still need to compare their data anyway. But we can assume that H and H1 are built correctly (without using one data matrix for more than one HMatrix) and then remove the loops altogether.
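
For reference, a minimal sketch of the range-based check proposed above. It is not part of the diff; it assumes HMatrices exposes `rowrange`/`colrange` accessors for the index ranges of a block, and that correctly built trees with distinct roots never share data, as discussed in this thread. It reuses the `ancestors` helper introduced in this PR.

    # Sketch only: relies on the assumptions stated above (rowrange/colrange accessors,
    # distinct roots imply distinct data).
    function overlap_by_ranges(A::HMatrix, B::HMatrix)
        rootA = isempty(ancestors(A)) ? A : last(ancestors(A))  # ancestors returns the root last
        rootB = isempty(ancestors(B)) ? B : last(ancestors(B))
        rootA === rootB || return false  # assumed: different trees do not alias memory
        return !isempty(intersect(rowrange(A), rowrange(B))) &&
               !isempty(intersect(colrange(A), colrange(B)))
    end

With this check, `H` and `H.children[1]` still overlap (same root, nested ranges), while distinct sibling blocks are ruled out without comparing their leaves pairwise.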

end
end
end
return false
end

############################################################################################
# Recipes
############################################################################################
65 changes: 60 additions & 5 deletions src/lu.jl
@@ -1,3 +1,5 @@
using DataFlowTasks

const NOPIVOT = VERSION >= v"1.7" ? NoPivot : Val{false}

const HLU = LU{<:Any,<:HMatrix}
@@ -30,9 +32,16 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads())
nt = Threads.nthreads()
chn = Channel{ACABuffer{T}}(nt)
foreach(i -> put!(chn, ACABuffer(T)), 1:nt)
_lu!(M, compressor, threads, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
if threads
_lu_threads!(M, compressor, chn)
# wrap the result in the LU structure
d = @dspawn LU(@R(M), Int[], 0) label = "result"
return fetch(d)
else
_lu!(M, compressor, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
end
end

"""
@@ -59,7 +68,7 @@ Hierarchical LU factorization. See [`lu!`](@ref) for the available options.
"""
LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...)

function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
function _lu!(M::HMatrix, compressor, bufs = nothing)
if isleaf(M)
d = data(M)
@assert d isa Matrix
@@ -69,7 +78,7 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu!(chdM[i, i], compressor, threads, bufs)
_lu!(chdM[i, i], compressor, bufs)
for j in (i+1):n
ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor, bufs)
rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor, bufs)
@@ -84,6 +93,52 @@
return M
end

function _lu_threads!(M::HMatrix, compressor, bufs = nothing, level = 0, parent = (0, 0))
if isleaf(M)
@dspawn begin
@RW(M)
d = data(M)
@assert d isa Matrix
lu!(d, NOPIVOT())
end label = "lu($(parent[1]),$(parent[2]))\nlevel=$(level)"
else
@assert !hasdata(M)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu_threads!(chdM[i, i], compressor, bufs, level + 1, (i, i))
for j in (i+1):n
@dspawn ldiv!(
UnitLowerTriangular(@R(chdM[i, i])),
@RW(chdM[i, j]),
compressor,
bufs,
) label = "ldiv($i,$j)\nlevel=$(level+1)"
@dspawn rdiv!(
@RW(chdM[j, i]),
UpperTriangular(@R(chdM[i, i])),
compressor,
bufs,
) label = "rdiv($j,$i)\nlevel=$(level+1)"
end
for j in (i+1):m
for k in (i+1):n
@dspawn hmul!(
@RW(chdM[j, k]),
@R(chdM[j, i]),
@R(chdM[i, k]),
-1,
1,
compressor,
bufs,
) label = "hmul($j,$k)\nlevel=$(level+1)"
end
end
end
end
return M
end
Comment on lines +96 to +140

Member:
Seems fine as a first implementation, but this level of granularity is not going to be sufficient for good parallel scaling (I think). You probably need to spawn tasks at a finer scale inside the hmul! and ldiv!/rdiv! functions...

We should probably start by looking at the TaskGraph for the current implementation. Could you post one, e.g. on the PR? (A dense-tile sketch of the current task pattern is included after this thread.)

Collaborator Author (@AZEY4, Jun 24, 2024):
Yes, you are right. I am planning to use a finer granularity and to spawn tasks inside ldiv!, rdiv!, and hmul!.
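
As a concrete reference for the granularity discussion above, here is a self-contained sketch (not part of the diff) of the same task pattern applied to a grid of dense tiles. It uses only the DataFlowTasks API already exercised in this PR (`@dspawn`, `@R`/`@RW`, labels, `fetch`); the tile and grid sizes are arbitrary, and `NoPivot` requires Julia >= 1.7.

    using DataFlowTasks, LinearAlgebra

    n, b = 4, 256                     # n×n grid of b×b dense tiles
    A = [i == j ? rand(b, b) + n * b * I : rand(b, b) for i in 1:n, j in 1:n]

    ts = Any[]                        # keep the task handles so we can wait on them
    for i in 1:n
        t = @dspawn lu!(@RW(A[i, i]), NoPivot()) label = "lu($i,$i)"
        push!(ts, t)
        for j in (i+1):n
            t = @dspawn ldiv!(UnitLowerTriangular(@R(A[i, i])), @RW(A[i, j])) label = "ldiv($i,$j)"
            push!(ts, t)
            t = @dspawn rdiv!(@RW(A[j, i]), UpperTriangular(@R(A[i, i]))) label = "rdiv($j,$i)"
            push!(ts, t)
        end
        for j in (i+1):n, k in (i+1):n
            t = @dspawn mul!(@RW(A[j, k]), @R(A[j, i]), @R(A[i, k]), -1, 1) label = "hmul($j,$k)"
            push!(ts, t)
        end
    end
    foreach(fetch, ts)                # wait for the whole task graph to finish

The labels mirror those used in `_lu_threads!`, so rendering the resulting task graph (with whatever logging/plotting tooling DataFlowTasks provides) should show the same dependency structure at this level of granularity.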


function LinearAlgebra.ldiv!(A::HLU, y::AbstractVector; global_index = true)
p = A.factors # underlying data
ctree = coltree(p)
2 changes: 1 addition & 1 deletion src/utils.jl
@@ -64,7 +64,7 @@ function leaves(tree)
end

"""
leaves(tree)
nodes(tree)
AZEY4 marked this conversation as resolved.

Return a vector containing all the nodes of `tree`.
"""
1 change: 1 addition & 0 deletions test/Project.toml
@@ -1,6 +1,7 @@
[deps]
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
30 changes: 30 additions & 0 deletions test/hmatrix_test.jl
@@ -3,6 +3,7 @@ using StaticArrays
using HMatrices
using LinearAlgebra
using SparseArrays
using DataFlowTasks

include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))

@@ -58,3 +59,32 @@ end
Hnew = axpy!(true, S, deepcopy(H))
@test Matrix(Hnew) == (H_full + Matrix(S))
end

@testset "Memory overlap" begin
m = 1000
T = Float64

X = points_on_sphere(m)
Y = X
K = laplace_matrix(X, X)

X1 = points_on_sphere(m)
Y1 = X1
K1 = laplace_matrix(X1, X1)

splitter = CardinalitySplitter(; nmax = 50)
Xclt = ClusterTree(X, splitter)
Yclt = ClusterTree(Y, splitter)
X1clt = ClusterTree(X1, splitter)
Y1clt = ClusterTree(Y1, splitter)
adm = StrongAdmissibilityStd(3)
comp = PartialACA(; atol = 1e-10)

H = assemble_hmatrix(K, Xclt, Yclt; adm, comp, threads = false, distributed = false)
H1 = assemble_hmatrix(K1, X1clt, Y1clt; adm, comp, threads = false, distributed = false)

# Test memory overlap function
@test DataFlowTasks.memory_overlap(H, H) == true
@test DataFlowTasks.memory_overlap(H, H1) == false
@test DataFlowTasks.memory_overlap(H, H.children[1]) == true
end
2 changes: 1 addition & 1 deletion test/lu_test.jl
@@ -24,7 +24,7 @@ adm = StrongAdmissibilityStd(3)
comp = PartialACA(; atol = 1e-10)
for threads in (false, true)
H = assemble_hmatrix(K, Xclt, Yclt; adm, comp, threads, distributed = false)
hlu = lu(H; atol = 1e-10)
hlu = lu(H; atol = 1e-10, threads = threads)
y = rand(m)
M = Matrix(K)
exact = M \ y