-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implementation of LU decomposition using data flow tasks #65
base: main
Are you sure you want to change the base?
Changes from 4 commits
e7daf13
186e0de
88f38e2
78e26b9
4bfb148
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
using DataFlowTasks | ||
|
||
# `NoPivot` only exists on Julia >= 1.7; on older versions `Val{false}` is the
# equivalent way to request an unpivoted factorization from `lu!`.
const NOPIVOT = VERSION >= v"1.7" ? NoPivot : Val{false}

# Alias for an LU factorization whose factors are stored in an `HMatrix`.
const HLU = LU{<:Any,<:HMatrix}
|
@@ -30,9 +32,16 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads()) | |
nt = Threads.nthreads() | ||
chn = Channel{ACABuffer{T}}(nt) | ||
foreach(i -> put!(chn, ACABuffer(T)), 1:nt) | ||
_lu!(M, compressor, threads, chn) | ||
# wrap the result in the LU structure | ||
return LU(M, Int[], 0) | ||
if (threads) | ||
_lu_threads!(M, compressor, chn) | ||
# wrap the result in the LU structure | ||
d = @dspawn LU(@R(M), Int[], 0) label = "result" | ||
return fetch(d) | ||
else | ||
_lu!(M, compressor, chn) | ||
# wrap the result in the LU structure | ||
return LU(M, Int[], 0) | ||
end | ||
end | ||
|
||
""" | ||
|
@@ -59,7 +68,7 @@ Hierarchical LU factorization. See [`lu!`](@ref) for the available options. | |
""" | ||
# Out-of-place hierarchical LU: factor a deep copy so the caller's `M` is
# left untouched. All options are forwarded to [`lu!`](@ref).
function LinearAlgebra.lu(M::HMatrix, args...; kwargs...)
    return lu!(deepcopy(M), args...; kwargs...)
end
|
||
function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | ||
function _lu!(M::HMatrix, compressor, bufs = nothing) | ||
if isleaf(M) | ||
d = data(M) | ||
@assert d isa Matrix | ||
|
@@ -69,7 +78,7 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | |
chdM = children(M) | ||
m, n = size(chdM) | ||
for i in 1:m | ||
_lu!(chdM[i, i], compressor, threads, bufs) | ||
_lu!(chdM[i, i], compressor, bufs) | ||
for j in (i+1):n | ||
ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor, bufs) | ||
rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor, bufs) | ||
|
@@ -84,6 +93,52 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | |
return M | ||
end | ||
|
||
# Task-parallel counterpart of `_lu!`: recursively factor `M` in place,
# spawning one DataFlowTasks task per leaf factorization, triangular solve,
# and Schur-complement update. Task ordering is inferred from the `@R`/`@RW`
# data annotations; `level` and `parent` only feed the human-readable task
# labels used for debugging/visualization.
function _lu_threads!(M::HMatrix, compressor, bufs = nothing, level = 0, parent = (0, 0))
    if isleaf(M)
        # Leaf block: factor the dense data in place as a single task.
        @dspawn begin
            @RW(M)
            d = data(M)
            @assert d isa Matrix
            lu!(d, NOPIVOT())
        end label = "lu($(parent[1]),$(parent[2]))\nlevel=$(level)"
        return M
    end
    @assert !hasdata(M)
    blocks = children(M)
    nr, nc = size(blocks)
    lvl = level + 1
    for s in 1:nr
        # Factor the diagonal block, then solve the panels in its row/column.
        _lu_threads!(blocks[s, s], compressor, bufs, lvl, (s, s))
        for j in (s+1):nc
            @dspawn ldiv!(
                UnitLowerTriangular(@R(blocks[s, s])),
                @RW(blocks[s, j]),
                compressor,
                bufs,
            ) label = "ldiv($s,$j)\nlevel=$(lvl)"
            @dspawn rdiv!(
                @RW(blocks[j, s]),
                UpperTriangular(@R(blocks[s, s])),
                compressor,
                bufs,
            ) label = "rdiv($j,$s)\nlevel=$(lvl)"
        end
        # Schur-complement update of the trailing submatrix.
        for r in (s+1):nr, c in (s+1):nc
            @dspawn hmul!(
                @RW(blocks[r, c]),
                @R(blocks[r, s]),
                @R(blocks[s, c]),
                -1,
                1,
                compressor,
                bufs,
            ) label = "hmul($r,$c)\nlevel=$(lvl)"
        end
    end
    return M
end
Comment on lines
+96
to
+140
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems fine as a first implementation, but this level of granularity is not going to be sufficient for a good parallel scaling (I think). You probably need to spawn task a finer scale inside the We should probably start by looking at the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, you are right. I am planning to use higher level of granularity and to spawn tasks inside ldiv!, rdiv! and hmul!. |
||
|
||
function LinearAlgebra.ldiv!(A::HLU, y::AbstractVector; global_index = true) | ||
p = A.factors # underlying data | ||
ctree = coltree(p) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think for most use cases, we can simply:
A
andB
have a common rootA
andB
When they don't have a common root, we can (probably?) assume they don't overlap?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If they don't have a common root, it does not necessarily follow that they don't overlap. For example, we can have this situation:
M = Matrix(...)
H = HMatrix(M, ...)
H1 = HMatrix(M, ...)
Then even if we check intersections of H and H1, we still need to compare their underlying data anyway. But we can assume that H and H1 are built correctly (without using one data matrix for more than one HMatrix) and then remove the loops altogether.