Implementation of LU decomposition using data flow tasks #65

Draft · wants to merge 5 commits into base: main
Changes from 4 commits
1 change: 1 addition & 0 deletions Project.toml
@@ -3,6 +3,7 @@ uuid = "8646bddf-ab1c-4fa7-9c51-ba187d647618"
version = "0.2.8"

[deps]
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
51 changes: 51 additions & 0 deletions src/hmatrix.jl
@@ -1,3 +1,5 @@
using DataFlowTasks

"""
mutable struct HMatrix{R,T} <: AbstractMatrix{T}

@@ -156,6 +158,7 @@
@printf(io, "\n\t min number of elements per leaf: %i", minimum(points_per_leaf))
@printf(io, "\n\t max number of elements per leaf: %i", maximum(points_per_leaf))
depth_per_leaf = map(depth, leaves_)
# TODO: the depth of the tree is always zero because all leaves are roots
@printf(io, "\n\t depth of tree: %i", maximum(depth_per_leaf))
@printf(io, "\n\t compression ratio: %f\n", compression_ratio(hmat))
return io
@@ -467,6 +470,54 @@
return H
end

"""
ancestors(H::HMatrix)

Return all ancestors of `H`.
"""
function ancestors(H::HMatrix)
ancestors = []
W = H
while W !== parentnode(W)
W = parentnode(W)
push!(ancestors, W)
end
return ancestors
end

"""
issubmatrix(A::HMatrix, B::HMatrix)

Return `true` if `B` is a submatrix of `A`.
"""
function issubmatrix(A::HMatrix, B::HMatrix)
for ancB in ancestors(B)
if A === ancB
return true
end
end
return false
end

function DataFlowTasks.memory_overlap(A::HMatrix, B::HMatrix)
# TODO: compare leaves in a more efficient way.
if A === B
return true
elseif issubmatrix(A, B) || issubmatrix(B, A)
return true
end
chdA = leaves(A)
chdB = leaves(B)
for i in eachindex(chdA)
for j in eachindex(chdB)
if data(chdA[i]) === data(chdB[j])
return true

Codecov (codecov/patch) check warning on src/hmatrix.jl#L514: added line was not covered by tests.
Comment on lines +502 to +514

Member:
I think for most use cases, we can simply:

  1. Check if A and B have a common root
  2. Check the intersection of the row and column ranges of A and B

When they don't have a common root, we can (probably?) assume they don't overlap? (See the sketch after this thread.)

Collaborator Author:
Even if they don't have a common root, it is not necessarily true that they don't overlap. For example, we can have this situation:

    M = Matrix(...)
    H = HMatrix(M, ...)
    H1 = HMatrix(M, ...)

Then even if we check the intersection of H and H1, we still need to compare their data anyway. But we can assume that H and H1 are built correctly (without using one data matrix for more than one HMatrix) and then remove the loops altogether.
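
For reference, a minimal sketch of the range-based check proposed above. It is not part of the diff; it assumes HMatrices exposes `rowrange`/`colrange` accessors for the index ranges of a block, and that correctly built trees with distinct roots never share data, as discussed in this thread. It reuses the `ancestors` helper introduced in this PR.

    # Sketch only: relies on the assumptions stated above (rowrange/colrange accessors,
    # distinct roots imply distinct data).
    function overlap_by_ranges(A::HMatrix, B::HMatrix)
        rootA = isempty(ancestors(A)) ? A : last(ancestors(A))  # ancestors returns the root last
        rootB = isempty(ancestors(B)) ? B : last(ancestors(B))
        rootA === rootB || return false  # assumed: different trees do not alias memory
        return !isempty(intersect(rowrange(A), rowrange(B))) &&
               !isempty(intersect(colrange(A), colrange(B)))
    end

With this check, `H` and `H.children[1]` still overlap (same root, nested ranges), while distinct sibling blocks are ruled out without comparing their leaves pairwise.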

end
end
end
return false
end

############################################################################################
# Recipes
############################################################################################
65 changes: 60 additions & 5 deletions src/lu.jl
@@ -1,3 +1,5 @@
using DataFlowTasks

const NOPIVOT = VERSION >= v"1.7" ? NoPivot : Val{false}

const HLU = LU{<:Any,<:HMatrix}
@@ -30,9 +32,16 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads())
nt = Threads.nthreads()
chn = Channel{ACABuffer{T}}(nt)
foreach(i -> put!(chn, ACABuffer(T)), 1:nt)
_lu!(M, compressor, threads, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
if threads
_lu_threads!(M, compressor, chn)
# wrap the result in the LU structure
d = @dspawn LU(@R(M), Int[], 0) label = "result"
return fetch(d)
else
_lu!(M, compressor, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
end
end

"""
@@ -59,7 +68,7 @@ Hierarchical LU factorization. See [`lu!`](@ref) for the available options.
"""
LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...)

function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
function _lu!(M::HMatrix, compressor, bufs = nothing)
if isleaf(M)
d = data(M)
@assert d isa Matrix
@@ -69,7 +78,7 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu!(chdM[i, i], compressor, threads, bufs)
_lu!(chdM[i, i], compressor, bufs)
for j in (i+1):n
ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor, bufs)
rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor, bufs)
@@ -84,6 +93,52 @@
return M
end

function _lu_threads!(M::HMatrix, compressor, bufs = nothing, level = 0, parent = (0, 0))
if isleaf(M)
@dspawn begin
@RW(M)
d = data(M)
@assert d isa Matrix
lu!(d, NOPIVOT())
end label = "lu($(parent[1]),$(parent[2]))\nlevel=$(level)"
else
@assert !hasdata(M)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu_threads!(chdM[i, i], compressor, bufs, level + 1, (i, i))
for j in (i+1):n
@dspawn ldiv!(
UnitLowerTriangular(@R(chdM[i, i])),
@RW(chdM[i, j]),
compressor,
bufs,
) label = "ldiv($i,$j)\nlevel=$(level+1)"
@dspawn rdiv!(
@RW(chdM[j, i]),
UpperTriangular(@R(chdM[i, i])),
compressor,
bufs,
) label = "rdiv($j,$i)\nlevel=$(level+1)"
end
for j in (i+1):m
for k in (i+1):n
@dspawn hmul!(
@RW(chdM[j, k]),
@R(chdM[j, i]),
@R(chdM[i, k]),
-1,
1,
compressor,
bufs,
) label = "hmul($j,$k)\nlevel=$(level+1)"
end
end
end
end
return M
end
Comment on lines +96 to +140

Member:
Seems fine as a first implementation, but this level of granularity is not going to be sufficient for good parallel scaling (I think). You probably need to spawn tasks at a finer scale inside the hmul! and ldiv!/rdiv! functions...

We should probably start by looking at the TaskGraph for the current implementation. Could you post one, e.g. on the PR? (A dense-tile sketch of the current task pattern is included after this thread.)

Collaborator Author (@AZEY4, Jun 24, 2024):
Yes, you are right. I am planning to use a finer granularity and to spawn tasks inside ldiv!, rdiv!, and hmul!.
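
As a concrete reference for the granularity discussion above, here is a self-contained sketch (not part of the diff) of the same task pattern applied to a grid of dense tiles. It uses only the DataFlowTasks API already exercised in this PR (`@dspawn`, `@R`/`@RW`, labels, `fetch`); the tile and grid sizes are arbitrary, and `NoPivot` requires Julia >= 1.7.

    using DataFlowTasks, LinearAlgebra

    n, b = 4, 256                     # n×n grid of b×b dense tiles
    A = [i == j ? rand(b, b) + n * b * I : rand(b, b) for i in 1:n, j in 1:n]

    ts = Any[]                        # keep the task handles so we can wait on them
    for i in 1:n
        t = @dspawn lu!(@RW(A[i, i]), NoPivot()) label = "lu($i,$i)"
        push!(ts, t)
        for j in (i+1):n
            t = @dspawn ldiv!(UnitLowerTriangular(@R(A[i, i])), @RW(A[i, j])) label = "ldiv($i,$j)"
            push!(ts, t)
            t = @dspawn rdiv!(@RW(A[j, i]), UpperTriangular(@R(A[i, i]))) label = "rdiv($j,$i)"
            push!(ts, t)
        end
        for j in (i+1):n, k in (i+1):n
            t = @dspawn mul!(@RW(A[j, k]), @R(A[j, i]), @R(A[i, k]), -1, 1) label = "hmul($j,$k)"
            push!(ts, t)
        end
    end
    foreach(fetch, ts)                # wait for the whole task graph to finish

The labels mirror those used in `_lu_threads!`, so rendering the resulting task graph (with whatever logging/plotting tooling DataFlowTasks provides) should show the same dependency structure at this level of granularity.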


function LinearAlgebra.ldiv!(A::HLU, y::AbstractVector; global_index = true)
p = A.factors # underlying data
ctree = coltree(p)
2 changes: 1 addition & 1 deletion src/utils.jl
@@ -64,7 +64,7 @@ function leaves(tree)
end

"""
leaves(tree)
nodes(tree)
AZEY4 marked this conversation as resolved.

Return a vector containing all the nodes of `tree`.
"""
1 change: 1 addition & 0 deletions test/Project.toml
@@ -1,6 +1,7 @@
[deps]
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
30 changes: 30 additions & 0 deletions test/hmatrix_test.jl
@@ -3,6 +3,7 @@ using StaticArrays
using HMatrices
using LinearAlgebra
using SparseArrays
using DataFlowTasks

include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))

@@ -58,3 +59,32 @@ end
Hnew = axpy!(true, S, deepcopy(H))
@test Matrix(Hnew) == (H_full + Matrix(S))
end

@testset "Memory overlap" begin
m = 1000
T = Float64

X = points_on_sphere(m)
Y = X
K = laplace_matrix(X, X)

X1 = points_on_sphere(m)
Y1 = X1
K1 = laplace_matrix(X1, X1)

splitter = CardinalitySplitter(; nmax = 50)
Xclt = ClusterTree(X, splitter)
Yclt = ClusterTree(Y, splitter)
X1clt = ClusterTree(X1, splitter)
Y1clt = ClusterTree(Y1, splitter)
adm = StrongAdmissibilityStd(3)
comp = PartialACA(; atol = 1e-10)

H = assemble_hmatrix(K, Xclt, Yclt; adm, comp, threads = false, distributed = false)
H1 = assemble_hmatrix(K1, X1clt, Y1clt; adm, comp, threads = false, distributed = false)

# Test memory overlap function
@test DataFlowTasks.memory_overlap(H, H) == true
@test DataFlowTasks.memory_overlap(H, H1) == false
@test DataFlowTasks.memory_overlap(H, H.children[1]) == true
end
2 changes: 1 addition & 1 deletion test/lu_test.jl
@@ -24,7 +24,7 @@ adm = StrongAdmissibilityStd(3)
comp = PartialACA(; atol = 1e-10)
for threads in (false, true)
H = assemble_hmatrix(K, Xclt, Yclt; adm, comp, threads, distributed = false)
hlu = lu(H; atol = 1e-10)
hlu = lu(H; atol = 1e-10, threads = threads)
y = rand(m)
M = Matrix(K)
exact = M \ y