Merge branch 'master' into multicomponents

JuliaMolSim · Dec 21, 2023 · 1683462 · 1683462
2 parents 500978a + ecdfa73
commit 1683462
Show file tree

Hide file tree

Showing 46 changed files with 865 additions and 435 deletions.
diff --git a/docs/src/developer/setup.md b/docs/src/developer/setup.md
@@ -72,6 +72,7 @@ For instance, to launch core functionality tests, one can use
 using TestEnv       # Optional: automatically installs required packages
 TestEnv.activate()  # for tests in a temporary environment.
 using TestItemRunner
+cd("test")          # By default, the following macro runs everything from the parent folder.
 @run_package_tests filter = ti -> :core ∈ ti.tags
 ```
 Or to only run the tests of a particular file `serialisation.jl` use

diff --git a/src/DFTK.jl b/src/DFTK.jl
@@ -27,6 +27,7 @@ export mpi_nprocs
 export mpi_master
 export setup_threading, disable_threading
 include("common/timer.jl")
+include("common/constants.jl")
 include("common/ortho.jl")
 include("common/types.jl")
 include("common/spherical_bessels.jl")
@@ -57,6 +58,7 @@ export ElementGaussian
 export charge_nuclear
 export charge_ionic
 export atomic_symbol
+export atomic_mass
 export n_elec_valence
 export n_elec_core
 include("elements.jl")
@@ -207,7 +209,6 @@ export compute_forces_cart
 include("postprocess/forces.jl")
 export compute_stresses_cart
 include("postprocess/stresses.jl")
-include("postprocess/phonon.jl")
 export compute_dos
 export compute_ldos
 export plot_dos
@@ -219,6 +220,11 @@ include("response/chi0.jl")
 include("response/hessian.jl")
 export compute_current
 include("postprocess/current.jl")
+export phonon_modes
+export phonon_modes_cart
+export compute_dynmat
+export compute_dynmat_cart
+include("postprocess/phonon.jl")
 
 # Workarounds
 include("workarounds/dummy_inplace_fft.jl")

diff --git a/src/Model.jl b/src/Model.jl
@@ -351,7 +351,10 @@ recip_vector_red_to_cart(model::Model, vec) = recip_vector_red_to_cart(model)(ve
 recip_vector_cart_to_red(model::Model, vec) = recip_vector_cart_to_red(model)(vec)
 
 #=
-Transformations on vectors and covectors are matrices and comatrices.
+Transformations on vectors and covectors are matrices and comatrices, i.e. matrices act on
+a vector to give a vector and comatrices act on covector to give a covector.
+Beware that some quantities (e.g., the force constant matrix) map vectors to covectors and
+are therefore not matrices nor comatrices; they fall outside of these methods.
 
 Consider two covectors f and g related by a transformation matrix B. In reduced
 coordinates g_red = B_red f_red and in cartesian coordinates we want g_cart = B_cart f_cart.

diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl
@@ -129,37 +129,48 @@ Base.Broadcast.broadcastable(basis::PlaneWaveBasis) = Ref(basis)
 
 Base.eltype(::PlaneWaveBasis{T}) where {T} = T
 
+
+function Kpoint(spin::Integer, coordinate::AbstractVector{<:Real},
+                recip_lattice::AbstractMatrix{T}, fft_size, Ecut;
+                variational=true, architecture::AbstractArchitecture) where {T}
+    mapping = Int[]
+    Gvecs_k = Vec3{Int}[]
+    k = Vec3{T}(coordinate)
+    # provide a rough hint so that the arrays don't have to be resized so much
+    n_guess = div(prod(fft_size), 8)
+    sizehint!(mapping, n_guess)
+    sizehint!(Gvecs_k, n_guess)
+    for (i, G) in enumerate(G_vectors(fft_size))
+        if !variational || norm2(recip_lattice * (G + k)) / 2 ≤ Ecut
+            push!(mapping, i)
+            push!(Gvecs_k, G)
+        end
+    end
+    Gvecs_k = to_device(architecture, Gvecs_k)
+
+    mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
+    Kpoint(spin, k, mapping, mapping_inv, Gvecs_k)
+end
+function Kpoint(basis::PlaneWaveBasis, coordinate::AbstractVector, spin::Int)
+    Kpoint(spin, coordinate, basis.model.recip_lattice, basis.fft_size, basis.Ecut;
+           basis.variational, basis.architecture)
+end
+
 @timing function build_kpoints(model::Model{T}, fft_size, kcoords, Ecut;
                                variational=true,
                                architecture::AbstractArchitecture) where {T}
-    kpoints_per_spin = [Kpoint[] for _ = 1:model.n_spin_components]
-    for k in kcoords
-        k = Vec3{T}(k)  # rationals are sloooow
-        mapping = Int[]
-        Gvecs_k = Vec3{Int}[]
-        # provide a rough hint so that the arrays don't have to be resized so much
-        n_guess = div(prod(fft_size), 8)
-        sizehint!(mapping, n_guess)
-        sizehint!(Gvecs_k, n_guess)
-        for (i, G) in enumerate(G_vectors(fft_size))
-            if !variational || norm2(model.recip_lattice * (G + k)) / 2 ≤ Ecut
-                push!(mapping, i)
-                push!(Gvecs_k, G)
-            end
-        end
-        Gvecs_k = to_device(architecture, Gvecs_k)
-
-        mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
-        for iσ = 1:model.n_spin_components
-            push!(kpoints_per_spin[iσ],
-                  Kpoint(iσ, k, mapping, mapping_inv, Gvecs_k))
+    # Build all k-points for the first spin
+    kpoints_spin_1 = [Kpoint(1, k, model.recip_lattice, fft_size, Ecut;
+                             variational, architecture)
+                      for k in kcoords]
+    all_kpoints = similar(kpoints_spin_1, 0)
+    for iσ = 1:model.n_spin_components
+        for kpt in kpoints_spin_1
+            push!(all_kpoints, Kpoint(iσ, kpt.coordinate,
+                                      kpt.mapping, kpt.mapping_inv, kpt.G_vectors))
         end
     end
-    vcat(kpoints_per_spin...)  # put all spin up first, then all spin down
-end
-function build_kpoints(basis::PlaneWaveBasis, kcoords)
-    build_kpoints(basis.model, basis.fft_size, kcoords, basis.Ecut;
-                  variational=basis.variational, basis.architecture)
+    all_kpoints
 end
 
 # Lowest-level constructor, should not be called directly.

diff --git a/src/common/constants.jl b/src/common/constants.jl
@@ -0,0 +1,2 @@
+# Phonons modes are usually given in cm⁻¹.
+const hartree_to_cm⁻¹ = ustrip(u"cm^-1", 1u"bohr^-1") ./ austrip(1u"c0") / 2π
diff --git a/src/common/threading.jl b/src/common/threading.jl
@@ -21,3 +21,22 @@ function disable_threading()
     @assert n_julia == 1  # To exit in non-master MPI nodes
     setup_threading(;n_fft=1, n_blas=1)
 end
+
+# Parallelization loop breaking range into chunks.
+function parallel_loop_over_range(fun, storages::AbstractVector, range)
+    chunk_length = cld(length(range), Threads.nthreads())
+
+    iszero(chunk_length) && return storages
+
+    @sync for (ichunk, chunk) in enumerate(Iterators.partition(range, chunk_length))
+        Threads.@spawn for idc in chunk  # spawn a task per chunk
+            fun(storages[ichunk], idc)
+        end
+    end
+
+    return storages
+end
+function parallel_loop_over_range(fun, allocate_local_storage::Function, range)
+    storages = [allocate_local_storage() for _ = 1:Threads.nthreads()]
+    parallel_loop_over_range(fun, storages, range)
+end
diff --git a/src/densities.jl b/src/densities.jl
@@ -14,45 +14,34 @@ using an optional `occupation_threshold`. By default all occupation numbers are
 """
 @views @timing function compute_density(basis::PlaneWaveBasis{T}, ψ, occupation;
                                         occupation_threshold=zero(T)) where {T}
-    S = promote_type(T, real(eltype(ψ[1])))
-    # occupation should be on the CPU as we are going to be doing scalar indexing.
-    occupation = [to_cpu(oc) for oc in occupation]
+    Tρ = promote_type(T, real(eltype(ψ[1])))
+    n_components = basis.model.n_components
 
+    # Occupation should be on the CPU as we are going to be doing scalar indexing.
+    occupation = [to_cpu(oc) for oc in occupation]
     mask_occ = [findall(occnk -> abs(occnk) ≥ occupation_threshold, occk)
                 for occk in occupation]
-    if all(isempty, mask_occ)  # No non-zero occupations => return zero density
-        ρ = zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
-    else
-        # we split the total iteration range (ik, n) in chunks, and parallelize over them
-        ik_n = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
-        chunk_length = cld(length(ik_n), Threads.nthreads())
 
-        # chunk-local variables
-        ρ_chunklocal = map(1:Threads.nthreads()) do i
-            zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
-        end
-        ψnk_real_chunklocal = [zeros_like(basis.G_vectors, complex(S),
-                                           basis.model.n_components, basis.fft_size...)
-                                for _ = 1:Threads.nthreads()]
-
-        @sync for (ichunk, chunk) in enumerate(Iterators.partition(ik_n, chunk_length))
-            Threads.@spawn for (ik, n) in chunk  # spawn a task per chunk
-                ρ_loc = ρ_chunklocal[ichunk]
-                ψnk_real = ψnk_real_chunklocal[ichunk]
-                kpt = basis.kpoints[ik]
-
-                ifft!(ψnk_real, basis, kpt, ψ[ik][:, :, n])
-                for σ = 1:basis.model.n_components
-                    ρ_loc[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
-                                                  .* abs2.(ψnk_real[σ, :, :, :]))
-
-                end
-                synchronize_device(basis.architecture)
-            end
+    function allocate_local_storage()
+        (; ρ=zeros_like(basis.G_vectors, Tρ, basis.fft_size..., basis.model.n_spin_components),
+         ψnk_real=zeros_like(basis.G_vectors, complex(Tρ), n_components, basis.fft_size...))
+    end
+    # We split the total iteration range (ik, n) in chunks, and parallelize over them.
+    range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
+
+    storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
+        (ik, n) = kn
+        kpt = basis.kpoints[ik]
+
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
+        for σ = 1:n_components
+            storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
+                                                  .* abs2.(storage.ψnk_real[σ, :, :, :]))
         end
 
-        ρ = sum(ρ_chunklocal)
+        synchronize_device(basis.architecture)
     end
+    ρ = sum(getfield.(storages, :ρ))
 
     mpi_sum!(ρ, basis.comm_kpts)
     ρ = symmetrize_ρ(basis, ρ; do_lowpass=false)
@@ -62,18 +51,62 @@ using an optional `occupation_threshold`. By default all occupation numbers are
     negtol = max(sqrt(eps(T)), 10occupation_threshold)
     minimum(ρ) < -negtol && @warn("Negative ρ detected", min_ρ=minimum(ρ))
 
-    ρ
+    ρ::AbstractArray{Tρ, 4}
 end
 
 # Variation in density corresponding to a variation in the orbitals and occupations.
-@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ,
-                                   occupation, δoccupation=zero.(occupation);
-                                   occupation_threshold=zero(T)) where {T}
-    ForwardDiff.derivative(zero(T)) do ε
-        ψ_ε   = [ψk   .+ ε .* δψk   for (ψk,   δψk)   in zip(ψ, δψ)]
-        occ_ε = [occk .+ ε .* δocck for (occk, δocck) in zip(occupation, δoccupation)]
-        compute_density(basis, ψ_ε, occ_ε; occupation_threshold)
+@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ, occupation,
+                                   δoccupation=zero.(occupation);
+                                   occupation_threshold=zero(T), q=zero(Vec3{T})) where {T}
+    Tψ = promote_type(T, eltype(ψ[1]))
+    # δρ is expected to be real when computations are not phonon-related.
+    Tδρ = iszero(q) ? real(Tψ) : Tψ
+    real_qzero = iszero(q) ? real : identity
+    n_components = basis.model.n_components
+
+    ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, q)]
+
+    # occupation should be on the CPU as we are going to be doing scalar indexing.
+    occupation = [to_cpu(oc) for oc in occupation]
+    mask_occ = [findall(occnk -> abs(occnk) ≥ occupation_threshold, occk)
+                for occk in occupation]
+
+    function allocate_local_storage()
+        (; δρ=zeros_like(basis.G_vectors, Tδρ, basis.fft_size..., basis.model.n_spin_components),
+          ψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...),
+         δψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...))
     end
+    range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
+
+    storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
+        (ik, n) = kn
+
+        kpt = basis.kpoints[ik]
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
+        # We return the δψk in the basis k+q which are associated to a displacement of the ψk.
+        kpt_plus_q, δψk_plus_q = kpq_equivalent_blochwave_to_kpq(basis, kpt, q,
+                                                                 ordering(δψ)[ik])
+        # The perturbation of the density
+        #   |ψ_nk|² is 2 ψ_{n,k} * δψ_{n,k+q}.
+        ifft!(storage.δψnk_real, basis, kpt_plus_q, δψk_plus_q[:, :, n])
+
+        for σ = 1:basis.model.n_components
+            storage.δρ[:, :, :, kpt.spin] .+= real_qzero(
+                2 .* occupation[ik][n] .* basis.kweights[ik]
+                                       .* conj(storage.ψnk_real[σ, :, :, :])
+                                       .* storage.δψnk_real[σ, :, :, :]
+                 .+ δoccupation[ik][n] .* basis.kweights[ik]
+                                       .* abs2.(storage.ψnk_real[σ, :, :, :]))
+        end
+
+        synchronize_device(basis.architecture)
+    end
+    δρ = sum(getfield.(storages, :δρ))
+
+    mpi_sum!(δρ, basis.comm_kpts)
+    δρ = symmetrize_ρ(basis, δρ; do_lowpass=false)
+
+    δρ
 end
 
 @views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis{TT}, ψ,
@@ -82,12 +115,12 @@ end
     T = promote_type(TT, real(eltype(ψ[1])))
     τ = similar(ψ[1], T, (basis.fft_size..., basis.model.n_spin_components))
     τ .= 0
-    dαψnk_real = zeros(complex(T), basis.fft_size)
+    dασψnk_real = zeros(complex(T), basis.fft_size)
     for (ik, kpt) in enumerate(basis.kpoints)
         G_plus_k = [[Gk[α] for Gk in Gplusk_vectors_cart(basis, kpt)] for α = 1:3]
-        for n = 1:size(ψ[ik], 3), α = 1:3, σ = 1:basis.model.n_components
-            ifft!(dαψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
-            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψnk_real)
+        for n = 1:size(ψ[ik], 3), σ = 1:basis.model.n_components, α = 1:3
+            ifft!(dασψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
+            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dασψnk_real)
         end
     end
     mpi_sum!(τ, basis.comm_kpts)

diff --git a/src/density_methods.jl b/src/density_methods.jl
@@ -157,7 +157,7 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T},
         ρ_iG / sqrt(model.unit_cell_volume)
     end
     ρ = to_device(basis.architecture, ρ_cpu)
-    enforce_real!(basis, ρ)  # Symmetrize Fourier coeffs to have real iFFT
+    enforce_real!(ρ, basis)  # Symmetrize Fourier coeffs to have real iFFT
     irfft(basis, ρ)
 end
 

diff --git a/src/eigen/preconditioners.jl b/src/eigen/preconditioners.jl
@@ -83,10 +83,11 @@ ldiv!(P::PreconditionerTPA, R) = ldiv!(R, P, R)
 end
 (Base.:*)(P::PreconditionerTPA, R) = mul!(copy(R), P, R)
 
-function precondprep!(P::PreconditionerTPA, X)
+function precondprep!(P::PreconditionerTPA, X::AbstractArray)
     P.mean_kin = map(eachcol(X)) do x
         x = from_composite_σG(P.basis, P.kpoint, x)
         sum(real(dot(x[σ, :, :], Diagonal(P.kin), x[σ, :, :]))
             for σ = 1:P.basis.model.n_components)
     end
 end
+precondprep!(P::PreconditionerTPA, ::Nothing) = 1  # fallback for edge cases