diff --git a/docs/src/developer/setup.md b/docs/src/developer/setup.md
index 2800ffc0e9..13652a95af 100644
--- a/docs/src/developer/setup.md
+++ b/docs/src/developer/setup.md
@@ -72,6 +72,7 @@ For instance, to launch core functionality tests, one can use
 using TestEnv       # Optional: automatically installs required packages
 TestEnv.activate()  # for tests in a temporary environment.
 using TestItemRunner
+cd("test")          # By default, the following macro runs everything from the parent folder.
 @run_package_tests filter = ti -> :core ∈ ti.tags
 ```
 Or to only run the tests of a particular file `serialisation.jl` use
diff --git a/src/DFTK.jl b/src/DFTK.jl
index d5fa385e95..532a6e06ad 100644
--- a/src/DFTK.jl
+++ b/src/DFTK.jl
@@ -27,6 +27,7 @@ export mpi_nprocs
 export mpi_master
 export setup_threading, disable_threading
 include("common/timer.jl")
+include("common/constants.jl")
 include("common/ortho.jl")
 include("common/types.jl")
 include("common/spherical_bessels.jl")
@@ -57,6 +58,7 @@ export ElementGaussian
 export charge_nuclear
 export charge_ionic
 export atomic_symbol
+export atomic_mass
 export n_elec_valence
 export n_elec_core
 include("elements.jl")
@@ -207,7 +209,6 @@ export compute_forces_cart
 include("postprocess/forces.jl")
 export compute_stresses_cart
 include("postprocess/stresses.jl")
-include("postprocess/phonon.jl")
 export compute_dos
 export compute_ldos
 export plot_dos
@@ -219,6 +220,11 @@ include("response/chi0.jl")
 include("response/hessian.jl")
 export compute_current
 include("postprocess/current.jl")
+export phonon_modes
+export phonon_modes_cart
+export compute_dynmat
+export compute_dynmat_cart
+include("postprocess/phonon.jl")
 
 # Workarounds
 include("workarounds/dummy_inplace_fft.jl")
diff --git a/src/Model.jl b/src/Model.jl
index 2e5c51a43d..07491b100b 100644
--- a/src/Model.jl
+++ b/src/Model.jl
@@ -351,7 +351,10 @@ recip_vector_red_to_cart(model::Model, vec) = recip_vector_red_to_cart(model)(ve
 recip_vector_cart_to_red(model::Model, vec) = recip_vector_cart_to_red(model)(vec)
 
 #=
-Transformations on vectors and covectors are matrices and comatrices.
+Transformations on vectors and covectors are matrices and comatrices, i.e. matrices act on
+a vector to give a vector and comatrices act on covector to give a covector.
+Beware that some quantities (e.g., the force constant matrix) map vectors to covectors and
+are therefore not matrices nor comatrices; they fall outside of these methods.
 
 Consider two covectors f and g related by a transformation matrix B. In reduced
 coordinates g_red = B_red f_red and in cartesian coordinates we want g_cart = B_cart f_cart.
diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl
index 6690bcd9ca..2d89cdd5d7 100644
--- a/src/PlaneWaveBasis.jl
+++ b/src/PlaneWaveBasis.jl
@@ -129,37 +129,48 @@ Base.Broadcast.broadcastable(basis::PlaneWaveBasis) = Ref(basis)
 
 Base.eltype(::PlaneWaveBasis{T}) where {T} = T
 
+
+function Kpoint(spin::Integer, coordinate::AbstractVector{<:Real},
+                recip_lattice::AbstractMatrix{T}, fft_size, Ecut;
+                variational=true, architecture::AbstractArchitecture) where {T}
+    mapping = Int[]
+    Gvecs_k = Vec3{Int}[]
+    k = Vec3{T}(coordinate)
+    # provide a rough hint so that the arrays don't have to be resized so much
+    n_guess = div(prod(fft_size), 8)
+    sizehint!(mapping, n_guess)
+    sizehint!(Gvecs_k, n_guess)
+    for (i, G) in enumerate(G_vectors(fft_size))
+        if !variational || norm2(recip_lattice * (G + k)) / 2 ≤ Ecut
+            push!(mapping, i)
+            push!(Gvecs_k, G)
+        end
+    end
+    Gvecs_k = to_device(architecture, Gvecs_k)
+
+    mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
+    Kpoint(spin, k, mapping, mapping_inv, Gvecs_k)
+end
+function Kpoint(basis::PlaneWaveBasis, coordinate::AbstractVector, spin::Int)
+    Kpoint(spin, coordinate, basis.model.recip_lattice, basis.fft_size, basis.Ecut;
+           basis.variational, basis.architecture)
+end
+
 @timing function build_kpoints(model::Model{T}, fft_size, kcoords, Ecut;
                                variational=true,
                                architecture::AbstractArchitecture) where {T}
-    kpoints_per_spin = [Kpoint[] for _ = 1:model.n_spin_components]
-    for k in kcoords
-        k = Vec3{T}(k)  # rationals are sloooow
-        mapping = Int[]
-        Gvecs_k = Vec3{Int}[]
-        # provide a rough hint so that the arrays don't have to be resized so much
-        n_guess = div(prod(fft_size), 8)
-        sizehint!(mapping, n_guess)
-        sizehint!(Gvecs_k, n_guess)
-        for (i, G) in enumerate(G_vectors(fft_size))
-            if !variational || norm2(model.recip_lattice * (G + k)) / 2 ≤ Ecut
-                push!(mapping, i)
-                push!(Gvecs_k, G)
-            end
-        end
-        Gvecs_k = to_device(architecture, Gvecs_k)
-
-        mapping_inv = Dict(ifull => iball for (iball, ifull) in enumerate(mapping))
-        for iσ = 1:model.n_spin_components
-            push!(kpoints_per_spin[iσ],
-                  Kpoint(iσ, k, mapping, mapping_inv, Gvecs_k))
+    # Build all k-points for the first spin
+    kpoints_spin_1 = [Kpoint(1, k, model.recip_lattice, fft_size, Ecut;
+                             variational, architecture)
+                      for k in kcoords]
+    all_kpoints = similar(kpoints_spin_1, 0)
+    for iσ = 1:model.n_spin_components
+        for kpt in kpoints_spin_1
+            push!(all_kpoints, Kpoint(iσ, kpt.coordinate,
+                                      kpt.mapping, kpt.mapping_inv, kpt.G_vectors))
         end
     end
-    vcat(kpoints_per_spin...)  # put all spin up first, then all spin down
-end
-function build_kpoints(basis::PlaneWaveBasis, kcoords)
-    build_kpoints(basis.model, basis.fft_size, kcoords, basis.Ecut;
-                  variational=basis.variational, basis.architecture)
+    all_kpoints
 end
 
 # Lowest-level constructor, should not be called directly.
diff --git a/src/common/constants.jl b/src/common/constants.jl
new file mode 100644
index 0000000000..14fb937231
--- /dev/null
+++ b/src/common/constants.jl
@@ -0,0 +1,2 @@
+# Phonons modes are usually given in cm⁻¹.
+const hartree_to_cm⁻¹ = ustrip(u"cm^-1", 1u"bohr^-1") ./ austrip(1u"c0") / 2π
diff --git a/src/common/threading.jl b/src/common/threading.jl
index b052affc3a..268688a6b5 100644
--- a/src/common/threading.jl
+++ b/src/common/threading.jl
@@ -21,3 +21,22 @@ function disable_threading()
     @assert n_julia == 1  # To exit in non-master MPI nodes
     setup_threading(;n_fft=1, n_blas=1)
 end
+
+# Parallelization loop breaking range into chunks.
+function parallel_loop_over_range(fun, storages::AbstractVector, range)
+    chunk_length = cld(length(range), Threads.nthreads())
+
+    iszero(chunk_length) && return storages
+
+    @sync for (ichunk, chunk) in enumerate(Iterators.partition(range, chunk_length))
+        Threads.@spawn for idc in chunk  # spawn a task per chunk
+            fun(storages[ichunk], idc)
+        end
+    end
+
+    return storages
+end
+function parallel_loop_over_range(fun, allocate_local_storage::Function, range)
+    storages = [allocate_local_storage() for _ = 1:Threads.nthreads()]
+    parallel_loop_over_range(fun, storages, range)
+end
diff --git a/src/densities.jl b/src/densities.jl
index 45d44036bb..ebab968065 100644
--- a/src/densities.jl
+++ b/src/densities.jl
@@ -14,45 +14,34 @@ using an optional `occupation_threshold`. By default all occupation numbers are
 """
 @views @timing function compute_density(basis::PlaneWaveBasis{T}, ψ, occupation;
                                         occupation_threshold=zero(T)) where {T}
-    S = promote_type(T, real(eltype(ψ[1])))
-    # occupation should be on the CPU as we are going to be doing scalar indexing.
-    occupation = [to_cpu(oc) for oc in occupation]
+    Tρ = promote_type(T, real(eltype(ψ[1])))
+    n_components = basis.model.n_components
 
+    # Occupation should be on the CPU as we are going to be doing scalar indexing.
+    occupation = [to_cpu(oc) for oc in occupation]
     mask_occ = [findall(occnk -> abs(occnk) ≥ occupation_threshold, occk)
                 for occk in occupation]
-    if all(isempty, mask_occ)  # No non-zero occupations => return zero density
-        ρ = zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
-    else
-        # we split the total iteration range (ik, n) in chunks, and parallelize over them
-        ik_n = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
-        chunk_length = cld(length(ik_n), Threads.nthreads())
 
-        # chunk-local variables
-        ρ_chunklocal = map(1:Threads.nthreads()) do i
-            zeros_like(basis.G_vectors, S, basis.fft_size..., basis.model.n_spin_components)
-        end
-        ψnk_real_chunklocal = [zeros_like(basis.G_vectors, complex(S),
-                                           basis.model.n_components, basis.fft_size...)
-                                for _ = 1:Threads.nthreads()]
-
-        @sync for (ichunk, chunk) in enumerate(Iterators.partition(ik_n, chunk_length))
-            Threads.@spawn for (ik, n) in chunk  # spawn a task per chunk
-                ρ_loc = ρ_chunklocal[ichunk]
-                ψnk_real = ψnk_real_chunklocal[ichunk]
-                kpt = basis.kpoints[ik]
-
-                ifft!(ψnk_real, basis, kpt, ψ[ik][:, :, n])
-                for σ = 1:basis.model.n_components
-                    ρ_loc[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
-                                                  .* abs2.(ψnk_real[σ, :, :, :]))
-
-                end
-                synchronize_device(basis.architecture)
-            end
+    function allocate_local_storage()
+        (; ρ=zeros_like(basis.G_vectors, Tρ, basis.fft_size..., basis.model.n_spin_components),
+         ψnk_real=zeros_like(basis.G_vectors, complex(Tρ), n_components, basis.fft_size...))
+    end
+    # We split the total iteration range (ik, n) in chunks, and parallelize over them.
+    range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
+
+    storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
+        (ik, n) = kn
+        kpt = basis.kpoints[ik]
+
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
+        for σ = 1:n_components
+            storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik]
+                                                  .* abs2.(storage.ψnk_real[σ, :, :, :]))
         end
 
-        ρ = sum(ρ_chunklocal)
+        synchronize_device(basis.architecture)
     end
+    ρ = sum(getfield.(storages, :ρ))
 
     mpi_sum!(ρ, basis.comm_kpts)
     ρ = symmetrize_ρ(basis, ρ; do_lowpass=false)
@@ -62,18 +51,62 @@ using an optional `occupation_threshold`. By default all occupation numbers are
     negtol = max(sqrt(eps(T)), 10occupation_threshold)
     minimum(ρ) < -negtol && @warn("Negative ρ detected", min_ρ=minimum(ρ))
 
-    ρ
+    ρ::AbstractArray{Tρ, 4}
 end
 
 # Variation in density corresponding to a variation in the orbitals and occupations.
-@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ,
-                                   occupation, δoccupation=zero.(occupation);
-                                   occupation_threshold=zero(T)) where {T}
-    ForwardDiff.derivative(zero(T)) do ε
-        ψ_ε   = [ψk   .+ ε .* δψk   for (ψk,   δψk)   in zip(ψ, δψ)]
-        occ_ε = [occk .+ ε .* δocck for (occk, δocck) in zip(occupation, δoccupation)]
-        compute_density(basis, ψ_ε, occ_ε; occupation_threshold)
+@views @timing function compute_δρ(basis::PlaneWaveBasis{T}, ψ, δψ, occupation,
+                                   δoccupation=zero.(occupation);
+                                   occupation_threshold=zero(T), q=zero(Vec3{T})) where {T}
+    Tψ = promote_type(T, eltype(ψ[1]))
+    # δρ is expected to be real when computations are not phonon-related.
+    Tδρ = iszero(q) ? real(Tψ) : Tψ
+    real_qzero = iszero(q) ? real : identity
+    n_components = basis.model.n_components
+
+    ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, q)]
+
+    # occupation should be on the CPU as we are going to be doing scalar indexing.
+    occupation = [to_cpu(oc) for oc in occupation]
+    mask_occ = [findall(occnk -> abs(occnk) ≥ occupation_threshold, occk)
+                for occk in occupation]
+
+    function allocate_local_storage()
+        (; δρ=zeros_like(basis.G_vectors, Tδρ, basis.fft_size..., basis.model.n_spin_components),
+          ψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...),
+         δψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...))
     end
+    range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]]
+
+    storages = parallel_loop_over_range(allocate_local_storage, range) do storage, kn
+        (ik, n) = kn
+
+        kpt = basis.kpoints[ik]
+        ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n])
+        # We return the δψk in the basis k+q which are associated to a displacement of the ψk.
+        kpt_plus_q, δψk_plus_q = kpq_equivalent_blochwave_to_kpq(basis, kpt, q,
+                                                                 ordering(δψ)[ik])
+        # The perturbation of the density
+        #   |ψ_nk|² is 2 ψ_{n,k} * δψ_{n,k+q}.
+        ifft!(storage.δψnk_real, basis, kpt_plus_q, δψk_plus_q[:, :, n])
+
+        for σ = 1:basis.model.n_components
+            storage.δρ[:, :, :, kpt.spin] .+= real_qzero(
+                2 .* occupation[ik][n] .* basis.kweights[ik]
+                                       .* conj(storage.ψnk_real[σ, :, :, :])
+                                       .* storage.δψnk_real[σ, :, :, :]
+                 .+ δoccupation[ik][n] .* basis.kweights[ik]
+                                       .* abs2.(storage.ψnk_real[σ, :, :, :]))
+        end
+
+        synchronize_device(basis.architecture)
+    end
+    δρ = sum(getfield.(storages, :δρ))
+
+    mpi_sum!(δρ, basis.comm_kpts)
+    δρ = symmetrize_ρ(basis, δρ; do_lowpass=false)
+
+    δρ
 end
 
 @views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis{TT}, ψ,
@@ -82,12 +115,12 @@ end
     T = promote_type(TT, real(eltype(ψ[1])))
     τ = similar(ψ[1], T, (basis.fft_size..., basis.model.n_spin_components))
     τ .= 0
-    dαψnk_real = zeros(complex(T), basis.fft_size)
+    dασψnk_real = zeros(complex(T), basis.fft_size)
     for (ik, kpt) in enumerate(basis.kpoints)
         G_plus_k = [[Gk[α] for Gk in Gplusk_vectors_cart(basis, kpt)] for α = 1:3]
-        for n = 1:size(ψ[ik], 3), α = 1:3, σ = 1:basis.model.n_components
-            ifft!(dαψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
-            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψnk_real)
+        for n = 1:size(ψ[ik], 3), σ = 1:basis.model.n_components, α = 1:3
+            ifft!(dασψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n])
+            @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dασψnk_real)
         end
     end
     mpi_sum!(τ, basis.comm_kpts)
diff --git a/src/density_methods.jl b/src/density_methods.jl
index 64a52269a3..e47891bf44 100644
--- a/src/density_methods.jl
+++ b/src/density_methods.jl
@@ -157,7 +157,7 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T},
         ρ_iG / sqrt(model.unit_cell_volume)
     end
     ρ = to_device(basis.architecture, ρ_cpu)
-    enforce_real!(basis, ρ)  # Symmetrize Fourier coeffs to have real iFFT
+    enforce_real!(ρ, basis)  # Symmetrize Fourier coeffs to have real iFFT
     irfft(basis, ρ)
 end
 
diff --git a/src/eigen/preconditioners.jl b/src/eigen/preconditioners.jl
index 5186b58459..e4053b48c6 100644
--- a/src/eigen/preconditioners.jl
+++ b/src/eigen/preconditioners.jl
@@ -83,10 +83,11 @@ ldiv!(P::PreconditionerTPA, R) = ldiv!(R, P, R)
 end
 (Base.:*)(P::PreconditionerTPA, R) = mul!(copy(R), P, R)
 
-function precondprep!(P::PreconditionerTPA, X)
+function precondprep!(P::PreconditionerTPA, X::AbstractArray)
     P.mean_kin = map(eachcol(X)) do x
         x = from_composite_σG(P.basis, P.kpoint, x)
         sum(real(dot(x[σ, :, :], Diagonal(P.kin), x[σ, :, :]))
             for σ = 1:P.basis.model.n_components)
     end
 end
+precondprep!(P::PreconditionerTPA, ::Nothing) = 1  # fallback for edge cases
diff --git a/src/elements.jl b/src/elements.jl
index b912f3c9db..4f96fa2db6 100644
--- a/src/elements.jl
+++ b/src/elements.jl
@@ -16,7 +16,9 @@ charge_nuclear(::Element) = 0
 
 """Chemical symbol corresponding to an element"""
 AtomsBase.atomic_symbol(::Element) = :X
-# The preceeding functions are fallback implementations that should be altered as needed.
+
+"""Return the atomic mass of an atom type"""
+AtomsBase.atomic_mass(el::Element) = element(atomic_symbol(el)).atomic_mass
 
 """Return the total ionic charge of an atom type (nuclear charge - core electrons)"""
 charge_ionic(el::Element) = charge_nuclear(el)
@@ -29,6 +31,7 @@ n_elec_core(el::Element) = charge_nuclear(el) - charge_ionic(el)
 
 """Check presence of model core charge density (non-linear core correction)."""
 has_core_density(::Element) = false
+# The preceeding functions are fallback implementations that should be altered as needed.
 
 # Fall back to the Gaussian table for Elements without pseudopotentials
 function valence_charge_density_fourier(el::Element, q::T)::T where {T <: Real}
@@ -51,10 +54,12 @@ Base.show(io::IO, el::Element) = print(io, "$(typeof(el))($(atomic_symbol(el)))"
 struct ElementCoulomb <: Element
     Z::Int  # Nuclear charge
     symbol  # Element symbol
+    mass    # Atomic mass
 end
 charge_ionic(el::ElementCoulomb)   = el.Z
 charge_nuclear(el::ElementCoulomb) = el.Z
 AtomsBase.atomic_symbol(el::ElementCoulomb) = el.symbol
+AtomsBase.atomic_mass(el::ElementCoulomb) = el.mass
 
 """
 Element interacting with electrons via a bare Coulomb potential
@@ -62,8 +67,9 @@ Element interacting with electrons via a bare Coulomb potential
 `key` may be an element symbol (like `:Si`), an atomic number (e.g. `14`)
 or an element name (e.g. `"silicon"`)
 """
-ElementCoulomb(key) = ElementCoulomb(periodic_table[key].number, Symbol(periodic_table[key].symbol))
-
+function ElementCoulomb(key; mass=element(key).atomic_mass)
+    ElementCoulomb(periodic_table[key].number, Symbol(periodic_table[key].symbol), mass)
+end
 
 function local_potential_fourier(el::ElementCoulomb, q::T) where {T <: Real}
     q == 0 && return zero(T)  # Compensating charge background
@@ -78,11 +84,16 @@ local_potential_real(el::ElementCoulomb, r::Real) = -el.Z / r
 struct ElementPsp <: Element
     Z::Int         # Nuclear charge
     symbol         # Element symbol
+    mass           # Atomic mass
     psp            # Pseudopotential data structure
 end
 function Base.show(io::IO, el::ElementPsp)
     pspid = isempty(el.psp.identifier) ? "custom" : el.psp.identifier
-    print(io, "ElementPsp($(el.symbol); psp=\"$pspid\")")
+    if el.mass == atomic_mass(el)
+        print(io, "ElementPsp($(el.symbol); psp=\"$pspid\")")
+    else
+        print(io, "ElementPsp($(el.symbol); psp=\"$pspid\", mass=\"$(el.mass)\")")
+    end
 end
 
 """
@@ -90,14 +101,15 @@ Element interacting with electrons via a pseudopotential model.
 `key` may be an element symbol (like `:Si`), an atomic number (e.g. `14`)
 or an element name (e.g. `"silicon"`)
 """
-function ElementPsp(key; psp)
-    ElementPsp(periodic_table[key].number, Symbol(periodic_table[key].symbol), psp)
+function ElementPsp(key; psp, mass=element(Symbol(periodic_table[key].symbol)).atomic_mass)
+    ElementPsp(periodic_table[key].number, Symbol(periodic_table[key].symbol), mass, psp)
 end
 
 charge_ionic(el::ElementPsp) = charge_ionic(el.psp)
 charge_nuclear(el::ElementPsp) = el.Z
 has_core_density(el::ElementPsp) = has_core_density(el.psp)
 AtomsBase.atomic_symbol(el::ElementPsp) = el.symbol
+AtomsBase.atomic_mass(el::ElementPsp) = el.mass
 
 function local_potential_fourier(el::ElementPsp, q::T) where {T <: Real}
     q == 0 && return zero(T)  # Compensating charge background
@@ -123,12 +135,14 @@ end
 struct ElementCohenBergstresser <: Element
     Z::Int  # Nuclear charge
     symbol  # Element symbol
+    mass    # Atomic mass
     V_sym   # Map |G|^2 (in units of (2π / lattice_constant)^2) to form factors
     lattice_constant  # Lattice constant (in Bohr) which is assumed
 end
 charge_ionic(el::ElementCohenBergstresser)   = 4
 charge_nuclear(el::ElementCohenBergstresser) = el.Z
 AtomsBase.atomic_symbol(el::ElementCohenBergstresser) = el.symbol
+AtomsBase.atomic_mass(el::ElementCohenBergstresser) = el.mass
 
 """
 Element where the interaction with electrons is modelled
@@ -173,7 +187,8 @@ function ElementCohenBergstresser(key; lattice_constant=nothing)
     V_sym = Dict(key => austrip(value) * unit_cell_volume / 2
                  for (key, value) in pairs(data[symbol].form_factors))
 
-    ElementCohenBergstresser(periodic_table[key].number, symbol, V_sym, lattice_constant)
+    ElementCohenBergstresser(periodic_table[key].number, symbol, element(symbol).atomic_mass,
+                             V_sym, lattice_constant)
 end
 
 function local_potential_fourier(el::ElementCohenBergstresser, q::T) where {T <: Real}
@@ -186,11 +201,13 @@ end
 
 
 struct ElementGaussian <: Element
-    α                               # Prefactor
-    L                               # Width of the Gaussian nucleus
+    α               # Prefactor
+    L               # Width of the Gaussian nucleus
     symbol::Symbol  # Element symbol
 end
 AtomsBase.atomic_symbol(el::ElementGaussian) = el.symbol
+# TODO: maybe to zero mass. Now: forces user to chose a mass.
+AtomsBase.atomic_mass(::ElementGaussian) = nothing
 
 """
 Element interacting with electrons via a Gaussian potential.
diff --git a/src/external/atomsbase.jl b/src/external/atomsbase.jl
index 7b438cfc95..dca4e3ce3d 100644
--- a/src/external/atomsbase.jl
+++ b/src/external/atomsbase.jl
@@ -19,11 +19,13 @@ function parse_system(system::AbstractSystem{D}) where {D}
     atoms = map(system) do atom
         pseudo = get(atom, :pseudopotential, "")
         if isempty(pseudo)
-            ElementCoulomb(atomic_number(atom))
+            ElementCoulomb(atomic_number(atom); mass=atomic_mass(atom))
         else
-            get!(cached_pspelements, pseudo) do
+            key = pseudo * string(atomic_mass(atom))
+            get!(cached_pspelements, key) do
                 kwargs = get(atom, :pseudopotential_kwargs, ())
-                ElementPsp(atomic_number(atom); psp=load_psp(pseudo; kwargs...))
+                ElementPsp(atomic_number(atom); psp=load_psp(pseudo; kwargs...),
+                           mass=atomic_mass(atom))
             end
         end
     end
@@ -104,7 +106,8 @@ function AtomsBase.atomic_system(lattice::AbstractMatrix{<:Number},
         if atomic_symbol(element) == :X  # dummy element ... should solve this upstream
             Atom(:X, position; atomic_symbol=:X, atomic_number=0, atomic_mass=0u"u", kwargs...)
         else
-            Atom(atomic_symbol(element), position; kwargs...)
+            Atom(atomic_symbol(element), position; atomic_mass=atomic_mass(element),
+                 kwargs...)
         end
     end
     periodic_system(atomsbase_atoms, collect(eachcol(lattice)) * u"bohr")
diff --git a/src/postprocess/phonon.jl b/src/postprocess/phonon.jl
index 07c592ebbf..f9de0640f4 100644
--- a/src/postprocess/phonon.jl
+++ b/src/postprocess/phonon.jl
@@ -1,30 +1,91 @@
 # Convert to Cartesian a dynamical matrix in reduced coordinates.
-function dynmat_red_to_cart(model::Model, dynamical_matrix)
+function dynmat_red_to_cart(model::Model, dynmat)
     inv_lattice = model.inv_lattice
 
     # The dynamical matrix `D` acts on vectors `dr` and gives a covector `dF`:
     #   dF = D · dr.
     # Thus the transformation between reduced and Cartesian coordinates is not a comatrix.
-    # To transform `dynamical_matrix` from reduced coordinates to `cart_mat` in Cartesian
+    # To transform `dynamical_matrix` from reduced coordinates to `dynmat_cart` in Cartesian
     # coordinates, we write
     #   dr_cart = lattice · dr_red,
     #   ⇒ dr_redᵀ · D_red · dr_red = dr_cartᵀ · lattice⁻ᵀ · D_red · lattice⁻¹ · dr_cart
     #                              = dr_cartᵀ · D_cart · dr_cart
     #   ⇒ D_cart = lattice⁻ᵀ · D_red · lattice⁻¹.
 
-    cart_mat = zero.(dynamical_matrix)
-    for τ = 1:size(cart_mat, 2), η = 1:size(cart_mat, 4)
-        cart_mat[:, η, :, τ] = inv_lattice' * dynamical_matrix[:, η, :, τ] * inv_lattice
+    dynmat_cart = zero.(dynmat)
+    for s = 1:size(dynmat_cart, 2), α = 1:size(dynmat_cart, 4)
+        dynmat_cart[:, α, :, s] = inv_lattice' * dynmat[:, α, :, s] * inv_lattice
     end
-    cart_mat
+    dynmat_cart
 end
 
+# Create a ``3×n_{\rm atoms}×3×n_{\rm atoms}`` tensor (for consistency with the format of
+# dynamical matrices) equivalent to a diagonal matrix with the atomic masses of the atoms in
+# a.u. on the diagonal.
+function mass_matrix(T, atoms)
+    n_atoms = length(atoms)
+    atoms_mass = atomic_mass.(atoms)
+    any(iszero.(atoms_mass)) && @warn "Some elements have unknown masses"
+    masses = zeros(T, 3, n_atoms, 3, n_atoms)
+    for s in eachindex(atoms_mass)
+        masses[:, s, :, s] = austrip(atoms_mass[s]) * I(3)
+    end
+    masses
+end
+mass_matrix(model::Model{T}) where {T} = mass_matrix(T, model.atoms)
+
+@doc raw"""
+Solve the eigenproblem for a dynamical matrix: returns the `frequencies` and eigenvectors
+(`vectors`).
 """
+function phonon_modes(basis::PlaneWaveBasis{T}, dynmat) where {T}
+    n_atoms = length(basis.model.positions)
+    M = reshape(mass_matrix(T, basis.model.atoms), 3*n_atoms, 3*n_atoms)
+
+    res = eigen(reshape(dynmat, 3*n_atoms, 3*n_atoms), M)
+    maximum(abs, imag(res.values)) > sqrt(eps(T)) &&
+        @warn "Some eigenvalues of the dynamical matrix have a large imaginary part"
+
+    signs = sign.(real(res.values))
+    frequencies = signs .* sqrt.(abs.(real(res.values)))
+
+    (; frequencies, res.vectors)
+end
+function phonon_modes_cart(basis::PlaneWaveBasis{T}, dynmat) where {T}
+    dynmat_cart = dynmat_red_to_cart(basis.model, dynmat)
+    phonon_modes(basis, dynmat_cart)
+end
+
+@doc raw"""
 Compute the dynamical matrix in the form of a ``3×n_{\rm atoms}×3×n_{\rm atoms}`` tensor
 in reduced coordinates.
 """
-@timing function compute_dynmat(basis::PlaneWaveBasis, ψ, occupation; kwargs...)
-    dynmats_per_term = [compute_dynmat(term, basis, ψ, occupation; kwargs...)
+@timing function compute_dynmat(basis::PlaneWaveBasis{T}, ψ, occupation; q=zero(Vec3{T}),
+                                ρ=nothing, ham=nothing, εF=nothing, eigenvalues=nothing,
+                                kwargs...) where {T}
+    model = basis.model
+    positions = model.positions
+    n_atoms = length(positions)
+    n_dim = model.n_dim
+
+    δρs = [zeros(complex(T), basis.fft_size..., basis.model.n_spin_components)
+           for _ = 1:3, _ = 1:n_atoms]
+    δoccupations = [zero.(occupation) for _ = 1:3, _ = 1:n_atoms]
+    δψs = [zero.(ψ) for _ = 1:3, _ = 1:n_atoms]
+    if !isempty(ψ)
+        for s = 1:n_atoms, α = 1:n_dim
+            δHψs_sα = compute_δHψ_sα(basis, ψ, q, s, α)
+            (; δψ, δρ, δoccupation) = solve_ΩplusK_split(ham, ρ, ψ, occupation, εF,
+                                                         eigenvalues, -δHψs_sα; q,
+                                                         kwargs...)
+            δoccupations[α, s] = δoccupation
+            δρs[α, s] = δρ
+            δψs[α, s] = δψ
+        end
+    end
+
+    dynmats_per_term = [compute_dynmat(term, basis, ψ, occupation; ρ, δψs, δρs,
+                                       δoccupations, q)
                         for term in basis.terms]
     sum(filter(!isnothing, dynmats_per_term))
 end
@@ -36,3 +97,25 @@ function compute_dynmat_cart(basis::PlaneWaveBasis, ψ, occupation; kwargs...)
     dynmats_reduced = compute_dynmat(basis, ψ, occupation; kwargs...)
     dynmat_red_to_cart(basis.model, dynmats_reduced)
 end
+
+function compute_dynmat(scfres::NamedTuple; kwargs...)
+    compute_dynmat(scfres.basis, scfres.ψ, scfres.occupation; scfres.ρ, scfres.ham,
+                   scfres.occupation_threshold, scfres.εF, scfres.eigenvalues, kwargs...)
+end
+
+function compute_dynmat_cart(scfres; kwargs...)
+    compute_dynmat_cart(scfres.basis, scfres.ψ, scfres.occupation; scfres.ρ, scfres.ham,
+                        scfres.occupation_threshold, scfres.εF, scfres.eigenvalues, kwargs...)
+end
+
+# TODO: Document relation to non-local potential in future phonon PR.
+"""
+Assemble the right-hand side term for the Sternheimer equation for all relevant quantities:
+Compute the perturbation of the Hamiltonian with respect to a variation of the local
+potential produced by a displacement of the atom s in the direction α.
+"""
+@timing function compute_δHψ_sα(basis::PlaneWaveBasis, ψ, q, s, α; kwargs...)
+    δHψ_per_term = [compute_δHψ_sα(term, basis, ψ, q, s, α; kwargs...)
+                    for term in basis.terms]
+    sum(filter(!isnothing, δHψ_per_term))
+end
diff --git a/src/pseudo/attach_psp.jl b/src/pseudo/attach_psp.jl
index 6554792101..807e7065f5 100644
--- a/src/pseudo/attach_psp.jl
+++ b/src/pseudo/attach_psp.jl
@@ -24,8 +24,7 @@ julia> attach_psp(system, Si="hgh/lda/si-q4", O="hgh/lda/o-q6")
 """
 function attach_psp(system::AbstractSystem, pspmap::AbstractDict{Symbol,String})
     particles = map(system) do atom
-        symbol = Symbol(element(atomic_number(atom)).symbol)
-        # symbol = element_symbol(atom) with AtomsBase master (02/07/2023)
+        symbol = element_symbol(atom)
 
         # Pseudo or explicit potential already set
         if haskey(atom, :pseudopotential) && !isempty(atom[:pseudopotential])
diff --git a/src/response/chi0.jl b/src/response/chi0.jl
index 6761a03ca7..57b72fbbd6 100644
--- a/src/response/chi0.jl
+++ b/src/response/chi0.jl
@@ -170,7 +170,8 @@ function sternheimer_solver(Hk, ψk, ε, rhs;
     end
     precon = PreconditionerTPA(basis, kpoint)
     # First column of ψk as there is no natural kinetic energy.
-    precondprep!(precon, ψk[:, 1])
+    # We take care of the (rare) cases when ψk is empty.
+    precondprep!(precon, size(ψk, 2) ≥ 1 ? ψk[:, 1] : nothing)
     function R_ldiv!(x, y)
         x .= R(precon \ R(y))
     end
@@ -251,7 +252,7 @@ The derivatives of the occupations are in-place stored in δocc.
 The tuple (; δocc, δεF) is returned. It is assumed the passed `δocc`
 are initialised to zero.
 """
-@views function compute_δocc!(δocc, basis, ψ, εF::T, ε, δHψ) where {T}
+@views function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T}
     model = basis.model
     temperature = model.temperature
     smearing = model.smearing
@@ -290,34 +291,41 @@ Perform in-place computations of the derivatives of the wave functions by solvin
 a Sternheimer equation for each `k`-points. It is assumed the passed `δψ` are initialised
 to zero.
 """
-@views function compute_δψ!(δψ, basis, H, ψ, εF, ε, δHψ;
+@views function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε_minus_q=ε;
                             ψ_extra=[zeros_like(ψk, size(ψk)[1:2]..., 0) for ψk in ψ],
-                            kwargs_sternheimer...)
+                            q=zero(Vec3{T}), kwargs_sternheimer...) where {T}
+    # We solve the Sternheimer equation
+    #   (H_k - ε_{n,k-q}) δψ_{n,k} = - (1 - P_{k}) δHψ_{n, k-q},
+    # where P_{k} is the projector on ψ_{k} and with the conventions:
+    # * δψ_{k} is the variation of ψ_{k-q}, which implies (for ℬ_{k} the `basis.kpoints`)
+    #     δψ_{k-q} ∈ ℬ_{k-q} and δHψ_{k-q} ∈ ℬ_{k};
+    # * δHψ[ik] = δHψ_{k-q};
+    # * ε_minus_q[ik] = ε_{·, k-q}.
     model = basis.model
     temperature = model.temperature
     smearing = model.smearing
     filled_occ = filled_occupation(model)
-    Nk = length(basis.kpoints)
 
     # Compute δψnk band per band
-    for ik = 1:Nk
+    for ik = 1:length(ψ)
         Hk  = H[ik]
         ψk  = ψ[ik]
         εk  = ε[ik]
         δψk = δψ[ik]
+        εk_minus_q = ε_minus_q[ik]
 
-        for n = 1:length(εk)
-            fnk = filled_occ * Smearing.occupation(smearing, (εk[n]-εF) / temperature)
+        for n = 1:length(εk_minus_q)
+            fnk_minus_q = filled_occ * Smearing.occupation(smearing, (εk_minus_q[n]-εF) / temperature)
 
             # Explicit contributions (nonzero only for temperature > 0)
             for m = 1:length(εk)
-                # the n == m contribution in compute_δρ is obtained through
-                # δoccupation, see the explanation above
-                m == n && continue
+                # The n == m contribution in compute_δρ is obtained through δoccupation, see
+                # the explanation above; except if we perform phonon calculations.
+                iszero(q) && (m == n) && continue
                 fmk = filled_occ * Smearing.occupation(smearing, (εk[m]-εF) / temperature)
                 ddiff = Smearing.occupation_divided_difference
-                ratio = filled_occ * ddiff(smearing, εk[m], εk[n], εF, temperature)
-                αmn = compute_αmn(fmk, fnk, ratio)  # fnk * αmn + fmk * αnm = ratio
+                ratio = filled_occ * ddiff(smearing, εk[m], εk_minus_q[n], εF, temperature)
+                αmn = compute_αmn(fmk, fnk_minus_q, ratio)  # fnk_minus_q * αmn + fmk * αnm = ratio
                 δψk[:, :, n] .+= ψk[:, :, m] .* αmn .* dot(ψk[:, :, m], δHψ[ik][:, :, n])
             end
 
@@ -332,15 +340,17 @@ to zero.
             εk_extra = diag(real.(ψk_extra'Hψk_extra))
             δψkn = to_composite_σG(basis, kpt, δψk[:, :, n])
             #   … then, solve the problem.
-            δψkn .+= sternheimer_solver(Hk, ψk_σG, εk[n], δHψkn; ψk_extra, εk_extra,
+            δψkn .+= sternheimer_solver(Hk, ψk_σG, εk_minus_q[n], δHψkn; ψk_extra, εk_extra,
                                         Hψk_extra, kwargs_sternheimer...)
         end
     end
 end
 
 @views @timing function apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
-                                    occupation_threshold, kwargs_sternheimer...)
+                                    occupation_threshold, q=zero(Vec3{eltype(ham.basis)}),
+                                    kwargs_sternheimer...)
     basis = ham.basis
+    ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, -q)]
 
     # We first select orbitals with occupation number higher than
     # occupation_threshold for which we compute the associated response δψn,
@@ -354,23 +364,31 @@ end
 
     ψ_occ   = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ)]
     ψ_extra = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_extra)]
-
     ε_occ   = [eigenvalues[ik][maskk] for (ik, maskk) in enumerate(mask_occ)]
-    δHψ_occ = [δHψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ)]
+    δHψ_minus_q_occ = [δHψ[ik][:, :, maskk] for (ik, maskk) in enumerate(ordering(mask_occ))]
+    # Only needed for phonon calculations.
+    ε_minus_q_occ  = ordering([eigenvalues[ik][maskk] for (ik, maskk) in enumerate(mask_occ)])
 
     # First we compute δoccupation. We only need to do this for the actually occupied
     # orbitals. So we make a fresh array padded with zeros, but only alter the elements
     # corresponding to the occupied orbitals. (Note both compute_δocc! and compute_δψ!
     # assume that the first array argument has already been initialised to zero).
+    # For phonon calculations when q ≠ 0, we do not use δoccupation, and compute directly
+    # the full perturbation δψ.
     δoccupation = zero.(occupation)
-    δocc_occ = [δoccupation[ik][maskk] for (ik, maskk) in enumerate(mask_occ)]
-    δεF = compute_δocc!(δocc_occ, basis, ψ_occ, εF, ε_occ, δHψ_occ).δεF
+    if iszero(q)
+        δocc_occ = [δoccupation[ik][maskk] for (ik, maskk) in enumerate(mask_occ)]
+        δεF = compute_δocc!(δocc_occ, basis, ψ_occ, εF, ε_occ, δHψ_minus_q_occ).δεF
+    else
+        δεF = zero(εF)
+    end
 
-    # Then we compute δψ, again in-place into a zero-padded array
-    δψ = zero.(ψ)
-    δψ_occ = [δψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ)]
-    compute_δψ!(δψ_occ, basis, ham.blocks, ψ_occ, εF, ε_occ, δHψ_occ; ψ_extra,
-                kwargs_sternheimer...)
+    # Then we compute δψ (again in-place into a zero-padded array) with elements of
+    # `basis.kpoints` that are equivalent to `k+q`.
+    δψ = zero.(δHψ)  # δHψ ≠ δHψ_minus_q
+    δψ_occ = [δψ[ik][:, :, maskk] for (ik, maskk) in enumerate(ordering(mask_occ))]
+    compute_δψ!(δψ_occ, basis, ham.blocks, ψ_occ, εF, ε_occ, δHψ_minus_q_occ, ε_minus_q_occ;
+                ψ_extra, q, kwargs_sternheimer...)
 
     (; δψ, δoccupation, δεF)
 end
@@ -378,9 +396,9 @@ end
 """
 Get the density variation δρ corresponding to a potential variation δV.
 """
-function apply_χ0(ham, ψ, occupation, εF, eigenvalues, δV::AbstractArray{T};
-                  occupation_threshold=default_occupation_threshold(T),
-                  kwargs_sternheimer...) where {T}
+function apply_χ0(ham, ψ, occupation, εF::T, eigenvalues, δV::AbstractArray{TδV};
+                  occupation_threshold=default_occupation_threshold(TδV),
+                  q=zero(Vec3{eltype(ham.basis)}), kwargs_sternheimer...) where {T, TδV}
 
     basis = ham.basis
 
@@ -393,19 +411,21 @@ function apply_χ0(ham, ψ, occupation, εF, eigenvalues, δV::AbstractArray{T};
     # Sternheimer linear solver (it makes the rhs be order 1 even if
     # δV is small)
     normδV = norm(δV)
-    normδV < eps(typeof(εF)) && return zero(δV)
+    normδV < eps(T) && return zero(δV)
     δV ./= normδV
 
-    δHψ = [RealSpaceMultiplication(basis, kpt, @views δV[:, :, :, kpt.spin]) * ψ[ik]
-           for (ik, kpt) in enumerate(basis.kpoints)]
+    # For phonon calculations, assemble
+    #   δHψ_k = δV_{q} · ψ_{k-q}.
+    δHψ = multiply_ψ_by_blochwave(basis, ψ, δV, q)
     (; δψ, δoccupation) = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
-                                      occupation_threshold, kwargs_sternheimer...)
-    δρ = compute_δρ(basis, ψ, δψ, occupation, δoccupation; occupation_threshold)
+                                      occupation_threshold, q, kwargs_sternheimer...)
+
+    δρ = compute_δρ(basis, ψ, δψ, occupation, δoccupation; occupation_threshold, q)
+
     δρ * normδV
 end
 
 function apply_χ0(scfres, δV; kwargs_sternheimer...)
-    apply_χ0(scfres.ham, scfres.ψ, scfres.occupation, scfres.εF,
-             scfres.eigenvalues, δV; scfres.occupation_threshold,
-             kwargs_sternheimer...)
+    apply_χ0(scfres.ham, scfres.ψ, scfres.occupation, scfres.εF, scfres.eigenvalues, δV;
+             scfres.occupation_threshold, kwargs_sternheimer...)
 end
diff --git a/src/response/hessian.jl b/src/response/hessian.jl
index 45848b0135..5525eda573 100644
--- a/src/response/hessian.jl
+++ b/src/response/hessian.jl
@@ -73,7 +73,7 @@ end
 Return δψ where (Ω+K) δψ = rhs
 """
 @timing function solve_ΩplusK(basis::PlaneWaveBasis{T}, ψ, rhs, occupation;
-                      callback=identity, tol=1e-10) where {T}
+                              callback=identity, tol=1e-10) where {T}
     filled_occ = filled_occupation(basis.model)
     # for now, all orbitals have to be fully occupied -> need to strip them beforehand
     @assert all(all(occ_k .== filled_occ) for occ_k in occupation)
@@ -147,9 +147,10 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
     - `δVind`: Change in potential induced by `δρ` (the term needed on top of `δHextψ`
       to get `δHψ`).
 """
-@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF,
-                                    eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10,
-                                    verbose=false, occupation_threshold, kwargs...) where {T}
+@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF, 
+                                     eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10, 
+                                     verbose=false, occupation_threshold, q=zero(Vec3{real(T)}), 
+                                     kwargs...) where {T}
     # Using χ04P = -Ω^-1, E extension operator (2P->4P) and R restriction operator:
     # (Ω+K)^-1 =  Ω^-1 ( 1 -   K   (1 + Ω^-1 K  )^-1    Ω^-1  )
     #          = -χ04P ( 1 -   K   (1 - χ04P K  )^-1   (-χ04P))
@@ -160,21 +161,21 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
 
     # compute δρ0 (ignoring interactions)
     δψ0, δoccupation0 = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, -rhs;
-                                    tol=tol_sternheimer, occupation_threshold,
+                                    tol=tol_sternheimer, occupation_threshold, q,
                                     kwargs...)  # = -χ04P * rhs
-    δρ0 = compute_δρ(basis, ψ, δψ0, occupation, δoccupation0; occupation_threshold)
+    δρ0 = compute_δρ(basis, ψ, δψ0, occupation, δoccupation0; occupation_threshold, q)
 
     # compute total δρ
     pack(δρ)   = vec(δρ)
     unpack(δρ) = reshape(δρ, size(ρ))
     function eps_fun(δρ)
         δρ = unpack(δρ)
-        δV = apply_kernel(basis, δρ; ρ)
+        δV = apply_kernel(basis, δρ; ρ, q)
         # TODO
         # Would be nice to play with abstol / reltol etc. to avoid over-solving
         # for the initial GMRES steps.
         χ0δV = apply_χ0(ham, ψ, occupation, εF, eigenvalues, δV;
-                        occupation_threshold, tol=tol_sternheimer, kwargs...)
+                        occupation_threshold, tol=tol_sternheimer, q, kwargs...)
         pack(δρ - χ0δV)
     end
     J = LinearMap{T}(eps_fun, prod(size(δρ0)))
@@ -182,11 +183,10 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
     δρ = unpack(δρ)
 
     # Compute total change in Hamiltonian applied to ψ
-    δVind = apply_kernel(basis, δρ; ρ)  # Change in potential induced by δρ
-    δHψ = @views map(basis.kpoints, ψ, rhs) do kpt, ψk, rhsk
-        δVindψk = RealSpaceMultiplication(basis, kpt, δVind[:, :, :, kpt.spin]) * ψk
-        δVindψk - rhsk
-    end
+    δVind = apply_kernel(basis, δρ; ρ, q)  # Change in potential induced by δρ
+    # For phonon calculations, assemble
+    #   δHψ_k = δV_{q} · ψ_{k-q}.
+    δHψ = multiply_ψ_by_blochwave(basis, ψ, δVind, q) - rhs
 
     # Compute total change in eigenvalues
     δeigenvalues = map(ψ, δHψ) do ψk, δHψk
@@ -195,9 +195,9 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty
         end
     end
 
-    δψ, δoccupation, δεF = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
-                                       occupation_threshold, tol=tol_sternheimer,
-                                       kwargs...)
+    (; δψ, δoccupation, δεF) = apply_χ0_4P(ham, ψ, occupation, εF, eigenvalues, δHψ;
+                                           occupation_threshold, tol=tol_sternheimer, q,
+                                           kwargs...)
 
     (; δψ, δρ, δHψ, δVind, δeigenvalues, δoccupation, δεF, history)
 end
diff --git a/src/scf/mixing.jl b/src/scf/mixing.jl
index b2b9f08f8b..50a813f95b 100644
--- a/src/scf/mixing.jl
+++ b/src/scf/mixing.jl
@@ -73,7 +73,7 @@ end
     δFtot_fourier  = total_density(δF_fourier)
     δFspin_fourier = spin_density(δF_fourier)
     δρtot_fourier = δFtot_fourier .* G² ./ (kTF.^2 .+ G²)
-    enforce_real!(basis, δρtot_fourier)
+    enforce_real!(δρtot_fourier, basis)
     δρtot = irfft(basis, δρtot_fourier)
 
     # Copy DC component, otherwise it never gets updated
@@ -83,7 +83,7 @@ end
         ρ_from_total_and_spin(δρtot, nothing)
     else
         δρspin_fourier = @. δFspin_fourier - δFtot_fourier * (4π * ΔDOS_Ω) / (kTF^2 + G²)
-        enforce_real!(basis, δρspin_fourier)
+        enforce_real!(δρspin_fourier, basis)
         δρspin = irfft(basis, δρspin_fourier)
         ρ_from_total_and_spin(δρtot, δρspin)
     end
diff --git a/src/symmetry.jl b/src/symmetry.jl
index efcccdc061..6a4903793b 100644
--- a/src/symmetry.jl
+++ b/src/symmetry.jl
@@ -234,8 +234,8 @@ function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractArray)
         all(isinteger, basis.kpoints[idx].coordinate - Sk)
     end
     if isnothing(ikfull)
-        # Build a new k-point datastructure:
-        Skpoint = build_kpoints(basis, [Sk])[1]
+        # Build new k-point datastructure
+        Skpoint = Kpoint(basis, Sk, kpoint.spin)
     else
         Skpoint = basis.kpoints[ikfull]
         @assert Skpoint.coordinate ≈ Sk
@@ -319,7 +319,8 @@ end
 """
 Symmetrize a density by applying all the basis (by default) symmetries and forming the average.
 """
-@views @timing function symmetrize_ρ(basis, ρ; symmetries=basis.symmetries, do_lowpass=true)
+@views @timing function symmetrize_ρ(basis, ρ::AbstractArray{T};
+                                     symmetries=basis.symmetries, do_lowpass=true) where {T}
     ρin_fourier  = to_cpu(fft(basis, ρ))
     ρout_fourier = zero(ρin_fourier)
     for σ = 1:size(ρ, 4)
@@ -327,7 +328,8 @@ Symmetrize a density by applying all the basis (by default) symmetries and formi
                                     ρin_fourier[:, :, :, σ], basis, symmetries)
         do_lowpass && lowpass_for_symmetry!(ρout_fourier[:, :, :, σ], basis; symmetries)
     end
-    irfft(basis, to_device(basis.architecture, ρout_fourier) ./ length(symmetries))
+    inv_fft = T <: Real ? irfft : ifft
+    inv_fft(basis, to_device(basis.architecture, ρout_fourier) ./ length(symmetries))
 end
 
 """
@@ -465,6 +467,6 @@ end
 Ensure its real-space equivalent of passed Fourier-space representation is entirely real by
 removing wavevectors `G` that don't have a `-G` counterpart in the basis.
 """
-@timing function enforce_real!(basis, fourier_coeffs)
+@timing function enforce_real!(fourier_coeffs, basis::PlaneWaveBasis)
     lowpass_for_symmetry!(fourier_coeffs, basis; symmetries=[SymOp(-Mat3(I), Vec3(0, 0, 0))])
 end
diff --git a/src/terms/Hamiltonian.jl b/src/terms/Hamiltonian.jl
index 9d5e4a89b5..87ae65211f 100644
--- a/src/terms/Hamiltonian.jl
+++ b/src/terms/Hamiltonian.jl
@@ -15,7 +15,7 @@ struct GenericHamiltonianBlock <: HamiltonianBlock
                        # (as many as there are terms), kept for easier exploration
     optimized_operators::Vector  # Optimized list of RealFourierOperator, for application
 
-    scratch # dummy field
+    scratch  # dummy field
 end
 
 # More optimized HamiltonianBlock for the important case of a DFT Hamiltonian
@@ -55,9 +55,9 @@ function HamiltonianBlock(basis, kpoint, operators; scratch=ham_allocate_scratch
     end
 end
 function ham_allocate_scratch_(basis::PlaneWaveBasis{T}) where {T}
-    (; ψ_reals=[zeros_like(basis.G_vectors, complex(T),
-                         basis.model.n_components, basis.fft_size...)
-                for _ = 1:Threads.nthreads()])
+    [(; ψ_reals=zeros_like(basis.G_vectors, complex(T),
+                           basis.model.n_components, basis.fft_size...))
+     for _ = 1:Threads.nthreads()]
 end
 
 Base.:*(H::HamiltonianBlock, ψ) = mul!(similar(ψ), H, ψ)
@@ -107,26 +107,35 @@ end
 @views @timing "Hamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray3,
                                                                         H::GenericHamiltonianBlock,
                                                                         ψ::AbstractArray3)
-    T = eltype(H.basis)
-    n_bands = size(ψ, 3)
-    Hψ_fourier = similar(Hψ[:, :, 1])
-    ψ_real  = similar(ψ, complex(T), H.basis.model.n_components, H.basis.fft_size...)
-    Hψ_real = similar(Hψ, complex(T), H.basis.model.n_components, H.basis.fft_size...)
-    # take ψi, IFFT it to ψ_real, apply each term to Hψ_fourier and Hψ_real, and add it to Hψ
-    for iband = 1:n_bands
-        Hψ_real .= 0
-        Hψ_fourier .= 0
-        ifft!(ψ_real, H.basis, H.kpoint, ψ[:, :, iband])
+    basis = H.basis
+    function allocate_local_storage()
+        T = eltype(basis)
+        (; Hψ_fourier = similar(Hψ[:, :, 1]),
+           ψ_real  = similar(ψ, complex(T), basis.model.n_components, basis.fft_size...),
+           Hψ_real = similar(Hψ, complex(T), basis.model.n_components, basis.fft_size...))
+    end
+    parallel_loop_over_range(allocate_local_storage, 1:size(ψ, 3)) do storage, iband
+        to = TimerOutput()  # Thread-local timer output
+
+        # Take ψi, IFFT it to ψ_real, apply each term to Hψ_fourier and Hψ_real, and add it
+        # to Hψ.
+        storage.Hψ_real .= 0
+        storage.Hψ_fourier .= 0
+        ifft!(storage.ψ_real, basis, H.kpoint, ψ[:, :, iband])
         for op in H.optimized_operators
-            @timing "$(nameof(typeof(op)))" begin
-                apply!((; fourier=Hψ_fourier, real=Hψ_real),
+            @timeit to "$(nameof(typeof(op)))" begin
+                apply!((; fourier=storage.Hψ_fourier, real=storage.Hψ_real),
                        op,
-                       (; fourier=ψ[:, :, iband], real=ψ_real))
+                       (; fourier=ψ[:, :, iband], real=storage.ψ_real))
             end
         end
-        Hψ[:, :, iband] .= Hψ_fourier
-        fft!(Hψ_fourier, H.basis, H.kpoint, Hψ_real)
-        Hψ[:, :, iband] .+= Hψ_fourier
+        Hψ[:, :, iband] .= storage.Hψ_fourier
+        fft!(storage.Hψ_fourier, basis, H.kpoint, storage.Hψ_real)
+        Hψ[:, :, iband] .+= storage.Hψ_fourier
+
+        if Threads.threadid() == 1
+            merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack])
+        end
     end
 
     Hψ
@@ -143,43 +152,38 @@ end
     # Notice that we use unnormalized plans for extra speed
     potential = H.local_op.potential / prod(H.basis.fft_size)
 
-    # parallelize the loop by breaking it into nthreads() chunks. The ψ_reals are chunk-local
-    chunk_length = cld(n_bands, Threads.nthreads())
-    @sync for (ichunk, chunk) in enumerate(Iterators.partition(1:n_bands, chunk_length))
-        Threads.@spawn for iband in chunk # spawn a task per chunk
-            to = TimerOutput()  # Thread-local timer output
-            ψ_real = H.scratch.ψ_reals[ichunk]
-
-            @timeit to "local+kinetic" begin
-                ifft!(ψ_real, H.basis, H.kpoint, ψ[:, :, iband]; normalize=false)
-                for σ = 1:H.basis.model.n_components
-                    ψ_real[σ, :, :, :] .*= potential
-                end
-                fft!(Hψ[:, :, iband], H.basis, H.kpoint, ψ_real; normalize=false)  # overwrites ψ_real
-                for σ = 1:H.basis.model.n_components
-                    Hψ[σ, :, iband] .+= H.fourier_op.multiplier .* ψ[σ, :, iband]
-                end
-            end
+    parallel_loop_over_range(H.scratch, 1:n_bands) do storage, iband
+        to = TimerOutput()  # Thread-local timer output
+        ψ_real = storage.ψ_reals
 
-            if have_divAgrad
-                @assert H.basis.model.n_components == 1
-                @timeit to "divAgrad" begin
-                    apply!((; fourier=Hψ[:, :, iband], real=nothing),
-                           H.divAgrad_op,
-                           (; fourier=ψ[:, :, iband], real=nothing),
-                           ψ_real)  # ψ_real used as scratch
-                end
+        @timeit to "local+kinetic" begin
+            ifft!(ψ_real, H.basis, H.kpoint, ψ[:, :, iband]; normalize=false)
+            for σ = 1:H.basis.model.n_components
+                ψ_real[σ, :, :, :] .*= potential
             end
+            fft!(Hψ[:, :, iband], H.basis, H.kpoint, ψ_real; normalize=false)  # overwrites ψ_real
+            for σ = 1:H.basis.model.n_components
+                Hψ[σ, :, iband] .+= H.fourier_op.multiplier .* ψ[σ, :, iband]
+            end
+        end
 
-            if Threads.threadid() == 1
-                merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack])
+        if have_divAgrad
+            @timeit to "divAgrad" begin
+                apply!((; fourier=Hψ[:, :, iband], real=nothing),
+                       H.divAgrad_op,
+                       (; fourier=ψ[:, :, iband], real=nothing),
+                       ψ_real)  # ψ_real used as scratch
             end
+        end
+
+        if Threads.threadid() == 1
+            merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack])
+        end
 
-            synchronize_device(H.basis.architecture)
-       end
+        synchronize_device(H.basis.architecture)
     end
 
-    # Apply the nonlocal operator
+    # Apply the nonlocal operator.
     if !isnothing(H.nonlocal_op)
         @timing "nonlocal" begin
             apply!((; fourier=Hψ, real=nothing),
diff --git a/src/terms/ewald.jl b/src/terms/ewald.jl
index 3d23d166e8..b7b19984cc 100644
--- a/src/terms/ewald.jl
+++ b/src/terms/ewald.jl
@@ -185,7 +185,7 @@ function energy_forces_ewald(S, lattice::AbstractArray{T}, charges, positions, q
 end
 
 # TODO: See if there is a way to express this with AD.
-function dynmat_ewald_recip(model::Model{T}, τ, σ; η, q=zero(Vec3{T})) where {T}
+function dynmat_ewald_recip(model::Model{T}, s, t; η, q=zero(Vec3{T})) where {T}
     # Numerical cutoffs to obtain meaningful contributions. These are very conservative.
     # The largest argument to the exp(-x) function
     max_exp_arg = -log(eps(T)) + 5  # add some wiggle room
@@ -201,25 +201,25 @@ function dynmat_ewald_recip(model::Model{T}, τ, σ; η, q=zero(Vec3{T})) where
     charges   = T.(charge_ionic.(model.atoms))
     positions = model.positions
     @assert length(charges) == length(positions)
-    pτ = positions[τ]
-    pσ = positions[σ]
+    ps = positions[s]
+    pt = positions[t]
 
     dynmat_recip = zeros(complex(T), length(q), length(q))
     for G1 in -Glims[1]:Glims[1], G2 in -Glims[2]:Glims[2], G3 in -Glims[3]:Glims[3]
         G = Vec3(G1, G2, G3)
         if !iszero(G + q)
             Gsqq = sum(abs2, recip_lattice * (G + q))
-            term = exp(-Gsqq / 4η^2) / Gsqq * charges[σ] * charges[τ]
-            term *= cis2pi(dot(G + q, pσ - pτ))
+            term = exp(-Gsqq / 4η^2) / Gsqq * charges[t] * charges[s]
+            term *= cis2pi(dot(G + q, pt - ps))
             term *= (2T(π) * (G + q)) * transpose(2T(π) * (G + q))
             dynmat_recip += term
         end
 
-        (iszero(G) || σ ≢ τ) && continue
+        (iszero(G) || t ≢ s) && continue
         Gsq = sum(abs2, recip_lattice * G)
 
-        strucfac = sum(Z * cos2pi(dot(pσ - r, G)) for (r, Z) in zip(positions, charges))
-        dsum = charges[σ] * strucfac
+        strucfac = sum(Z * cos2pi(dot(pt - r, G)) for (r, Z) in zip(positions, charges))
+        dsum = charges[t] * strucfac
         dsum *= (2T(π) * G) * transpose(2T(π) * G)
         dynmat_recip -= exp(-Gsq / 4η^2) / Gsq * dsum
     end
@@ -238,25 +238,21 @@ function compute_dynmat(ewald::TermEwald, basis::PlaneWaveBasis{T}, ψ, occupati
 
     dynmat = zeros(complex(T), n_dim, n_atoms, n_dim, n_atoms)
     # Real part
-    for τ = 1:n_atoms
-        for γ = 1:n_dim
-            displacement = zero.(model.positions)
-            displacement[τ] = setindex(displacement[τ], one(T), γ)
-            real_part = -ForwardDiff.derivative(zero(T)) do ε
-                ph_disp = ε .* displacement
-                forces = energy_forces_ewald(model.lattice, charges, model.positions,
-                                             q, ph_disp; ewald.η).forces
-                reduce(hcat, forces)
-            end
-
-            dynmat[:, :, γ, τ] = real_part[1:n_dim, :]
+    for s = 1:n_atoms, α = 1:n_dim
+        displacement = zero.(model.positions)
+        displacement[s] = setindex(displacement[s], one(T), α)
+        real_part = -ForwardDiff.derivative(zero(T)) do ε
+            ph_disp = ε .* displacement
+            forces = energy_forces_ewald(model.lattice, charges, model.positions, q,
+                                         ph_disp; ewald.η).forces
+            reduce(hcat, forces)
         end
+
+        dynmat[:, :, α, s] = real_part
     end
     # Reciprocal part
-    for τ = 1:n_atoms
-        for σ = 1:n_atoms
-            dynmat[:, σ, :, τ] += dynmat_ewald_recip(model, σ, τ; ewald.η, q)
-        end
+    for s = 1:n_atoms, t = 1:n_atoms
+        dynmat[:, t, :, s] += dynmat_ewald_recip(model, t, s; ewald.η, q)
     end
     dynmat
 end
diff --git a/src/terms/hartree.jl b/src/terms/hartree.jl
index 0c4cd78c12..d039f3ba58 100644
--- a/src/terms/hartree.jl
+++ b/src/terms/hartree.jl
@@ -26,17 +26,26 @@ struct TermHartree <: TermNonlinear
     # Fourier coefficients of the Green's function of the periodic Poisson equation
     poisson_green_coeffs::AbstractArray
 end
-function TermHartree(basis::PlaneWaveBasis{T}, scaling_factor) where {T}
+function compute_poisson_green_coeffs(basis::PlaneWaveBasis{T}, scaling_factor;
+                                      q=zero(Vec3{T})) where {T}
+    model = basis.model
+
     # Solving the Poisson equation ΔV = -4π ρ in Fourier space
     # is multiplying elementwise by 4π / |G|^2.
-
-    poisson_green_coeffs = 4T(π) ./ norm2.(G_vectors_cart(basis))
-    GPUArraysCore.@allowscalar poisson_green_coeffs[1] = 0  # Compensating charge background => Zero DC
-
-    enforce_real!(basis, poisson_green_coeffs)  # Symmetrize Fourier coeffs to have real iFFT
+    poisson_green_coeffs = 4T(π) ./ [sum(abs2, model.recip_lattice * (G + q))
+                                     for G in G_vectors(basis)]
+    if iszero(q)
+        # Compensating charge background => Zero DC.
+        GPUArraysCore.@allowscalar poisson_green_coeffs[1] = 0
+        # Symmetrize Fourier coeffs to have real iFFT.
+        enforce_real!(poisson_green_coeffs, basis)
+    end
     poisson_green_coeffs = to_device(basis.architecture, poisson_green_coeffs)
-
-    TermHartree(T(scaling_factor), T(scaling_factor) .* poisson_green_coeffs)
+    scaling_factor .* poisson_green_coeffs
+end
+function TermHartree(basis::PlaneWaveBasis{T}, scaling_factor) where {T}
+    poisson_green_coeffs = compute_poisson_green_coeffs(basis, scaling_factor)
+    TermHartree(T(scaling_factor), poisson_green_coeffs)
 end
 
 @timing "ene_ops: hartree" function ene_ops(term::TermHartree, basis::PlaneWaveBasis{T},
@@ -57,9 +66,17 @@ function compute_kernel(term::TermHartree, basis::PlaneWaveBasis; kwargs...)
     basis.model.n_spin_components == 1 ? K : [K K; K K]
 end
 
-function apply_kernel(term::TermHartree, basis::PlaneWaveBasis, δρ; kwargs...)
+function apply_kernel(term::TermHartree, basis::PlaneWaveBasis{T}, δρ::AbstractArray{Tδρ};
+                      q=zero(Vec3{T}), kwargs...) where {T, Tδρ}
     δV = zero(δρ)
     δρtot = total_density(δρ)
-    # note broadcast here: δV is 4D, and all its spin components get the same potential
-    δV .= irfft(basis, term.poisson_green_coeffs .* fft(basis, δρtot))
+    if iszero(q)
+        # Note broadcast here: δV is 4D, and all its spin components get the same potential.
+        δV .= irfft(basis, term.poisson_green_coeffs .* fft(basis, δρtot))  # Note the irfft
+    else
+        # Coefficients with q != 0 not in memory => recompute
+        coeffs = compute_poisson_green_coeffs(basis, term.scaling_factor; q)
+        δV .= ifft(basis, coeffs .* fft(basis, δρtot))  # Note the ifft
+    end
+    δV
 end
diff --git a/src/terms/local.jl b/src/terms/local.jl
index 3ee08cb40b..b4a95c4520 100644
--- a/src/terms/local.jl
+++ b/src/terms/local.jl
@@ -66,7 +66,7 @@ function (external::ExternalFromFourier)(basis::PlaneWaveBasis{T}) where {T}
     pot_fourier = map(G_vectors_cart(basis)) do G
         convert_dual(complex(T), external.potential(G) / sqrt(unit_cell_volume))
     end
-    enforce_real!(basis, pot_fourier)  # Symmetrize Fourier coeffs to have real iFFT
+    enforce_real!(pot_fourier, basis)  # Symmetrize Fourier coeffs to have real iFFT
     TermExternal(irfft(basis, pot_fourier))
 end
 
@@ -82,78 +82,138 @@ end
 Atomic local potential defined by `model.atoms`.
 """
 struct AtomicLocal end
-function (::AtomicLocal)(basis::PlaneWaveBasis{T}) where {T}
+function compute_local_potential(S, basis::PlaneWaveBasis{T}; positions=basis.model.positions,
+                                 q=zero(Vec3{T})) where {T}
     # pot_fourier is <e_G|V|e_G'> expanded in a basis of e_{G-G'}
     # Since V is a sum of radial functions located at atomic
     # positions, this involves a form factor (`local_potential_fourier`)
     # and a structure factor e^{-i G·r}
     model = basis.model
-    G_cart = to_cpu(G_vectors_cart(basis))
-    # TODO Bring G_cart on the CPU for compatibility with the pseudopotentials which
+    Gqs_cart = [model.recip_lattice * (G + q) for G in G_vectors(basis)]
+    # TODO Bring Gqs_cart on the CPU for compatibility with the pseudopotentials which
     #      are not isbits ... might be able to solve this by restructuring the loop
 
-
     # Pre-compute the form factors at unique values of |G| to speed up
     # the potential Fourier transform (by a lot). Using a hash map gives O(1)
     # lookup.
     form_factors = IdDict{Tuple{Int,T},T}()  # IdDict for Dual compatability
-    for G in G_cart
-        q = norm(G)
+    for G in Gqs_cart
+        p = norm(G)
         for (igroup, group) in enumerate(model.atom_groups)
-            if !haskey(form_factors, (igroup, q))
+            if !haskey(form_factors, (igroup, p))
                 element = model.atoms[first(group)]
-                form_factors[(igroup, q)] = local_potential_fourier(element, q)
+                form_factors[(igroup, p)] = local_potential_fourier(element, p)
             end
         end
     end
 
-    Gs = to_cpu(G_vectors(basis))  # TODO Again for GPU compatibility
-    pot_fourier = map(enumerate(Gs)) do (iG, G)
-        q = norm(G_cart[iG])
+    Gqs = [G + q for G in G_vectors(basis)]  # TODO Again for GPU compatibility
+    pot_fourier = map(enumerate(Gqs)) do (iG, G)
+        p = norm(Gqs_cart[iG])
         pot = sum(enumerate(model.atom_groups)) do (igroup, group)
-            structure_factor = sum(r -> cis2pi(-dot(G, r)), @view model.positions[group])
-            form_factors[(igroup, q)] * structure_factor
+            structure_factor = sum(r -> cis2pi(-dot(G, r)), @view positions[group])
+            form_factors[(igroup, p)] * structure_factor
         end
         pot / sqrt(model.unit_cell_volume)
     end
 
-    enforce_real!(basis, pot_fourier)  # Symmetrize Fourier coeffs to have real iFFT
-    pot_real = irfft(basis, to_device(basis.architecture, pot_fourier))
-
-    TermAtomicLocal(pot_real)
+    if iszero(q)
+        enforce_real!(pot_fourier, basis)  # Symmetrize coeffs to have real iFFT
+        return irfft(basis, to_device(basis.architecture, pot_fourier))
+    else
+        return ifft(basis, to_device(basis.architecture, pot_fourier))
+    end
 end
+(::AtomicLocal)(basis::PlaneWaveBasis{T}) where {T} =
+    TermAtomicLocal(compute_local_potential(T, basis))
 
-@timing "forces: local" function compute_forces(::TermAtomicLocal, basis::PlaneWaveBasis{TT},
-                                                ψ, occupation; ρ, kwargs...) where {TT}
-    T = promote_type(TT, real(eltype(ψ[1])))
+function compute_forces(term::TermAtomicLocal, basis::PlaneWaveBasis{T}, ψ, occupation;
+                        ρ, kwargs...) where {T}
+    S = promote_type(T, real(eltype(ψ[1])))
+    forces_local(S, basis, ρ, zero(Vec3{T}))
+end
+@timing "forces: local" function forces_local(S, basis::PlaneWaveBasis{T}, ρ, q) where {T}
     model = basis.model
+    recip_lattice = model.recip_lattice
     ρ_fourier = fft(basis, total_density(ρ))
+    real_ifSreal = S <: Real ? real : identity
 
     # energy = sum of form_factor(G) * struct_factor(G) * rho(G)
     # where struct_factor(G) = e^{-i G·r}
-    forces = [zero(Vec3{T}) for _ = 1:length(model.positions)]
+    forces = [zero(Vec3{S}) for _ = 1:length(model.positions)]
     for group in model.atom_groups
         element = model.atoms[first(group)]
-        form_factors = [Complex{T}(local_potential_fourier(element, norm(G)))
-                        for G in G_vectors_cart(basis)]
+        form_factors = [complex(S)(local_potential_fourier(element, norm(recip_lattice * (G + q))))
+                        for G in G_vectors(basis)]
         for idx in group
             r = model.positions[idx]
-            forces[idx] = _force_local_internal(basis, ρ_fourier, form_factors, r)
+            forces[idx] = -real_ifSreal(sum(conj(ρ_fourier[iG])
+                                        * form_factors[iG]
+                                        * cis2pi(-dot(G + q, r))
+                                        * (-2T(π)) * (G + q) * im
+                                        / sqrt(model.unit_cell_volume)
+                                  for (iG, G) in enumerate(G_vectors(basis))))
         end
     end
     forces
 end
 
-# function barrier to work around various type instabilities
-function _force_local_internal(basis, ρ_fourier, form_factors, r)
-    T = real(eltype(ρ_fourier))
-    f = zero(Vec3{T})
-    for (iG, G) in enumerate(G_vectors(basis))
-        f -= real(conj(ρ_fourier[iG])
-                  .* form_factors[iG]
-                  .* cis2pi(-dot(G, r))
-                  .* (-2T(π)) .* G .* im
-                  ./ sqrt(basis.model.unit_cell_volume))
+# Phonon: Perturbation of the local potential with respect to a displacement on the
+# direction α of the atom s.
+function compute_δV_sα(::TermAtomicLocal, basis::PlaneWaveBasis{T}, s, α; q=zero(Vec3{T}),
+                       positions=basis.model.positions) where {T}
+    S = iszero(q) ? T : complex(T)
+    displacement = zero.(positions)
+    displacement[s] = setindex(displacement[s], one(T), α)
+    ForwardDiff.derivative(zero(T)) do ε
+        positions = ε*displacement .+ positions
+        compute_local_potential(S, basis; q, positions)
+    end
+end
+
+# Phonon: Second-order perturbation of the local potential with respect to a displacement on
+# the directions α and β of the atoms s and t.
+function compute_δ²V(term::TermAtomicLocal, basis::PlaneWaveBasis{T}, β, t, α, s) where {T}
+    model = basis.model
+
+    displacement = zero.(model.positions)
+    displacement[t] = setindex(displacement[t], one(T), β)
+    ForwardDiff.derivative(zero(T)) do ε
+        positions = ε*displacement .+ model.positions
+        compute_δV_sα(term, basis, s, α; positions)
     end
-    f
+end
+
+function compute_dynmat(term::TermAtomicLocal, basis::PlaneWaveBasis{T}, ψ, occupation; ρ,
+                        δρs, q=zero(Vec3{T}), kwargs...) where {T}
+    S = complex(T)
+    model = basis.model
+    positions = model.positions
+    n_atoms = length(positions)
+    n_dim = model.n_dim
+
+    ∫δρδV = zeros(S, 3, n_atoms, 3, n_atoms)
+    for s = 1:n_atoms, α = 1:n_dim
+        ∫δρδV[:, :, α, s] .-= reduce(hcat, forces_local(S, basis, δρs[α, s], q))
+    end
+
+    ∫ρδ²V = zeros(S, 3, n_atoms, 3, n_atoms)
+    ρ_fourier = fft(basis, total_density(ρ))
+    δ²V_fourier = similar(ρ_fourier)
+    for s = 1:n_atoms, α = 1:n_dim
+        for t = 1:n_atoms, β = 1:n_dim
+            fft!(δ²V_fourier, basis, compute_δ²V(term, basis, β, t, α, s))
+            ∫ρδ²V[β, t, α, s] = sum(conj(ρ_fourier) .* δ²V_fourier)
+        end
+    end
+
+
+    ∫δρδV + ∫ρδ²V
+end
+
+function compute_δHψ_sα(term::TermAtomicLocal, basis::PlaneWaveBasis{T}, ψ, q, s, α) where {T}
+    δV_sα = similar(ψ[1], basis.fft_size..., basis.model.n_spin_components)
+    # All spin components get the same potential.
+    δV_sα .= compute_δV_sα(term, basis, s, α; q)
+    multiply_ψ_by_blochwave(basis, ψ, δV_sα, q)
 end
diff --git a/src/terms/local_nonlinearity.jl b/src/terms/local_nonlinearity.jl
index 34998827f1..94f80ae59b 100644
--- a/src/terms/local_nonlinearity.jl
+++ b/src/terms/local_nonlinearity.jl
@@ -28,8 +28,10 @@ function compute_kernel(term::TermLocalNonlinearity, ::AbstractBasis{T}; ρ, kwa
     Diagonal(vec(convert_dual.(T, fpp.(ρ))))
 end
 
-function apply_kernel(term::TermLocalNonlinearity, ::AbstractBasis{T}, δρ; ρ, kwargs...) where {T}
+function apply_kernel(term::TermLocalNonlinearity, ::AbstractBasis{T},
+                      δρ::AbstractArray{Tδρ}; ρ, kwargs...) where {T, Tδρ}
+    S = promote_type(T, Tδρ)
     fp(ρ) = ForwardDiff.derivative(term.f, ρ)
     fpp(ρ) = ForwardDiff.derivative(fp, ρ)
-    convert_dual.(T, fpp.(ρ) .* δρ)
+    convert_dual.(S, fpp.(ρ) .* δρ)
 end
diff --git a/src/terms/pairwise.jl b/src/terms/pairwise.jl
index 55f597459c..482bd78b6d 100644
--- a/src/terms/pairwise.jl
+++ b/src/terms/pairwise.jl
@@ -148,17 +148,15 @@ function compute_dynmat(term::TermPairwisePotential, basis::PlaneWaveBasis{T}, 
     symbols = Symbol.(atomic_symbol.(model.atoms))
 
     dynmat = zeros(complex(T), 3, n_atoms, 3, n_atoms)
-    for τ = 1:n_atoms
-        for γ = 1:n_dim
-            displacement = zero.(model.positions)
-            displacement[τ] = setindex(displacement[τ], one(T), γ)
-            dynmat[:, :, γ, τ] = -ForwardDiff.derivative(zero(T)) do ε
-                ph_disp = ε .* displacement
-                (; forces) = energy_forces_pairwise(model.lattice, symbols, model.positions,
-                                                    term.V, term.params, q, ph_disp;
-                                                    term.max_radius)
-                reduce(hcat, forces)
-            end
+    for s = 1:n_atoms, α = 1:n_dim
+        displacement = zero.(model.positions)
+        displacement[s] = setindex(displacement[s], one(T), α)
+        dynmat[:, :, α, s] = -ForwardDiff.derivative(zero(T)) do ε
+            ph_disp = ε .* displacement
+            (; forces) = energy_forces_pairwise(model.lattice, symbols, model.positions,
+                                                term.V, term.params, q, ph_disp;
+                                                term.max_radius)
+            reduce(hcat, forces)
         end
     end
     dynmat
diff --git a/src/terms/terms.jl b/src/terms/terms.jl
index b4bf7e4c52..a8bf5bf769 100644
--- a/src/terms/terms.jl
+++ b/src/terms/terms.jl
@@ -60,8 +60,10 @@ breaks_symmetries(::Anyonic) = true
 
 # forces computes either nothing or an array forces[at][α] (by default no forces)
 compute_forces(::Term, ::AbstractBasis, ψ, occupation; kwargs...) = nothing
-# dynamical matrix for phonons computations (array dynmat[n_dim, n_atom, n_dim, n_atom])
+# dynamical matrix for phonons computations (array dynmat[3, n_atom, 3, n_atom])
 compute_dynmat(::Term, ::AbstractBasis, ψ, occupation; kwargs...) = nothing
+# variation of the Hamiltonian applied to orbitals for phonons computations
+compute_δHψ_sα(::Term, ::AbstractBasis, ψ, q, s, α; kwargs...) = nothing
 
 @doc raw"""
     compute_kernel(basis::PlaneWaveBasis; kwargs...)
diff --git a/src/terms/xc.jl b/src/terms/xc.jl
index 3a79070d0e..03eaa50ec7 100644
--- a/src/terms/xc.jl
+++ b/src/terms/xc.jl
@@ -183,7 +183,8 @@ end
     forces
 end
 
-function _force_xc_internal(basis::PlaneWaveBasis{T}, Vxc_fourier::AbstractArray{U}, form_factors, igroup, r) where {T, U}
+function _force_xc_internal(basis::PlaneWaveBasis{T}, Vxc_fourier::AbstractArray{U},
+                            form_factors, igroup, r) where {T, U}
     TT = promote_type(T, real(U))
     f  = zero(Vec3{TT})
     for (iG, (G, G_cart)) in enumerate(zip(G_vectors(basis), G_vectors_cart(basis)))
@@ -359,7 +360,8 @@ function compute_kernel(term::TermXc, basis::PlaneWaveBasis; ρ, kwargs...)
 end
 
 
-function apply_kernel(term::TermXc, basis::PlaneWaveBasis{T}, δρ; ρ, kwargs...) where {T}
+function apply_kernel(term::TermXc, basis::PlaneWaveBasis{T}, δρ::AbstractArray{Tδρ};
+                      ρ, kwargs...) where {T, Tδρ}
     n_spin = basis.model.n_spin_components
     isempty(term.functionals) && return nothing
     @assert all(family(xc) in (:lda, :gga) for xc in term.functionals)
@@ -384,7 +386,7 @@ function apply_kernel(term::TermXc, basis::PlaneWaveBasis{T}, δρ; ρ, kwargs..
 
     # If the XC functional is not supported for an architecture, terms is on the CPU
     terms = kernel_terms(term.functionals, density)
-    δV = zero(ρ)  # [ix, iy, iz, iσ]
+    δV = zeros(Tδρ, size(ρ)...)  # [ix, iy, iz, iσ]
 
     Vρρ = to_device(basis.architecture, reshape(terms.Vρρ, n_spin, n_spin, basis.fft_size...))
     @views for s = 1:n_spin, t = 1:n_spin  # LDA term
diff --git a/src/transfer.jl b/src/transfer.jl
index 5b7d089411..69113e5ba6 100644
--- a/src/transfer.jl
+++ b/src/transfer.jl
@@ -188,10 +188,7 @@ Find the equivalent index of the coordinate `kcoord` ∈ ℝ³ in a list `kcoord
 `ΔG` is the vector of ℤ³ such that `kcoords[index] = kcoord + ΔG`.
 """
 function find_equivalent_kpt(basis::PlaneWaveBasis{T}, kcoord, spin; tol=sqrt(eps(T))) where {T}
-    kcoord_red = map(kcoord) do k
-                     k = mod(k, 1)              # coordinate in [0, 1)³
-                     k ≥ 0.5 - tol ? k - 1 : k  # coordinate in [-½, ½)³
-                 end
+    kcoord_red = normalize_kpoint_coordinate(kcoord .+ eps(T))
 
     ΔG = kcoord_red - kcoord
     # ΔG should be an integer.
@@ -206,25 +203,67 @@ function find_equivalent_kpt(basis::PlaneWaveBasis{T}, kcoord, spin; tol=sqrt(ep
     return (; index, ΔG)
 end
 
-"""
-Return the Fourier coefficients for `ψk · e^{i q·r}` in the basis of `kpt_out`, where `ψk`
-is defined on a basis `kpt_in`.
-"""
-function multiply_by_expiqr(basis, kpt_in, q, ψk)
-    shifted_kcoord = kpt_in.coordinate .+ q  # coordinate of ``k``-point in ℝ
-    index, ΔG = find_equivalent_kpt(basis, shifted_kcoord, kpt_in.spin)
-    kpt_out = basis.kpoints[index]
-    return transfer_blochwave_kpt(ψk, basis, kpt_in, kpt_out, ΔG)
-end
-
 """
 Return the indices of the `kpoints` shifted by `q`. That is for each `kpoint` of the `basis`:
-`kpoints[ik].coordinate + q = kpoints[indices[ik]].coordinate`.
+`kpoints[ik].coordinate + q` is equivalent to `kpoints[indices[ik]].coordinate`.
 """
-function k_to_kpq_mapping(basis::PlaneWaveBasis, qcoord)
+function k_to_equivalent_kpq_permutation(basis::PlaneWaveBasis, qcoord)
     kpoints = basis.kpoints
     indices = [find_equivalent_kpt(basis, kpt.coordinate + qcoord, kpt.spin).index
                for kpt in kpoints]
-    @assert sort(indices) == 1:length(basis.kpoints)
+    @assert isperm(indices)
     indices
 end
+
+@doc raw"""
+Create the Fourier expansion of ``ψ_{k+q}`` from ``ψ_{[k+q]}``, where ``[k+q]`` is in
+`basis.kpoints`. while ``k+q`` may or may not be inside.
+
+If ``ΔG ≔ [k+q] - (k+q)``, then we have that
+```math
+    ∑_G \hat{u}_{[k+q]}(G) e^{i(k+q+G)·r} &= ∑_{G'} \hat{u}_{k+q}(G'-ΔG) e^{i(k+q+ΔG+G')·r},
+```
+hence
+```math
+    u_{k+q}(G) = u_{[k+q]}(G + ΔG).
+```
+"""
+function kpq_equivalent_blochwave_to_kpq(basis, kpt, q, ψk_plus_q_equivalent)
+    kcoord_plus_q = kpt.coordinate .+ q
+    index, ΔG = find_equivalent_kpt(basis, kcoord_plus_q, kpt.spin)
+    equivalent_kpt_plus_q = basis.kpoints[index]
+
+    kpt_plus_q = Kpoint(basis, kcoord_plus_q, kpt.spin)
+    (; kpt=kpt_plus_q,
+     ψk=transfer_blochwave_kpt(ψk_plus_q_equivalent, basis, equivalent_kpt_plus_q,
+                               kpt_plus_q, -ΔG))
+end
+
+# Replaces `multiply_by_expiqr`.
+# TODO: Think about a clear and consistent way to define such a function when completing
+# phonon PRs.
+@doc raw"""
+    multiply_ψ_by_blochwave(basis::PlaneWaveBasis, ψ, f_real, q)
+
+Return the Fourier coefficients for the Bloch waves ``f^{\rm real}_{q} ψ_{k-q}`` in an
+element of `basis.kpoints` equivalent to ``k-q``.
+"""
+function multiply_ψ_by_blochwave(basis::PlaneWaveBasis, ψ, f_real, q)
+    ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, -q)]
+    fψ = zero.(ψ)
+    for (ik, kpt) in enumerate(basis.kpoints)
+        # First, express ψ_{[k-q]} in the basis of k-q points…
+        kpt_minus_q, ψk_minus_q = kpq_equivalent_blochwave_to_kpq(basis, kpt, -q,
+                                                                  ordering(ψ)[ik])
+        # … then perform the multiplication with f in real space and get the Fourier
+        # coefficients.
+        @views for n = 1:size(ψ[ik], 3)
+            ψkn_minus_q_real = ifft(basis, kpt_minus_q, ψk_minus_q[:, :, n])
+            for σ = 1:basis.model.n_components
+                fψ[ik][σ, :, n] = fft(basis, kpt, ψkn_minus_q_real[σ, :, :, :]
+                                                      .* f_real[:, :, :, kpt.spin])
+            end
+        end
+    end
+    fψ
+end
diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl
index ac46c1584b..d355bc18c8 100644
--- a/src/workarounds/forwarddiff_rules.jl
+++ b/src/workarounds/forwarddiff_rules.jl
@@ -158,7 +158,7 @@ function construct_value(model::Model{T}) where {T <: ForwardDiff.Dual}
 end
 
 construct_value(el::Element) = el
-construct_value(el::ElementPsp) = ElementPsp(el.Z, el.symbol, construct_value(el.psp))
+construct_value(el::ElementPsp) = ElementPsp(el.Z, el.symbol, el.mass, construct_value(el.psp))
 construct_value(psp::PspHgh) = psp
 function construct_value(psp::PspHgh{T}) where {T <: ForwardDiff.Dual}
     PspHgh(psp.Zion,
diff --git a/test/elements.jl b/test/elements.jl
index 83b72c9afe..1c9d9cecf1 100644
--- a/test/elements.jl
+++ b/test/elements.jl
@@ -1,5 +1,7 @@
 @testitem "Check constructing ElementCoulomb" begin
     using DFTK
+    using Unitful
+    using UnitfulAtomic
     using DFTK: charge_nuclear, charge_ionic, n_elec_core, n_elec_valence
     using DFTK: ElementCoulomb, local_potential_fourier, local_potential_real
     using LinearAlgebra
@@ -15,6 +17,7 @@
     @test element.symbol == :Mg
 
     @test atomic_symbol(element) == :Mg
+    @test atomic_mass(element) == 24.305u"u"
     @test charge_nuclear(element) == 12
     @test charge_ionic(element) == 12
     @test n_elec_valence(element) == charge_ionic(element)
@@ -27,6 +30,8 @@ end
 
 @testitem "Check constructing ElementPsp" begin
     using DFTK
+    using Unitful
+    using UnitfulAtomic
     using DFTK: load_psp, charge_nuclear, charge_ionic, n_elec_core, n_elec_valence
     using DFTK: ElementPsp, local_potential_fourier, local_potential_real
 
@@ -44,6 +49,7 @@ end
     @test element.psp.identifier == "hgh/lda/c-q4"
 
     @test atomic_symbol(element) == :C
+    @test atomic_mass(element) == 12.011u"u"
     @test charge_nuclear(element) == 6
     @test charge_ionic(element) == 4
     @test n_elec_valence(element) == 4
@@ -56,6 +62,8 @@ end
 
 @testitem "Check constructing ElementCohenBergstresser" begin
     using DFTK
+    using Unitful
+    using UnitfulAtomic
     using DFTK: charge_nuclear, charge_ionic, n_elec_core, n_elec_valence
     using DFTK: ElementCohenBergstresser, local_potential_fourier
 
@@ -67,6 +75,7 @@ end
     @test element.symbol == :Si
 
     @test atomic_symbol(element) == :Si
+    @test atomic_mass(element) == 28.085u"u"
     @test charge_nuclear(element) == 14
     @test charge_ionic(element) == 4
     @test n_elec_valence(element) == 4
@@ -79,12 +88,14 @@ end
 
 @testitem "Check constructing ElementGaussian" begin
     using DFTK
+    using Unitful
+    using UnitfulAtomic
     using DFTK: local_potential_fourier, local_potential_real
 
     element = ElementGaussian(1.0, 0.5; symbol=:X1)
 
     @test atomic_symbol(element) == :X1
-
+    @test isnothing(atomic_mass(element))
     @test local_potential_fourier(element, 0.0) == -1.0
     @test local_potential_fourier(element, 2.0) == -0.6065306597126334
     @test local_potential_real(element, 2.0) == -0.00026766045152977074
diff --git a/test/external/atomsbase.jl b/test/external/atomsbase.jl
index b5aa05ef13..704f30c21a 100644
--- a/test/external/atomsbase.jl
+++ b/test/external/atomsbase.jl
@@ -15,6 +15,7 @@
 
     system = atomic_system(lattice, atoms, positions, magnetic_moments)
     @test atomic_symbol(system) == [:Si, :C, :H, :C]
+    @test atomic_mass(system)   == [28.085u"u", 12.011u"u", 1.008u"u", 12.011u"u"]
     @test bounding_box(system)  == collect(eachcol(lattice)) * u"bohr"
     @test position(system)      == [lattice * p * u"bohr" for p in positions]
 
@@ -51,6 +52,7 @@
         newsys = periodic_system(model, magnetic_moments)
 
         @test atomic_symbol(system)       == atomic_symbol(newsys)
+        @test atomic_mass(system)         == atomic_mass(newsys)
         @test bounding_box(system)        == bounding_box(newsys)
         @test boundary_conditions(system) == boundary_conditions(newsys)
         @test maximum(maximum, position(system) - position(newsys)) < 1e-12u"bohr"
@@ -191,18 +193,19 @@ end
     @test newsys[4, :pseudopotential] == "hgh/pbe/c-q4.hgh"
 end
 
-@testitem "AbstractSystem (unusual symbols) -> DFTK" begin
+@testitem "AbstractSystem (unusual symbols and masses) -> DFTK" begin
     using DFTK
     using Unitful
     using UnitfulAtomic
     using AtomsBase
 
     lattice     = [12u"bohr" * rand(3) for _ = 1:3]
-    atoms       = [Atom(6, randn(3)u"Å"; atomic_symbol=:C1),
-                   Atom(6, randn(3)u"Å"; atomic_symbol=:C2)]
+    atoms       = [Atom(6, randn(3)u"Å"; atomic_symbol=:C1, atomic_mass=-1u"u"),
+                   Atom(6, randn(3)u"Å"; atomic_symbol=:C2, atomic_mass=-2u"u")]
     system      = periodic_system(atoms, lattice)
     system_psp  = attach_psp(system; C="hgh/lda/c-q4.hgh")
     @test atomic_symbol(system_psp) == [:C1, :C2]
+    @test atomic_mass(system_psp) == [-1u"u", -2u"u"]
 
     pos_lattice = austrip.(reduce(hcat, lattice))
     let model = model_LDA(system_psp)
@@ -214,6 +217,7 @@ end
         @test length(model.atoms) == 2
         @test atomic_symbol(model.atoms[1]) == :C
         @test atomic_symbol(model.atoms[2]) == :C
+        @test atomic_mass.(model.atoms) == [-1u"u", -2u"u"]
         @test model.atoms[1].psp.identifier == "hgh/lda/c-q4.hgh"
         @test model.atoms[2].psp.identifier == "hgh/lda/c-q4.hgh"
     end
diff --git a/test/forces.jl b/test/forces.jl
index 466d06cfc9..94da96605e 100644
--- a/test/forces.jl
+++ b/test/forces.jl
@@ -52,7 +52,7 @@ end
     silicon = TestCases.silicon
 
     function energy_forces(positions)
-        Si = ElementPsp(silicon.atnum, :Si, load_psp(silicon.psp_upf))
+        Si = ElementPsp(silicon.atnum, :Si, silicon.mass, load_psp(silicon.psp_upf))
         atoms = fill(Si, length(silicon.atoms))
         model = model_DFT(silicon.lattice, atoms, positions, [:lda_x, :lda_c_pw])
         basis = PlaneWaveBasis(model; Ecut=7, kgrid=[2, 2, 2], kshift=[0, 0, 0],
diff --git a/test/iron_lda.jl b/test/iron_lda.jl
index 1076ccd4ec..130315df6d 100644
--- a/test/iron_lda.jl
+++ b/test/iron_lda.jl
@@ -49,5 +49,5 @@ end
 
 
 @testitem "Iron LDA (Float64)" tags=[:core] setup=[RunSCF, TestCases, IronLDA] begin
-    IronLDA.run_iron_lda(Float64; test_tol=5e-6, scf_tol=1e-11)
+    IronLDA.run_iron_lda(Float64; test_tol=5e-6, scf_ene_tol=1e-11)
 end
diff --git a/test/iron_pbe.jl b/test/iron_pbe.jl
index a710ce88b9..ab2b0a8693 100644
--- a/test/iron_pbe.jl
+++ b/test/iron_pbe.jl
@@ -67,5 +67,5 @@ end
 
 
 @testitem "Iron PBE (Float64)" tags=[:slow] setup=[RunSCF, TestCases, IronPBE] begin
-    IronPBE.run_iron_pbe(Float64; test_tol=5e-6, scf_tol=1e-12)
+    IronPBE.run_iron_pbe(Float64; test_tol=5e-6, scf_ene_tol=1e-12)
 end
diff --git a/test/occupation.jl b/test/occupation.jl
index 6dd1017c17..93e7094fd8 100644
--- a/test/occupation.jl
+++ b/test/occupation.jl
@@ -85,7 +85,7 @@ end
         (; occupation) = DFTK.compute_occupation(basis, eigenvalues, alg; tol_n_elec=1e-6)
 
         for ik = 1:n_k
-            @test all(isapprox.(occupation[ik], occupation0[ik], atol=1e-2))
+            @test all(isapprox.(occupation[ik], occupation0[ik]; atol=1e-2))
         end
     end
 end
diff --git a/test/phonon/ewald.jl b/test/phonon/ewald.jl
index c5df536c26..25074c6b04 100644
--- a/test/phonon/ewald.jl
+++ b/test/phonon/ewald.jl
@@ -16,30 +16,30 @@
               -0.10658869539441335
               -4.891274318712944e-16
               -3.773447798738169e-17
-              1.659776058962626e-15
-              0.09553958285993536
-              0.18062696253387409
-              0.18062696253387464
-              0.4959725605665635
-              0.4959725605665648
-              0.49597256056656597
-              0.5498259359834827
-              0.5498259359834833
-              0.6536453595829087
-              0.6536453595829091
-              0.6536453595829103
-              0.6536453595829105
-              0.6961890494198791
-              0.6961890494198807
-              0.7251587593311752
-              0.7251587593311782
-              0.9261195383192374
-              0.9261195383192381
-              1.2533843205271504
-              1.2533843205271538
-              1.7010950724885228
-              1.7010950724885254
-              1.752506588801463]
+               1.659776058962626e-15
+               0.09553958285993536
+               0.18062696253387409
+               0.18062696253387464
+               0.4959725605665635
+               0.4959725605665648
+               0.49597256056656597
+               0.5498259359834827
+               0.5498259359834833
+               0.6536453595829087
+               0.6536453595829091
+               0.6536453595829103
+               0.6536453595829105
+               0.6961890494198791
+               0.6961890494198807
+               0.7251587593311752
+               0.7251587593311782
+               0.9261195383192374
+               0.9261195383192381
+               1.2533843205271504
+               1.2533843205271538
+               1.7010950724885228
+               1.7010950724885254
+               1.752506588801463 ]
     Phonon.test_frequencies(testcase, terms, ω_ref)
 end
 
diff --git a/test/phonon/helpers.jl b/test/phonon/helpers.jl
index d843fcdb41..91e716a14e 100644
--- a/test/phonon/helpers.jl
+++ b/test/phonon/helpers.jl
@@ -2,39 +2,40 @@
 @testsetup module Phonon
 using Test
 using DFTK
+using DFTK: TermAtomicLocal, TermAtomicNonlocal
 using DFTK: compute_dynmat_cart, setindex, dynmat_red_to_cart, normalize_kpoint_coordinate
 using LinearAlgebra
 using ForwardDiff
 
 # We do not take the square root to compare eigenvalues with machine precision.
-function compute_squared_frequencies(matrix)
+function squared_frequencies(matrix)
     n, m = size(matrix, 1), size(matrix, 2)
     Ω = eigvals(reshape(matrix, n*m, n*m))
     real(Ω)
 end
 
 # Reference against automatic differentiation.
-function ph_compute_reference(basis_supercell)
-    model_supercell = basis_supercell.model
-    n_atoms = length(model_supercell.positions)
-    n_dim = model_supercell.n_dim
-    T = eltype(model_supercell.lattice)
+function reference_squared_frequencies(basis; kwargs...)
+    model = basis.model
+    n_atoms = length(model.positions)
+    n_dim = model.n_dim
+    T = eltype(model.lattice)
     dynmat_ad = zeros(T, 3, n_atoms, 3, n_atoms)
-    for τ = 1:n_atoms, γ = 1:n_dim
-        displacement = zero.(model_supercell.positions)
-        displacement[τ] = setindex(displacement[τ], one(T), γ)
-        dynmat_ad[:, :, γ, τ] = -ForwardDiff.derivative(zero(T)) do ε
-            lattice = convert(Matrix{eltype(ε)}, model_supercell.lattice)
-            positions = ε*displacement .+ model_supercell.positions
-            model_disp = Model(convert(Model{eltype(ε)}, model_supercell); lattice, positions)
+    for s = 1:n_atoms, α = 1:n_dim
+        displacement = zero.(model.positions)
+        displacement[s] = setindex(displacement[s], one(T), α)
+        dynmat_ad[:, :, α, s] = -ForwardDiff.derivative(zero(T)) do ε
+            lattice = convert(Matrix{eltype(ε)}, model.lattice)
+            positions = ε*displacement .+ model.positions
+            model_disp = Model(convert(Model{eltype(ε)}, model); lattice, positions)
             # TODO: Would be cleaner with PR #675.
             basis_disp_bs = PlaneWaveBasis(model_disp; Ecut=5)
             forces = compute_forces(basis_disp_bs, nothing, nothing)
             reduce(hcat, forces)
         end
     end
-    hessian_ad = dynmat_red_to_cart(model_supercell, dynmat_ad)
-    sort(compute_squared_frequencies(hessian_ad))
+    hessian_ad = DFTK.dynmat_red_to_cart(model, dynmat_ad)
+    sort(squared_frequencies(hessian_ad))
 end
 
 function generate_random_supercell(; max_length=6)
@@ -57,19 +58,16 @@ function generate_supercell_qpoints(; supercell_size=generate_random_supercell()
 end
 
 # Test against a reference array.
-function test_frequencies(testcase, terms, ω_ref; tol=1e-9)
+function test_frequencies(testcase, terms, ω_ref; tol=1e-9, supercell_size=[2, 1, 3])
     model = Model(testcase.lattice, testcase.atoms, testcase.positions; terms)
     basis_bs = PlaneWaveBasis(model; Ecut=5)
 
-    supercell_size = [2, 1, 3]
     phonon = (; supercell_size, generate_supercell_qpoints(; supercell_size).qpoints)
 
-    ω_uc = []
-    for q in phonon.qpoints
-        hessian = compute_dynmat_cart(basis_bs, nothing, nothing; q)
-        push!(ω_uc, compute_squared_frequencies(hessian))
-    end
-    ω_uc = sort!(collect(Iterators.flatten(ω_uc)))
+    ω_uc = sort!(reduce(vcat, map(phonon.qpoints) do q
+        hessian = compute_dynmat_cart(basis_bs, [], []; q)
+        squared_frequencies(hessian)
+    end))
 
     @test norm(ω_uc - ω_ref) < tol
 end
@@ -85,8 +83,8 @@ function test_rand_frequencies(testcase, terms; tol=1e-9)
 
     ω_uc = []
     for q in phonon.qpoints
-        hessian = compute_dynmat_cart(basis_bs, nothing, nothing; q)
-        push!(ω_uc, compute_squared_frequencies(hessian))
+        hessian = compute_dynmat_cart(basis_bs, [], []; q)
+        push!(ω_uc, squared_frequencies(hessian))
     end
     ω_uc = sort!(collect(Iterators.flatten(ω_uc)))
 
@@ -94,11 +92,11 @@ function test_rand_frequencies(testcase, terms; tol=1e-9)
                                  phonon.supercell_size)
     model_supercell = Model(supercell.lattice, supercell.atoms, supercell.positions; terms)
     basis_supercell_bs = PlaneWaveBasis(model_supercell; Ecut=5)
-    hessian_supercell = compute_dynmat_cart(basis_supercell_bs, nothing, nothing)
-    ω_supercell = sort(compute_squared_frequencies(hessian_supercell))
+    hessian_supercell = compute_dynmat_cart(basis_supercell_bs, [], [])
+    ω_supercell = sort(squared_frequencies(hessian_supercell))
     @test norm(ω_uc - ω_supercell) < tol
 
-    ω_ad = ph_compute_reference(basis_supercell_bs)
+    ω_ad = reference_squared_frequencies(basis_supercell_bs)
 
     @test norm(ω_ad - ω_supercell) < tol
 end
diff --git a/test/phonon/local.jl b/test/phonon/local.jl
new file mode 100644
index 0000000000..2e36cbcc0d
--- /dev/null
+++ b/test/phonon/local.jl
@@ -0,0 +1,114 @@
+# TODO: Will need to be factorized with `helpers.jl` functions.
+# Needs a tiny bit of work to compute only necessary quantities, e.g. no need for SCF for
+# Ewald.
+@testsetup module PhononLocal
+using DFTK
+using Test
+using LinearAlgebra
+using ForwardDiff
+using Random
+using ..Phonon
+
+# No exchange-correlation and only a local potential.
+function model_tested(lattice::AbstractMatrix,
+                      atoms::Vector{<:DFTK.Element},
+                      positions::Vector{<:AbstractVector};
+                      extra_terms=[], kinetic_blowup=BlowupIdentity(), kwargs...)
+    @assert !(:terms in keys(kwargs))
+    terms = [Kinetic(; blowup=kinetic_blowup),
+             AtomicLocal(),
+             Ewald(),
+             PspCorrection(),
+             Hartree(),
+             extra_terms...]
+    if :temperature in keys(kwargs) && kwargs[:temperature] != 0
+        terms = [terms..., Entropy()]
+    end
+    Model(lattice, atoms, positions; model_name="atomic", terms, kwargs...)
+end
+
+
+function are_approx_frequencies(ω_uc, ω_ref; tol=1e-10)
+    # Because three eigenvalues should be close to zero and the square root near
+    # zero decrease machine accuracy, we expect at least ``3×2×2 - 3 = 9``
+    # eigenvalues to have norm related to the accuracy of the SCF convergence
+    # parameter and the rest to be larger.
+    n_dim = 3
+    n_atoms = length(ω_uc) ÷ 3
+
+    @test count(abs.(ω_uc - ω_ref) .< sqrt(tol)) ≥ n_dim*n_atoms - n_dim
+    @test count(sqrt(tol) .< abs.(ω_uc - ω_ref) .< tol) ≤ n_dim
+end
+
+function test_frequencies(testcase; ω_ref=nothing)
+    Ecut  = 7
+    kgrid = [2, 1, 3]
+    qpoints = Phonon.generate_supercell_qpoints(; supercell_size=kgrid).qpoints
+    scf_tol = 1e-12
+    χ0_tol  = scf_tol/10
+    is_converged = DFTK.ScfConvergenceDensity(scf_tol)
+    determine_diagtol = DFTK.ScfDiagtol(diagtol_max=scf_tol)
+    scf_kwargs = (; is_converged, determine_diagtol)
+
+    model = model_tested(testcase.lattice, testcase.atoms, testcase.positions;
+                         symmetries=false, testcase.temperature)
+    nbandsalg = AdaptiveBands(model; occupation_threshold=1e-10)
+    scf_kwargs = merge(scf_kwargs, (; nbandsalg))
+    basis = PlaneWaveBasis(model; Ecut, kgrid)
+    scfres = self_consistent_field(basis; scf_kwargs...)
+    ω_uc = sort!(reduce(vcat, map(qpoints) do q
+        dynamical_matrix = compute_dynmat(scfres; q, tol=χ0_tol)
+        phonon_modes_cart(basis, dynamical_matrix).frequencies
+    end))
+
+    if !isnothing(ω_ref)
+        return are_approx_frequencies(ω_uc, ω_ref; tol=10scf_tol)
+    end
+
+    supercell_size = kgrid
+    kgrid_supercell = [1, 1, 1]
+
+    supercell = create_supercell(testcase.lattice, testcase.atoms, testcase.positions,
+                                 supercell_size)
+    model_supercell = model_tested(supercell.lattice, supercell.atoms, supercell.positions;
+                                   symmetries=false, testcase.temperature)
+    nbandsalg = AdaptiveBands(model_supercell; occupation_threshold=1e-10)
+    scf_kwargs = merge(scf_kwargs, (; nbandsalg))
+    basis_supercell = PlaneWaveBasis(model_supercell; Ecut, kgrid=kgrid_supercell)
+
+    scfres = self_consistent_field(basis_supercell; scf_kwargs...)
+    dynamical_matrix = compute_dynmat(scfres; tol=χ0_tol)
+    ω_sc = sort(phonon_modes_cart(basis_supercell, dynamical_matrix).frequencies)
+
+    return are_approx_frequencies(ω_uc, ω_sc; tol=10scf_tol)
+end
+end
+
+@testitem "Phonon: Local term: comparison to ref testcase" #=
+    =#    tags=[:phonon, :dont_test_mpi] setup=[Phonon, PhononLocal, TestCases] begin
+    # Values computed offline with automatic differentiation.
+    ω_ref = [ -3.6569888415715e-9
+              -3.6569888415715e-9
+              -2.263180017613055e-9
+               0.000443073786433812
+               0.0004675174987222679
+               0.00046751749874345965
+               0.000520667604960504
+               0.0005206676049755671
+               0.0008481450680251938
+               0.0009079870302639688
+               0.0009079870302721681
+               0.0010121409655813906
+               0.0010121409655813906
+               0.0013408306319911576
+               0.0013779547317006979
+               0.001377954731723582
+               0.0014021878602703752
+               0.001402187860292344 ]
+    PhononLocal.test_frequencies(TestCases.aluminium_primitive; ω_ref)
+end
+
+@testitem "Phonon: Local term: comparison to supercell" #=
+    =#    tags=[:phonon, :dont_test_mpi, :slow] setup=[Phonon, PhononLocal, TestCases] begin
+    PhononLocal.test_frequencies(TestCases.aluminium_primitive)
+end
diff --git a/test/phonon/lowlevel.jl b/test/phonon/lowlevel.jl
index 54b85f0688..a4345e663b 100644
--- a/test/phonon/lowlevel.jl
+++ b/test/phonon/lowlevel.jl
@@ -1,14 +1,8 @@
 @testitem "Phonon: Shifting functions" tags=[:phonon, :dont_test_mpi] begin
     using DFTK
-    using DFTK: cis2pi, multiply_by_expiqr, find_equivalent_kpt, k_to_kpq_mapping
+    using DFTK: k_to_equivalent_kpq_permutation
     using LinearAlgebra
 
-    # Real-space equivalent of `transfer_blochwave_kpt`.
-    function transfer_blochwave_kpt_real(ψk_in, basis::PlaneWaveBasis, kpt_in, kpt_out, ΔG)
-        ψk_out = zeros(eltype(ψk_in), basis.model.n_components, length(kpt_out.G_vectors))
-        exp_ΔGr = reshape(cis2pi.(-dot.(Ref(ΔG), r_vectors(basis))), 1, basis.fft_size...)
-        fft(basis, kpt_out, exp_ΔGr .* ifft(basis, kpt_in, ψk_in))
-    end
     tol = 1e-12
 
     positions = [[0.0, 0.0, 0.0]]
@@ -24,39 +18,18 @@
     atoms = [X for _ in positions]
 
     model = Model(lattice, atoms, positions; n_electrons=n_atoms, symmetries=false,
-                    spin_polarization=:collinear)
+                  spin_polarization=:collinear)
     kgrid = rand(2:10, 3)
     k1, k2, k3 = kgrid
     basis = PlaneWaveBasis(model; Ecut=100, kgrid)
 
-    # We consider a smooth periodic function with Fourier coefficients given if the basis
-    # e^(iG·x)
-    ψ = randn(ComplexF64, model.n_components, size(r_vectors(basis))...)
-
     # Random `q` shift
     q0 = rand(basis.kpoints).coordinate
     ishift = [rand(-k1*2:k1*2), rand(-k2*2:k2*2), rand(-k3*2:k3*2)]
     q = Vec3(q0 .* ishift)
-    @testset "Transfer function" begin
-        for kpt in unique(rand(basis.kpoints, 4))
-            ψk = fft(basis, kpt, ψ)
-
-            ψk_out_four = multiply_by_expiqr(basis, kpt, q, ψk)
-            ψk_out_real = let
-                shifted_kcoord = kpt.coordinate .+ q
-                index, ΔG = find_equivalent_kpt(basis, shifted_kcoord, kpt.spin)
-                kpt_out = basis.kpoints[index]
-                transfer_blochwave_kpt_real(ψk, basis, kpt, kpt_out, ΔG)
-            end
-            @testset "Testing kpoint $(kpt.coordinate) on kgrid $kgrid" begin
-                @test norm(ψk_out_four - ψk_out_real) < tol
-            end
-        end
-    end
 
     @testset "Ordering function" begin
-        kpoints_plus_q = k_to_kpq_mapping(basis, q)
-        ordering(kdata) = kdata[kpoints_plus_q]
+        ordering(kdata) = kdata[k_to_equivalent_kpq_permutation(basis, q)]
         kcoords = getfield.(basis.kpoints, :coordinate)
         for (ik, kcoord) in enumerate(kcoords)
             @test mod.(kcoord + q .- tol, 1) ≈ mod.(ordering(kcoords)[ik] .- tol, 1)
diff --git a/test/run_scf_and_compare.jl b/test/run_scf_and_compare.jl
index 4549a55dd0..cb0aa7e3ba 100644
--- a/test/run_scf_and_compare.jl
+++ b/test/run_scf_and_compare.jl
@@ -4,13 +4,13 @@ using DFTK
 using DFTK: mpi_sum
 
 function run_scf_and_compare(T, basis, ref_evals, ref_etot; n_ignored=0, test_tol=1e-6,
-                             scf_tol=1e-6, test_etot=true, kwargs...)
+                             scf_ene_tol=1e-6, test_etot=true, kwargs...)
     n_kpt    = length(ref_evals)
     n_bands  = length(ref_evals[1])
     kpt_done = zeros(Bool, n_kpt)
 
     nbandsalg    = AdaptiveBands(basis.model, n_bands_converge=n_bands)
-    is_converged = DFTK.ScfConvergenceEnergy(scf_tol)
+    is_converged = DFTK.ScfConvergenceEnergy(scf_ene_tol)
     scfres = self_consistent_field(basis; is_converged, nbandsalg, kwargs...)
     for (ik, ik_global) in enumerate(basis.krange_thisproc)
         @test eltype(scfres.eigenvalues[ik]) == T
diff --git a/test/silicon_lda.jl b/test/silicon_lda.jl
index de10c46984..9f5ce84d74 100644
--- a/test/silicon_lda.jl
+++ b/test/silicon_lda.jl
@@ -40,29 +40,29 @@ end
 @testitem "Silicon LDA (small, Float64)" #=
     =#    tags=[:minimal] setup=[RunSCF, TestCases, SiliconLDA] begin
     SiliconLDA.run_silicon_lda(Float64; Ecut=7, test_tol=0.03, n_ignored=0, grid_size=17,
-                               scf_tol=1e-5)
+                               scf_ene_tol=1e-5)
 end
 
 @testitem "Silicon LDA (large, Float64)" #=
     =#    tags=[:slow] setup=[RunSCF, TestCases, SiliconLDA] begin
     SiliconLDA.run_silicon_lda(Float64; Ecut=25, test_tol=1e-5, n_ignored=0, grid_size=33,
-                               scf_tol=1e-7)
+                               scf_ene_tol=1e-7)
 end
 
 @testitem "Silicon LDA (small, Float32)" #=
     =#    tags=[:minimal] setup=[RunSCF, TestCases, SiliconLDA] begin
     SiliconLDA.run_silicon_lda(Float32; Ecut=7, test_tol=0.03, n_ignored=1, grid_size=19,
-                               scf_tol=1e-4)
+                               scf_ene_tol=1e-4)
 end
 
 @testitem "Silicon LDA (small, collinear spin)" #=
     =#    tags=[:minimal] setup=[RunSCF, TestCases, SiliconLDA] begin
     SiliconLDA.run_silicon_lda(Float64; Ecut=7, test_tol=0.03, n_ignored=0, grid_size=17,
-                               scf_tol=1e-5, spin_polarization=:collinear)
+                               scf_ene_tol=1e-5, spin_polarization=:collinear)
 end
 
 @testitem "Silicon LDA (large, collinear spin)" #=
     =#    tags=[:slow] setup=[RunSCF, TestCases, SiliconLDA] begin
     SiliconLDA.run_silicon_lda(Float64; Ecut=25, test_tol=1e-5, n_ignored=0, grid_size=33,
-                               scf_tol=1e-7, spin_polarization=:collinear)
+                               scf_ene_tol=1e-7, spin_polarization=:collinear)
 end
diff --git a/test/silicon_nlcc.jl b/test/silicon_nlcc.jl
index 550c3a2142..4d21b0b531 100644
--- a/test/silicon_nlcc.jl
+++ b/test/silicon_nlcc.jl
@@ -46,7 +46,7 @@ end
 @testitem "Silicon NLCC (large, Float64)" #=
     =#    tags=[:slow] setup=[RunSCF, TestCases, SiliconNLCC] begin
     SiliconNLCC.run_silicon_nlcc(Float64; Ecut=25, test_tol=6e-5, n_ignored=0, grid_size=36,
-                                 scf_tol=1e-11)
+                                 scf_ene_tol=1e-11)
 end
 
 @testitem "Silicon NLCC (small, collinear spin)" setup=[RunSCF, TestCases, SiliconNLCC] begin
@@ -57,5 +57,5 @@ end
 @testitem "Silicon NLCC (large, collinear spin)" #=
     =#    tags=[:slow] setup=[RunSCF, TestCases, SiliconNLCC] begin
     SiliconNLCC.run_silicon_nlcc(Float64; Ecut=25, test_tol=6e-5, n_ignored=0, grid_size=36,
-                                 scf_tol=1e-11, spin_polarization=:collinear)
+                                 scf_ene_tol=1e-11, spin_polarization=:collinear)
 end
diff --git a/test/silicon_pbe.jl b/test/silicon_pbe.jl
index 3f6ab2d3f2..e15e5ae207 100644
--- a/test/silicon_pbe.jl
+++ b/test/silicon_pbe.jl
@@ -49,13 +49,13 @@ end
 @testitem "Silicon PBE (small, Float32)" #=
     =#    tags=[:core] setup=[RunSCF, TestCases, SiliconPBE] begin
     SiliconPBE.run_silicon_pbe(Float32; Ecut=7, test_tol=0.03, n_ignored=1, grid_size=17,
-                               scf_tol=1e-4)
+                               scf_ene_tol=1e-4)
 end
 
 @testitem "Silicon PBE (large, Float64)" #=
     =#    tags=[:slow, :core] setup=[RunSCF, TestCases, SiliconPBE] begin
     SiliconPBE.run_silicon_pbe(Float64; Ecut=25, test_tol=1e-5, n_ignored=0, grid_size=33,
-                               scf_tol=1e-8)
+                               scf_ene_tol=1e-8)
 end
 
 @testitem "Silicon PBE (small, collinear spin)" #=
@@ -67,5 +67,5 @@ end
 @testitem "Silicon PBE (large, collinear spin)" #=
     =#    tags=[:slow, :core] setup=[RunSCF, SiliconPBE, TestCases] begin
     SiliconPBE.run_silicon_pbe(Float64; Ecut=25, test_tol=1e-5, n_ignored=0, grid_size=33,
-                               scf_tol=1e-8, spin_polarization=:collinear)
+                               scf_ene_tol=1e-8, spin_polarization=:collinear)
 end
diff --git a/test/silicon_redHF.jl b/test/silicon_redHF.jl
index 9b28fb0f1c..841be6143d 100644
--- a/test/silicon_redHF.jl
+++ b/test/silicon_redHF.jl
@@ -54,7 +54,7 @@ end
 @testitem "Silicon without XC (large)" #=
     =#    tags=[:slow, :core] setup=[RunSCF, TestCases, SiliconRedHF] begin
     SiliconRedHF.run_silicon_redHF(Float64; Ecut=25, test_tol=1e-5, n_ignored=2,
-                                   grid_size=35, scf_tol=1e-7, test_etot=false)
+                                   grid_size=35, scf_ene_tol=1e-7, test_etot=false)
 end
 
 # There is a weird race condition with MPI + DoubleFloats. TODO debug
diff --git a/test/silicon_scan.jl b/test/silicon_scan.jl
index dcae137e64..755c0a8d9d 100644
--- a/test/silicon_scan.jl
+++ b/test/silicon_scan.jl
@@ -20,7 +20,8 @@
     Si = ElementPsp(silicon.atnum; psp=load_psp("hgh/pbe/Si-q4"))
     model = model_SCAN(silicon.lattice, [Si, Si], silicon.positions)
     basis = PlaneWaveBasis(Model{T}(model); Ecut=15, fft_size=(27, 27, 27), kgrid=(3, 3, 3))
-    run_scf_and_compare(T, basis, ref_scan, ref_etot; scf_tol=1e-9, test_tol=5e-5, n_ignored=1)
+    run_scf_and_compare(T, basis, ref_scan, ref_etot; scf_ene_tol=1e-9, test_tol=5e-5,
+                        n_ignored=1)
 end
 
 
diff --git a/test/stresses.jl b/test/stresses.jl
index 7ac7678700..11826cd049 100644
--- a/test/stresses.jl
+++ b/test/stresses.jl
@@ -29,11 +29,10 @@
     end
 
     function test_stresses(lattice, element)
-        is_converged = DFTK.ScfConvergenceDensity(1e-11)
-        scfres = self_consistent_field(make_basis(lattice, true, element); is_converged)
-        scfres_nosym = self_consistent_field(make_basis(lattice, false, element); is_converged)
+        scfres = self_consistent_field(make_basis(lattice, true, element); tol=1e-11)
+        scfres_nosym = self_consistent_field(make_basis(lattice, false, element); tol=1e-11)
         stresses = compute_stresses_cart(scfres)
-        @test isapprox(stresses, compute_stresses_cart(scfres_nosym), atol=1e-10)
+        @test isapprox(stresses, compute_stresses_cart(scfres_nosym); atol=1e-10)
 
         dir = MPI.bcast(randn(3, 3), 0, MPI.COMM_WORLD)
 
@@ -43,7 +42,7 @@
         end
         ref_scfres = FiniteDiff.finite_difference_derivative(0.0) do ε
             basis = make_basis(lattice + ε*dir*lattice, false, element)
-            scfres = self_consistent_field(basis; is_converged=DFTK.ScfConvergenceDensity(1e-13))
+            scfres = self_consistent_field(basis; tol=1e-13)
             scfres.energies.total
         end
         ref_HF = FiniteDiff.finite_difference_derivative(0.0) do ε
@@ -53,16 +52,17 @@
             hellmann_feynman_energy(scfres_nosym, lattice+ε*(dir*lattice), false, element)
         end
 
-        @test isapprox(ref_recompute, ref_scfres, atol=1e-8)
-        @test isapprox(ref_HF, ref_recompute, atol=1e-5)
-        @test isapprox(ref_HF, FD_HF, atol=1e-5)
-        @test isapprox(dE_stresses, ref_recompute, atol=1e-5)
+        @test isapprox(ref_recompute, ref_scfres; atol=1e-8)
+        @test isapprox(ref_HF, ref_recompute; atol=1e-5)
+        @test isapprox(ref_HF, FD_HF; atol=1e-5)
+        @test isapprox(dE_stresses, ref_recompute; atol=1e-5)
     end
 
     a = 10.0  # slightly compressed and twisted
     lattice = a / 2 * [[0 1 1.1];
                        [1 0 1.];
                        [1 1 0.]]
-    test_stresses(lattice, ElementPsp(silicon.atnum, :Si, load_psp(silicon.psp_hgh)))
-    test_stresses(lattice, ElementPsp(silicon.atnum, :Si, load_psp(silicon.psp_upf)))
+    element = ElementPsp(silicon.atnum, :Si, silicon.mass, load_psp(silicon.psp_hgh))
+    test_stresses(lattice, element)
+    test_stresses(lattice, element)
 end
diff --git a/test/testcases.jl b/test/testcases.jl
index dbdd95ec20..3c39882d80 100644
--- a/test/testcases.jl
+++ b/test/testcases.jl
@@ -1,16 +1,19 @@
 @testsetup module TestCases
 using DFTK
+using Unitful
+using UnitfulAtomic
 using LinearAlgebra: Diagonal, diagm
 using LazyArtifacts
 
 hgh_lda_family = artifact"hgh_lda_hgh"
 pd_lda_family  = artifact"pd_nc_sr_lda_standard_0.4.1_upf"
 
-silicon = (
+silicon = (;
     lattice = [0.0  5.131570667152971 5.131570667152971;
                5.131570667152971 0.0 5.131570667152971;
                5.131570667152971 5.131570667152971  0.0],
     atnum = 14,
+    mass = 28.085u"u",
     n_electrons = 8,
     temperature = 0.0,
     psp_hgh = joinpath(hgh_lda_family, "si-q4.hgh"),
@@ -25,11 +28,12 @@ silicon = (
 silicon = merge(silicon,
                 (; atoms=fill(ElementPsp(silicon.atnum; psp=load_psp(silicon.psp_hgh)), 2)))
 
-magnesium = (
+magnesium = (;
     lattice = [-3.0179389205999998 -3.0179389205999998 0.0000000000000000;
                -5.2272235447000002 5.2272235447000002 0.0000000000000000;
                0.0000000000000000 0.0000000000000000 -9.7736219469000005],
     atnum = 12,
+    mass = 24.305u"u",
     n_electrons = 4,
     psp_hgh = joinpath(hgh_lda_family, "mg-q2.hgh"),
     psp_upf = joinpath(pd_lda_family, "Mg.upf"),
@@ -47,10 +51,11 @@ magnesium = merge(magnesium,
                   (; atoms=fill(ElementPsp(magnesium.atnum; psp=load_psp(magnesium.psp_hgh)), 2)))
 
 
-aluminium = (
+aluminium = (;
     lattice = Matrix(Diagonal([4 * 7.6324708938577865, 7.6324708938577865,
                                7.6324708938577865])),
     atnum = 13,
+    mass = 39.9481u"u",
     n_electrons = 12,
     psp_hgh = joinpath(hgh_lda_family, "al-q3.hgh"),
     psp_upf = joinpath(pd_lda_family, "Al.upf"),
@@ -61,11 +66,12 @@ aluminium = merge(aluminium,
                   (; atoms=fill(ElementPsp(aluminium.atnum; psp=load_psp(aluminium.psp_hgh)), 4)))
 
 
-aluminium_primitive = (
+aluminium_primitive = (;
     lattice = [5.39697192863632 2.69848596431816 2.69848596431816;
                0.00000000000000 4.67391479368660 1.55797159787754;
                0.00000000000000 0.00000000000000 4.40660912710674],
     atnum = 13,
+    mass = 39.9481u"u",
     n_electrons = 3,
     psp_hgh = joinpath(hgh_lda_family, "al-q3.hgh"),
     psp_upf = joinpath(pd_lda_family, "Al.upf"),
@@ -77,11 +83,12 @@ aluminium_primitive = merge(aluminium_primitive,
                                                      psp=load_psp(aluminium_primitive.psp_hgh)), 1)))
 
 
-platinum_hcp = (
+platinum_hcp = (;
     lattice = [10.00000000000000 0.00000000000000 0.00000000000000;
                5.00000000000000 8.66025403784439 0.00000000000000;
                0.00000000000000 0.00000000000000 16.3300000000000],
     atnum = 78,
+    mass = 195.0849u"u",
     n_electrons = 36,
     psp_hgh = joinpath(hgh_lda_family, "pt-q18.hgh"),
     psp_upf = joinpath(pd_lda_family, "Pt.upf"),
@@ -91,9 +98,10 @@ platinum_hcp = (
 platinum_hcp = merge(platinum_hcp,
                      (; atoms=fill(ElementPsp(platinum_hcp.atnum; psp=load_psp(platinum_hcp.psp_hgh)), 2)))
 
-iron_bcc = (
+iron_bcc = (;
     lattice = 2.71176 .* [[-1 1 1]; [1 -1  1]; [1 1 -1]],
     atnum = 26,
+    mass = 55.8452u"u",
     n_electrons = 8,
     psp_hgh = joinpath(hgh_lda_family, "fe-q8.hgh"),
     psp_upf = joinpath(pd_lda_family, "Fe.upf"),
@@ -102,9 +110,10 @@ iron_bcc = (
 )
 iron_bcc = merge(iron_bcc, (; atoms=[ElementPsp(iron_bcc.atnum; psp=load_psp(iron_bcc.psp_hgh))]))
 
-o2molecule = (
+o2molecule = (;
     lattice = diagm([6.5, 6.5, 9.0]),
     atnum = 8,
+    mass = 15.999u"u",
     n_electrons = 12,
     psp_hgh = joinpath(hgh_lda_family, "O-q6.hgh"),
     psp_upf = joinpath(pd_lda_family, "O.upf"),
diff --git a/test/transfer.jl b/test/transfer.jl
index 182955512b..ff28c93bcb 100644
--- a/test/transfer.jl
+++ b/test/transfer.jl
@@ -63,7 +63,7 @@ end
         # not be an identity. To prevent this we use enforce_real! to explicitly set
         # the non-matched Fourier component to zero.
         ρ = random_density(basis, 1)
-        ρ_fourier_purified = DFTK.enforce_real!(basis, fft(basis, ρ))
+        ρ_fourier_purified = DFTK.enforce_real!(fft(basis, ρ), basis)
         ρ = irfft(basis, ρ_fourier_purified)
 
         ρ_b  = transfer_density(ρ,   basis,     basis_big)