From 008e93f443717521fc20cb61d96c597fc31aa92b Mon Sep 17 00:00:00 2001 From: jerryho Date: Sun, 26 Feb 2023 13:49:09 -0500 Subject: [PATCH 01/10] add bflow-mts.jl and regularization --- src/ACEpsi.jl | 1 + src/bflow-mts.jl | 616 +++++++++++++++++++++++++++++++++++++++++++++++ src/bflow.jl | 17 +- 3 files changed, 630 insertions(+), 4 deletions(-) create mode 100644 src/bflow-mts.jl diff --git a/src/ACEpsi.jl b/src/ACEpsi.jl index 6d3c2e5..2bc3a91 100644 --- a/src/ACEpsi.jl +++ b/src/ACEpsi.jl @@ -2,5 +2,6 @@ module ACEpsi include("bflow.jl") include("envelope.jl") +include("bflow-mts.jl") end diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl new file mode 100644 index 0000000..a1ce8b7 --- /dev/null +++ b/src/bflow-mts.jl @@ -0,0 +1,616 @@ +using ACEcore, Polynomials4ML +using Polynomials4ML: OrthPolyBasis1D3T +using ACEcore: PooledSparseProduct, SparseSymmProdDAG, SparseSymmProd, release! +using ACEcore.Utils: gensparse +using LinearAlgebra: qr, I, logabsdet, pinv, mul!, dot , tr +using ACE +import ForwardDiff +import Base.Experimental: @aliasscope + +mutable struct BFwfs{T, TT, TPOLY, TE} + pos::Vector{T} + tpos::Vector{T} + trans::TT + polys::TPOLY + pooling::PooledSparseProduct{3} + corr::SparseSymmProdDAG{T} + W::Matrix{T} + envelope::TE + spec::Vector{Vector{Int64}} # corr.spec TODO: this needs to be remove + # ---------------- Temporaries + P::Matrix{T} + ∂P::Matrix{T} + dP::Matrix{T} + Φ::Matrix{T} + ∂Φ::Matrix{T} + A::Matrix{T} + ∂A::Matrix{T} + Ai::Vector{T} + ∂Ai::Vector{T} + Si::Matrix{Bool} + ∂AA::Matrix{T} + ∂Si::Matrix{T} + ∇AA::Array{T, 3} + T::Matrix{T} + dT::Matrix{T} + ∂T::Matrix{T} + Tc::Vector{T} + Ta::Matrix{T} + Tb::Matrix{T} +end + +(Φ::BFwf)(args...) = evaluate(Φ, args...) + +const ↑ = '↑' +const ↓ = '↓' +const Ø = 'Ø' + +function BFwf(Nel::Integer, polys, envelope::Function; totdeg = length(polys), + ν = 3, T = Float64, + pos = [0.0],tpos = [0,0], + trans = identity, + sd_admissible = bb -> (true)) + @assert length(trans) == length(tpos) + # 1-particle spec + K = length(polys) + M = length(trans) + spec1p = [ (k, σ, m) for σ in [1, 2, 3] for k in 1:K for m in 1:M] # (1, 2, 3) = (∅, ↑, ↓); + spec1p = sort(spec1p, by = b -> b[1]) # sorting to prevent gensparse being confused + pooling = PooledSparseProduct(spec1p) + # generate the many-particle spec + tup2b = vv -> [ spec1p[v] for v in vv[vv .> 0] ] + default_admissible = bb -> (length(bb) == 0) || (sum(b[1] - 1 for b in bb ) <= totdeg) + + specAA = gensparse(; NU = ν, tup2b = tup2b, admissible = default_admissible, + minvv = fill(0, ν), + maxvv = fill(length(spec1p), ν), + ordered = true) + + + spec = [ vv[vv .> 0] for vv in specAA if !(isempty(vv[vv .> 0]))] + + # further restrict + spec = [t for t in spec if sd_admissible([spec1p[t[j]] for j = 1:length(t)])] + + corr1 = SparseSymmProd(spec; T = Float64) + corr = corr1.dag + + # initial guess for weights + Q, _ = qr(randn(T, length(corr), Nel)) + W = Matrix(Q) + + return BFwfs(pos, tpos, trans, polys, pooling, corr, W, envelope, spec, + zeros(T, Nel, length(polys) * length(trans)), + zeros(T, Nel, length(polys) * length(trans)), + zeros(T, Nel, length(polys) * length(trans)), + zeros(T, Nel, Nel), + zeros(T, Nel, Nel), + zeros(T, Nel, length(pooling)), + zeros(T, Nel, length(pooling)), + zeros(T, length(pooling)), + zeros(T, length(pooling)), + zeros(Bool, Nel, 3), + zeros(T, Nel, length(corr)), + zeros(T, Nel, 3), + zeros(T, Nel, Nel, length(corr)), + ones(T, Nel, length(trans)), + zeros(T, Nel, length(trans)), + zeros(T, Nel, length(trans)), + zeros(T, length(trans)), # Tc: spec1p: exp(-Tc) + ones(T, Nel, length(pos)), # Ta: Φᵢ + ones(T, Nel, length(pos)) # Tb: Φᵢexp(-Tb) + ) +end + + +""" +This function return correct Si for pooling operation. +""" +function onehot!(Si, i, Σ) + Si .= 0 + for k = 1:length(Σ) + Si[k, spin2num(Σ[k])] = 1 + end + # set current electron to ϕ, also remove their contribution in the sum of ↑ or ↓ basis + Si[i, 1] = 1 + Si[i, 2] = 0 + Si[i, 3] = 0 +end + +""" +This function convert spin to corresponding integer value used in spec +""" +function spin2num(σ) + if σ == '↑' + return 2 + elseif σ == '↓' + return 3 + elseif σ == '∅' + return 1 + end + error("illegal spin char for spin2num") +end + +""" +This function convert num to corresponding spin string. +""" +function num2spin(σ) + if σ == 2 + return '↑' + elseif σ == 3 + return '↓' + elseif σ == 1 + return '∅' + end + error("illegal integer value for num2spin") +end + + +""" +This function returns a nice version of spec. +""" +function displayspec(wf::BFwfs) + K = length(wf.polys) + spec1p = [ (k, σ, m) for σ in [1, 2, 3] for k in 1:K for m = 1:length(wf.trans)] # (1, 2, 3) = (∅, ↑, ↓); + spec1p = sort(spec1p, by = b -> b[1]) + _getnicespec = l -> (l[1], num2spin(l[2]),l[3]) + nicespec = [] + for k = 1:length(wf.spec) + push!(nicespec, _getnicespec.([spec1p[wf.spec[k][j]] for j = 1:length(wf.spec[k])])) + end + return nicespec +end + +function assemble_A(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) + nX = length(X) + # position embedding + P = wf.P # P = [P(trans[1]); P(trans[2]); ...; P(trans[M])] + T = wf.T + NP = length(wf.polys) + for i = 1:length(wf.trans) + Xt = wf.trans[i].(X) + P[:,(i-1) * NP + 1: i * NP] = evaluate!(P[:,(i-1) * NP + 1: i * NP], wf.polys, Xt) + T[:,i] = exp.(-wf.Tc[i] * sqrt.(Ref(1).+(X .- Ref(wf.tpos[i])).^2)) + end + + A = wf.A + Ai = wf.Ai + Si = wf.Si + + for i = 1:nX + onehot!(Si, i, Σ) + evalpool!(Ai, wf.pooling, (parent(P), Si, T), NP) + A[i, :] .= Ai + end + return A +end + +function evalpool!(A, basis::PooledSparseProduct{3}, BB, NP) + nX = size(BB[1], 1) + @assert all(B->size(B, 1) == nX, BB) + BB = ACEcore.constify(BB) # Assumes that no B aliases A + spec = ACEcore.constify(basis.spec) + + @aliasscope begin # No store to A aliases any read from any B + @inbounds for (iA, ϕ) in enumerate(spec) + ϕ1 = ((ϕ[3]-1) * NP + ϕ[1],ϕ[2],ϕ[3]) + a = zero(eltype(A)) + @simd ivdep for j = 1:nX + a += BB_prod(ϕ1, BB, j) # Tk∘atan_m * decay_m + end + A[iA] = a + end + end + return nothing +end + +@inline function BB_prod(ϕ::NTuple{3}, BB, j) + reduce(Base.FastMath.mul_fast, ntuple(Val(3)) do i + @inline + @inbounds BB[i][j, ϕ[i]] + end) +end + +function evaluate(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) + nX = length(X) + A = assemble_A(wf, X, Σ) + AA = ACEcore.evaluate(wf.corr, A) + BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) + Φ = wf.Φ + mul!(Φ, parent(AA), wf.W) # Φ = AA * W, nX x nX + Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* BB # the resulting matrix should contains two block each comes from each spin + release!(AA) + return 2 * logabsdet(Φ)[1] +end + +function envelope(pos, Ta, Tb, f, X::Float64, i::Int) + M = length(pos) + a = zero(eltype(X)) + @simd ivdep for j = 1:M + a += Ta[i,j] * exp(-Tb[i,j] * f(X - pos[j])) + end + return a +end + +function _BB(pos, Ta, Tb, f, X) + nX = length(X) + return [envelope(pos, Ta, Tb, f, X[j], i) for j = 1:nX, i = 1:nX] +end + +function Φ!(_Φ, pos, Ta, Tb, f, X) + Φ = _Φ .* _BB(pos, Ta, Tb, f, X) + return logabsdet(Φ)[1] +end + +function gradp_evaluate(wf::BFwfs, X::AbstractVector, Σ) + # =================== evaluate Φ============================= # + nX = length(X) # 电子数 + BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) + A = assemble_A(wf, X, Σ) + AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + Φ = wf.Φ + mul!(Φ, parent(AA), wf.W) + _Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] + Φ = _Φ .* BB + # ================================================ # + + # ψ = log | det( Φ ) | + # ∂Φ = ∂ψ/∂Φ = Φ⁻ᵀ + ∂Φ = transpose(pinv(Φ)) + + # =============== ∂ψ / ∂W = ∂Φ * ∂Φ / ∂W================================= # + # ∂W = ∂ψ / ∂W = ∂Φ * ∂_W( AA * W ) = ∂Φ * AA + # ∂Wij = ∑_ab ∂Φab * ∂_Wij( ∑_k AA_ak W_kb ) + # = ∑_ab ∂Φab * ∑_k δ_ik δ_bj AA_ak + # = ∑_a ∂Φaj AA_ai = ∂Φaj' * AA_ai + ∇p = transpose(parent(AA)) * (∂Φ .* BB) + + release!(AA) + ∇p = ∇p * 2 + + ∇Ta = ForwardDiff.gradient(Ta -> Φ!(_Φ, wf.pos, Ta, wf.Tb, wf.envelope, X), wf.Ta) + ∇Tb = ForwardDiff.gradient(Tb -> Φ!(_Φ, wf.pos, wf.Ta, Tb, wf.envelope, X), wf.Tb) + ∇Ta = ∇Ta * 2 + ∇Tb = ∇Tb * 2 + return (∇p = ∇p, ∇Ta = ∇Ta, ∇Tb = ∇Tb) +end + + +# ----------------------- gradient + +struct ZeroNoEffect end +Base.size(::ZeroNoEffect, ::Integer) = Inf +Base.setindex!(A::ZeroNoEffect, args...) = nothing +Base.getindex(A::ZeroNoEffect, args...) = Bool(0) + +function gradient(wf::BFwfs, X, Σ) + nX = length(X) + # ------ forward pass ----- + + # position embedding (forward-mode) + # here we evaluate and differentiate at the same time, which is cheap + P = wf.P + dP = wf.dP # ∂P/∂x + T = wf.T + dT = wf.dT # ∂T/∂x + NP = length(wf.polys) + + for i = 1:length(wf.trans) + # =============== P, dP = ∂P/∂trans(x) by Polynomials4ML.evaluate_ed!================================= # + Xt = wf.trans[i].(X) + P[:,(i-1) * NP + 1: i * NP],dP[:,(i-1) * NP + 1: i * NP] = Polynomials4ML.evaluate_ed!(P[:,(i-1) * NP + 1: i * NP], dP[:,(i-1) * NP + 1: i * NP], wf.polys, Xt) + # =============== ∂Xt ================================= # + ∂Xt = ForwardDiff.derivative.(Ref(x -> wf.trans[i](x)), X) + # =============== dP = ∂P/∂trans(x) * ∂trans(x)/∂x ================================= # + @inbounds for k = 1:Int(size(dP, 2)/length(wf.trans)) + @simd ivdep for j = 1:nX + dP[j, (i-1)*Int(size(dP, 2)/length(wf.trans))+k] *= ∂Xt[j] + end + end + # =============== T ================================= # + T[:,i] = exp.(-wf.Tc[i] * sqrt.(Ref(1).+(X .- Ref(wf.tpos[i])).^2)) + # =============== dT = ∂T/∂x ================================= # + dT[:,i] = ForwardDiff.derivative.(Ref(x -> exp(-wf.Tc[i] * sqrt(1+(x - wf.tpos[i])^2))), X) + end + + # no gradients here - need to somehow "constify" this vector + # could use other packages for inspiration ... + + # pooling : need an elegant way to shift this loop into a kernel! + # e.g. by passing output indices to the pooling function. + + A = wf.A # zeros(nX, length(wf.pooling)) + Ai = wf.Ai # zeros(length(wf.pooling)) + Si = wf.Si # zeros(Bool, nX, 3) + + # =============== Ai: ∑ᵢ T_k∘atan_m(x) * decay_m ================================= # + for i = 1:nX + onehot!(Si, i, Σ) + evalpool!(Ai, wf.pooling, (parent(P), Si, T), NP) + A[i, :] .= Ai + end + + # n-correlations + AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + # =================== evaluate =============================== # + # generalized orbitals + BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) + Φ = wf.Φ + mul!(Φ, parent(AA), wf.W) + _Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] + Φ = _Φ .* BB + # ================================================== # + + # and finally the wave function + # ψ = logabsdet(Φ)[1] + + # ------ backward pass ------ + #∂Φ = ∂ψ / ∂Φ = Φ⁻ᵀ + ∂Φ = transpose(pinv(Φ)) + + # ∂AA = ∂ψ/∂AA = ∂ψ/∂Φ * ∂Φ/∂AA = ∂Φ * wf.W' + ∂AA = wf.∂AA + mul!(∂AA, ∂Φ .* BB, transpose(wf.W)) + + # ∂BB = ∂ψ/∂BB = ∂ψ/∂Φ * ∂Φ/∂BB = ∂Φ .* _Φ + ∂BB = ∂Φ .* _Φ + # dBBx = ∂BB/∂x + dBB = [∇env(wf.pos, wf.Ta, wf.Tb, wf.envelope, X[j], i) for j = 1:nX, i = 1:nX] + + # ∂A = ∂ψ/∂A = ∂ψ/∂AA * ∂AA/∂A -> use custom pullback + ∂A = wf.∂A # zeros(size(A)) + ACEcore.pullback_arg!(∂A, ∂AA, wf.corr, parent(AA)) + release!(AA) + + # ∂P = ∂ψ/∂P = ∂ψ/∂A * ∂A/∂P -> use custom pullback + # but need to do some work here since multiple + # pullbacks can be combined here into a single one maybe? + ∂P = wf.∂P # zeros(size(P)) + fill!(∂P, 0) + ∂Si = wf.∂Si # zeros(size(Si)) # should use ZeroNoEffect here ?!??! + ∂T = wf.∂T # zeros(size(T)) + fill!(∂T, 0) + Si_ = zeros(nX, 3) + for i = 1:nX + onehot!(Si_, i, Σ) + # note this line ADDS the pullback into ∂P, not overwrite the content!! + ∂Ai = @view ∂A[i, :] + _pullback_evalpool!((∂P, ∂Si, ∂T), ∂Ai, wf.pooling, (P, Si_, T), NP) + end + + # ∂X = ∂ψ/∂X = ∂ψ/∂P * ∂P/∂X + ∂ψ/∂T * ∂T/∂X + # here we can now finally employ the dP=∂P/∂X that we already know. + # ∂ψ/∂Xi = ∑_k ∂ψ/∂Pik * ∂Pik/∂Xi + # = ∑_k ∂P[i, k] * dP[i, k] + g = zeros(nX) + @inbounds for k = 1:size(dP,2) + @simd ivdep for i = 1:nX + g[i] += ∂P[i, k] * dP[i, k] + end + end + @inbounds for k = 1:size(dT,2) + @simd ivdep for i = 1:nX + g[i] += ∂T[i, k] * dT[i, k] + end + end + @inbounds for k = 1:size(dBB,2) + @simd ivdep for i = 1:nX + g[i] += ∂BB[i, k] * dBB[i, k] + end + end + # g = sum(∂P .* dP, dims = 2)[:] + g = g * 2 + return g +end + +function ∇env(pos, Ta, Tb, f, X::Float64, i::Int) + df = X -> ForwardDiff.derivative(x -> f(x), X) + M = length(pos) + a = zero(eltype(X)) + @simd ivdep for j = 1:M + dx = df(X - pos[j]) + a += Ta[i,j] * exp(-Tb[i,j] * f(X - pos[j])) * -Tb[i,j] * dx + end + return a +end + +function _pullback_evalpool!(∂BB, ∂A, basis::PooledSparseProduct{3}, BB::Tuple, NP) + nX = size(BB[1], 1) + NB = 3 + @assert all(nX <= size(BB[i], 1) for i = 1:NB) + @assert all(nX <= size(∂BB[i], 1) for i = 1:NB) + @assert all(size(∂BB[i], 2) >= size(BB[i], 2) for i = 1:NB) + @assert length(∂A) == length(basis) + @assert length(BB) == NB + @assert length(∂BB) == NB + + @inbounds for (iA, ϕ) in enumerate(basis.spec) + ϕ1 = ((ϕ[3]-1) * NP + ϕ[1],ϕ[2],ϕ[3]) + ∂A_iA = ∂A[iA] + @simd ivdep for j = 1:nX + b = ntuple(Val(NB)) do i + @inbounds BB[i][j, ϕ1[i]] + end + g = ACEcore._prod_grad(b, Val(NB)) + for i = 1:NB + ∂BB[i][j, ϕ1[i]] = muladd(∂A_iA, g[i], ∂BB[i][j, ϕ1[i]]) + end + end + end + return nothing +end + +# ------------------ Laplacian implementation + +function laplacian(wf::BFwfs, X, Σ) + A, ∇A, ΔA = _assemble_A_∇A_ΔA(wf, X, Σ) + AA, ∇AA, ΔAA = _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) + + Δψ = _laplacian_inner(AA, ∇AA, ΔAA, wf, X, Σ) + Δψ = Δψ * 2 + return Δψ +end + +function _assemble_A_∇A_ΔA(wf::BFwfs, X, Σ) + TX = eltype(X) + lenA = length(wf.pooling) + nX = length(X) + A = zeros(TX, nX, lenA) + ∇A = zeros(TX, nX, nX, lenA) + ΔA = zeros(TX, nX, lenA) + spec_A = wf.pooling.spec + NP = length(wf.polys) + + P = zero(wf.P) + dP = zero(wf.dP) + ddP = zero(wf.P) + T = zero(wf.T) + dT = zero(wf.dT) + ddT = zero(wf.T) + for i = 1:length(wf.trans) + # =============== P, dP================================= # + Xt = wf.trans[i].(X) + P[:,(i-1) * NP + 1: i * NP], dP[:,(i-1) * NP + 1: i * NP], ddP[:,(i-1) * NP + 1: i * NP] = Polynomials4ML.evaluate_ed2(wf.polys, Xt) + # =============== ∂Xt/∂X, ∂Xt^2/∂^2XX================================= # + dtrans = x -> ForwardDiff.derivative(wf.trans[i], x) + ddtrans = x -> ForwardDiff.derivative(dtrans, x) + ∂Xt = dtrans.(X) + ∂∂Xt = ddtrans.(X) + # =============== dP = ∂P/∂trans(x) * ∂trans(x)/∂x, ddP = ∂^P/∂trans(x) * (∂trans(x)/∂x)^2================================= # + @inbounds for k = 1:Int(size(dP, 2)/length(wf.trans)) + @simd ivdep for j = 1:nX + dP[j, (i-1) * NP + k], ddP[j, (i-1) * NP + k] = ∂Xt[j] * dP[j, (i-1) * NP + k], ∂∂Xt[j] * dP[j, (i-1) * NP + k] + ∂Xt[j]^2 * ddP[j, (i-1) * NP + k] + end + end + # =============== T ================================= # + T[:,i] = exp.(-wf.Tc[i] * sqrt.(Ref(1).+(X .- Ref(wf.tpos[i])).^2)) + # =============== dT = ∂T/∂x, ddT ================================= # + dtrans = X -> ForwardDiff.derivative.(x -> exp(-wf.Tc[i] * sqrt(1+(x - wf.tpos[i])^2)), X) + ddtrans = x -> ForwardDiff.derivative(dtrans, x) + dT[:,i] = dtrans.(X) + ddT[:,i] = ddtrans.(X) + end + + Si_ = zeros(nX, 3) + Ai = zeros(length(wf.pooling)) + + @inbounds for i = 1:nX # loop over orbital bases (which i becomes ∅) + fill!(Si_, 0) + onehot!(Si_, i, Σ) + evalpool!(Ai, wf.pooling, (parent(P), Si_, T), NP) + @. A[i, :] .= Ai + for (iA, (k, σ, m)) in enumerate(spec_A) + for a = 1:nX + ∇A[a, i, iA] = Si_[a, σ] * (dP[a, (m-1) * NP + k] * T[a,m] + P[a, (m-1) * NP + k] * dT[a, m]) + ΔA[i, iA] += Si_[a, σ] * (ddP[a, (m-1) * NP + k] * T[a,m] + ddT[a, m] * P[a, (m-1) * NP + k] + 2 * dP[a, (m-1) * NP + k] * dT[a, m]) + end + end + end + return A, ∇A, ΔA +end + +function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwfs) + nX = size(A, 1) + AA = zeros(nX, length(wf.corr)) + ∇AA = wf.∇AA + ΔAA = zeros(nX, length(wf.corr)) + + @inbounds for iAA = 1:wf.corr.num1 + @. AA[:, iAA] .= A[:, iAA] + @. ∇AA[:, :, iAA] .= ∇A[:, :, iAA] + @. ΔAA[:, iAA] .= ΔA[:, iAA] + end + + lenAA = length(wf.corr) + @inbounds for iAA = wf.corr.num1+1:lenAA + k1, k2 = wf.corr.nodes[iAA] + for i = 1:nX + AA_k1 = AA[i, k1]; AA_k2 = AA[i, k2] + AA[i, iAA] = AA_k1 * AA_k2 + L = ΔAA[i, k1] * AA_k2 + L = muladd(ΔAA[i, k2], AA_k1, L) + @simd ivdep for a = 1:nX + ∇AA_k1 = ∇AA[a, i, k1]; ∇AA_k2 = ∇AA[a, i, k2] + L = muladd(2 * ∇AA_k1, ∇AA_k2, L) + g = ∇AA_k1 * AA_k2 + ∇AA[a, i, iAA] = muladd(∇AA_k2, AA_k1, g) + end + ΔAA[i, iAA] = L + end + end + return AA, ∇AA, ΔAA +end + +function Δenv(pos, Ta, Tb, f, X::Float64, i::Int) + df = X -> ForwardDiff.derivative(x -> f(x), X) + ddf = X -> ForwardDiff.derivative(df, X) + M = length(pos) + a = zero(eltype(X)) + @simd ivdep for j = 1:M + dx = df(X - pos[j]) + ddx = ddf(X-pos[j]) + a += Ta[i,j] * exp(-Tb[i,j] * f(X - pos[j])) * -Tb[i,j] * dx * -Tb[i,j] * dx + a += Ta[i,j] * exp(-Tb[i,j] * f(X - pos[j])) * -Tb[i,j] * ddx + end + return a +end + +function _laplacian_inner(AA, ∇AA, ΔAA, wf::BFwfs, X, Σ) + + # Δψ = Φ⁻ᵀ : ΔΦ - ∑ᵢ (Φ⁻ᵀ * Φᵢ)ᵀ : (Φ⁻ᵀ * Φᵢ) + # where Φᵢ = ∂_{xi} Φ + + nX = size(AA, 1) + + # the wf, and the first layer of derivatives + BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) + Φ = wf.Φ + mul!(Φ, parent(AA), wf.W) + _Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] + Φ = _Φ .* BB + + Φ⁻ᵀ = transpose(pinv(Φ)) + + # first contribution to the laplacian + ∇Φ_all = reshape(reshape(∇AA, nX*nX, :) * wf.W, nX, nX, nX) + + ΔBB = [Δenv(wf.pos, wf.Ta, wf.Tb, wf.envelope, X[j], i) for j = 1:nX, i = 1:nX] + ∇BB = zeros(nX,nX,nX) + for j = 1:nX + ∇BB[j,j,:] = [∇env(wf.pos, wf.Ta, wf.Tb, wf.envelope, X[j], i) for i = 1:nX] + end + + + ΔΦ = ΔAA * wf.W .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* BB + ΔΦ += _Φ .* ΔBB + ΔΦ += 2 * sum(∇Φ_all[i,:,:] .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* ∇BB[i,:,:] for i = 1:nX) + + Δψ = dot(Φ⁻ᵀ, ΔΦ) + + # the gradient contribution + # TODO: we can rework this into a single BLAS3 call + # which will also give us a single back-propagation + # ∇Φi = zeros(nX, nX) + Φ⁻¹∇Φi = zeros(nX, nX) + for i = 1:nX + ∇Φi = ∇Φ_all[i, :, :] .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* BB + ∇Φi += _Φ .* ∇BB[i, :, :] + mul!(Φ⁻¹∇Φi, transpose(Φ⁻ᵀ), ∇Φi) + Δψ -= dot(transpose(Φ⁻¹∇Φi), Φ⁻¹∇Φi) + end + + return Δψ +end + +# ----------------- BFwf parameter wraging + +function get_params(U::BFwfs) + return (U.W, U.Ta, U.Tb) +end + +function set_params!(U::BFwfs, para::Tuple{Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}) + U.W = para[1] + U.Ta = para[2] + U.Tb = para[3] + return U +end diff --git a/src/bflow.jl b/src/bflow.jl index 2b9286a..14fd362 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -1,4 +1,3 @@ - using ACEcore, Polynomials4ML using Polynomials4ML: OrthPolyBasis1D3T using ACEcore: PooledSparseProduct, SparseSymmProdDAG, SparseSymmProd, release! @@ -344,7 +343,7 @@ function laplacian(wf::BFwf, X, Σ) return Δψ end -function _assemble_A_∇A_ΔA(wf, X, Σ) +function _assemble_A_∇A_ΔA(wf::BFwf, X, Σ) TX = eltype(X) lenA = length(wf.pooling) nX = length(X) @@ -382,7 +381,7 @@ function _assemble_A_∇A_ΔA(wf, X, Σ) return A, ∇A, ΔA end -function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) +function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwf) nX = size(A, 1) AA = zeros(nX, length(wf.corr)) ∇AA = wf.∇AA @@ -415,7 +414,7 @@ function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) end -function _laplacian_inner(AA, ∇AA, ΔAA, wf, Σ) +function _laplacian_inner(AA, ∇AA, ΔAA, wf::BFwf, Σ) # Δψ = Φ⁻ᵀ : ΔΦ - ∑ᵢ (Φ⁻ᵀ * Φᵢ)ᵀ : (Φ⁻ᵀ * Φᵢ) # where Φᵢ = ∂_{xi} Φ @@ -538,3 +537,13 @@ function set_params!(U::BFwf, para) set_params!(U.envelope, para[2]) return U end + +function Scaling(U::BFwf, γ::Float64) + c = get_params(U) + uu = [] + _spec = U.spec + for i = 1:length(_spec) + push!(u, sum(_spec[i] .^ 2)) + end + return (uu = γ * uu .* c[1], d = zeros(length(c[2]))) +end \ No newline at end of file From f72fa18a34f446191d9d6a9ad4845efab4faf497 Mon Sep 17 00:00:00 2001 From: jerryho Date: Sun, 26 Feb 2023 14:51:54 -0500 Subject: [PATCH 02/10] removing ACE from dependency --- src/bflow-mts.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl index a1ce8b7..cf05828 100644 --- a/src/bflow-mts.jl +++ b/src/bflow-mts.jl @@ -3,7 +3,6 @@ using Polynomials4ML: OrthPolyBasis1D3T using ACEcore: PooledSparseProduct, SparseSymmProdDAG, SparseSymmProd, release! using ACEcore.Utils: gensparse using LinearAlgebra: qr, I, logabsdet, pinv, mul!, dot , tr -using ACE import ForwardDiff import Base.Experimental: @aliasscope From 8b1019da1dec13cb9e8bdfe8000dc51d7960d3c3 Mon Sep 17 00:00:00 2001 From: jerryho Date: Wed, 1 Mar 2023 16:29:37 -0500 Subject: [PATCH 03/10] small clean up/bug fix and add type constrain --- src/bflow-mts.jl | 99 +++++++++++++----------------------------------- src/bflow.jl | 33 ++++++---------- 2 files changed, 39 insertions(+), 93 deletions(-) diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl index cf05828..75aa048 100644 --- a/src/bflow-mts.jl +++ b/src/bflow-mts.jl @@ -38,17 +38,18 @@ mutable struct BFwfs{T, TT, TPOLY, TE} Tb::Matrix{T} end -(Φ::BFwf)(args...) = evaluate(Φ, args...) +(Φ::BFwfs)(args...) = evaluate(Φ, args...) const ↑ = '↑' const ↓ = '↓' const Ø = 'Ø' -function BFwf(Nel::Integer, polys, envelope::Function; totdeg = length(polys), +function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T, envelope::Function; totdeg = length(polys), ν = 3, T = Float64, pos = [0.0],tpos = [0,0], trans = identity, sd_admissible = bb -> (true)) + @assert length(trans) == length(tpos) # 1-particle spec K = length(polys) @@ -102,65 +103,7 @@ function BFwf(Nel::Integer, polys, envelope::Function; totdeg = length(polys), end -""" -This function return correct Si for pooling operation. -""" -function onehot!(Si, i, Σ) - Si .= 0 - for k = 1:length(Σ) - Si[k, spin2num(Σ[k])] = 1 - end - # set current electron to ϕ, also remove their contribution in the sum of ↑ or ↓ basis - Si[i, 1] = 1 - Si[i, 2] = 0 - Si[i, 3] = 0 -end - -""" -This function convert spin to corresponding integer value used in spec -""" -function spin2num(σ) - if σ == '↑' - return 2 - elseif σ == '↓' - return 3 - elseif σ == '∅' - return 1 - end - error("illegal spin char for spin2num") -end - -""" -This function convert num to corresponding spin string. -""" -function num2spin(σ) - if σ == 2 - return '↑' - elseif σ == 3 - return '↓' - elseif σ == 1 - return '∅' - end - error("illegal integer value for num2spin") -end - - -""" -This function returns a nice version of spec. -""" -function displayspec(wf::BFwfs) - K = length(wf.polys) - spec1p = [ (k, σ, m) for σ in [1, 2, 3] for k in 1:K for m = 1:length(wf.trans)] # (1, 2, 3) = (∅, ↑, ↓); - spec1p = sort(spec1p, by = b -> b[1]) - _getnicespec = l -> (l[1], num2spin(l[2]),l[3]) - nicespec = [] - for k = 1:length(wf.spec) - push!(nicespec, _getnicespec.([spec1p[wf.spec[k][j]] for j = 1:length(wf.spec[k])])) - end - return nicespec -end - -function assemble_A(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) +function assemble_A(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) # position embedding P = wf.P # P = [P(trans[1]); P(trans[2]); ...; P(trans[M])] @@ -184,6 +127,7 @@ function assemble_A(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) return A end + function evalpool!(A, basis::PooledSparseProduct{3}, BB, NP) nX = size(BB[1], 1) @assert all(B->size(B, 1) == nX, BB) @@ -203,6 +147,7 @@ function evalpool!(A, basis::PooledSparseProduct{3}, BB, NP) return nothing end + @inline function BB_prod(ϕ::NTuple{3}, BB, j) reduce(Base.FastMath.mul_fast, ntuple(Val(3)) do i @inline @@ -210,7 +155,8 @@ end end) end -function evaluate(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) + +function evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) AA = ACEcore.evaluate(wf.corr, A) @@ -222,6 +168,7 @@ function evaluate(wf::BFwfs, X::AbstractVector, Σ, Pnn=nothing) return 2 * logabsdet(Φ)[1] end + function envelope(pos, Ta, Tb, f, X::Float64, i::Int) M = length(pos) a = zero(eltype(X)) @@ -231,17 +178,20 @@ function envelope(pos, Ta, Tb, f, X::Float64, i::Int) return a end + function _BB(pos, Ta, Tb, f, X) nX = length(X) return [envelope(pos, Ta, Tb, f, X[j], i) for j = 1:nX, i = 1:nX] end + function Φ!(_Φ, pos, Ta, Tb, f, X) Φ = _Φ .* _BB(pos, Ta, Tb, f, X) return logabsdet(Φ)[1] end -function gradp_evaluate(wf::BFwfs, X::AbstractVector, Σ) + +function gradp_evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) # =================== evaluate Φ============================= # nX = length(X) # 电子数 BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) @@ -277,12 +227,7 @@ end # ----------------------- gradient -struct ZeroNoEffect end -Base.size(::ZeroNoEffect, ::Integer) = Inf -Base.setindex!(A::ZeroNoEffect, args...) = nothing -Base.getindex(A::ZeroNoEffect, args...) = Bool(0) - -function gradient(wf::BFwfs, X, Σ) +function gradient(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) # ------ forward pass ----- @@ -441,7 +386,7 @@ end # ------------------ Laplacian implementation -function laplacian(wf::BFwfs, X, Σ) +function laplacian(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) A, ∇A, ΔA = _assemble_A_∇A_ΔA(wf, X, Σ) AA, ∇AA, ΔAA = _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) @@ -450,7 +395,7 @@ function laplacian(wf::BFwfs, X, Σ) return Δψ end -function _assemble_A_∇A_ΔA(wf::BFwfs, X, Σ) +function _assemble_A_∇A_ΔA(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) TX = eltype(X) lenA = length(wf.pooling) nX = length(X) @@ -554,7 +499,7 @@ function Δenv(pos, Ta, Tb, f, X::Float64, i::Int) return a end -function _laplacian_inner(AA, ∇AA, ΔAA, wf::BFwfs, X, Σ) +function _laplacian_inner(AA, ∇AA, ΔAA, wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) # Δψ = Φ⁻ᵀ : ΔΦ - ∑ᵢ (Φ⁻ᵀ * Φᵢ)ᵀ : (Φ⁻ᵀ * Φᵢ) # where Φᵢ = ∂_{xi} Φ @@ -613,3 +558,13 @@ function set_params!(U::BFwfs, para::Tuple{Matrix{Float64}, Matrix{Float64}, Mat U.Tb = para[3] return U end + +function Scaling(U::BFwfs, γ::Float64) + c = get_params(U) + uu = [] + _spec = U.spec + for i = 1:length(_spec) + push!(u, sum(_spec[i] .^ 2)) + end + return (uu = γ * uu .* c[1], d = zeros(length(c[2]))) +end diff --git a/src/bflow.jl b/src/bflow.jl index 14fd362..26b8718 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -31,7 +31,7 @@ end (Φ::BFwf)(args...) = evaluate(Φ, args...) -function BFwf(Nel::Integer, polys; totdeg = length(polys), +function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T; totdeg = length(polys), ν = 3, T = Float64, trans = identity, sd_admissible = bb -> (true), @@ -84,7 +84,7 @@ end """ This function return correct Si for pooling operation. """ -function onehot!(Si, i, Σ) +function onehot!(Si::Matrix, i::Integer, Σ::Vector{Char}) Si .= 0 for k = 1:length(Σ) Si[k, spin2num(Σ[k])] = 1 @@ -98,7 +98,7 @@ end """ This function convert spin to corresponding integer value used in spec """ -function spin2num(σ) +function spin2num(σ::Char) if σ == '↑' return 2 elseif σ == '↓' @@ -112,7 +112,7 @@ end """ This function convert num to corresponding spin string. """ -function num2spin(σ) +function num2spin(σ::Integer) if σ == 2 return '↑' elseif σ == 3 @@ -140,7 +140,7 @@ function displayspec(wf::BFwf) end -function assemble_A(wf::BFwf, X::AbstractVector, Σ, Pnn=nothing) +function assemble_A(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) # position embedding @@ -160,19 +160,10 @@ function assemble_A(wf::BFwf, X::AbstractVector, Σ, Pnn=nothing) return A end -function evaluate(wf::BFwf, X::AbstractVector, Σ, Pnn=nothing) +function evaluate(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) - - # the only basis to be purified are those with same spin - # scan through all corr basis, if they comes from same spin, remove self interation by using basis - # from same spin - # first we have to construct coefficent for basis coming from same spin, that is in other words the coefficent - # matrix of the original polynomial basis, this will be pass from the argument Pnn - # === purification goes here === # - - # === # Φ = wf.Φ mul!(Φ, parent(AA), wf.W) # nX x nX Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] # the resulting matrix should contains two block each comes from each spin @@ -183,7 +174,7 @@ function evaluate(wf::BFwf, X::AbstractVector, Σ, Pnn=nothing) end -function gradp_evaluate(wf::BFwf, X::AbstractVector, Σ) +function gradp_evaluate(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) @@ -226,7 +217,7 @@ Base.setindex!(A::ZeroNoEffect, args...) = nothing Base.getindex(A::ZeroNoEffect, args...) = Bool(0) -function gradient(wf::BFwf, X, Σ) +function gradient(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) # ------ forward pass ----- @@ -325,7 +316,7 @@ end # ------------------ Laplacian implementation -function laplacian(wf::BFwf, X, Σ) +function laplacian(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) A, ∇A, ΔA = _assemble_A_∇A_ΔA(wf, X, Σ) AA, ∇AA, ΔAA = _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) @@ -343,7 +334,7 @@ function laplacian(wf::BFwf, X, Σ) return Δψ end -function _assemble_A_∇A_ΔA(wf::BFwf, X, Σ) +function _assemble_A_∇A_ΔA(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) TX = eltype(X) lenA = length(wf.pooling) nX = length(X) @@ -452,7 +443,7 @@ end # ------------------ gradp of Laplacian -function gradp_laplacian(wf::BFwf, X, Σ) +function gradp_laplacian(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) # ---- gradp of Laplacian of Ψ ---- @@ -532,7 +523,7 @@ function get_params(U::BFwf) return (U.W, U.envelope.ξ) end -function set_params!(U::BFwf, para) +function set_params!(U::BFwf, para::Tuple{Matrix{Float64}, Any}) U.W = para[1] set_params!(U.envelope, para[2]) return U From 43c3ec454734b0d16aa1e676899a47b8d6c5db44 Mon Sep 17 00:00:00 2001 From: jerryho Date: Sun, 5 Mar 2023 14:29:54 -0500 Subject: [PATCH 04/10] clean up, add BFwfs test and bug fix in testing files --- examples/{ => backup}/ace1.jl | 0 examples/backup/bflow-v-1trans.jl | 20 ++ examples/backup/bflow-v-mtrans.jl | 22 ++ profile/profile_bflow.jl | 47 ++++ .../{profile_bflow1.jl => profile_bflows.jl} | 21 +- src/bflow-mts.jl | 4 +- src/bflow.jl | 14 +- test/runtests.jl | 4 +- test/test_bflow.jl | 2 +- test/test_bflows.jl | 228 ++++++++++++++++++ 10 files changed, 344 insertions(+), 18 deletions(-) rename examples/{ => backup}/ace1.jl (100%) create mode 100644 examples/backup/bflow-v-1trans.jl create mode 100644 examples/backup/bflow-v-mtrans.jl create mode 100644 profile/profile_bflow.jl rename profile/{profile_bflow1.jl => profile_bflows.jl} (61%) create mode 100644 test/test_bflows.jl diff --git a/examples/ace1.jl b/examples/backup/ace1.jl similarity index 100% rename from examples/ace1.jl rename to examples/backup/ace1.jl diff --git a/examples/backup/bflow-v-1trans.jl b/examples/backup/bflow-v-1trans.jl new file mode 100644 index 0000000..ae396e0 --- /dev/null +++ b/examples/backup/bflow-v-1trans.jl @@ -0,0 +1,20 @@ +using ACE +using Polynomials4ML +using ACEpsi + +const ↑, ↓, ∅ = '↑','↓','∅' + +Nel = 8 +Σ = vcat(rand([↑],Int(ceil(Nel/2))),rand([↓],Nel - Int(ceil(Nel/2)))) +trans = λ("r -> 2/pi * atan(r)") +MaxDeg = [4, 4] +polys = Polynomials4ML.legendre_basis(maximum(MaxDeg)) +U = ACEpsi.BFwf(Nel, polys; ν=length(MaxDeg), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) + +X = rand(Nel) +ACEpsi.evaluate(U,X,Σ) +ACEpsi.gradient(U,X,Σ) + + + + diff --git a/examples/backup/bflow-v-mtrans.jl b/examples/backup/bflow-v-mtrans.jl new file mode 100644 index 0000000..85a5c4c --- /dev/null +++ b/examples/backup/bflow-v-mtrans.jl @@ -0,0 +1,22 @@ +using ACE +using Polynomials4ML +using ACEpsi + +const ↑, ↓, ∅ = '↑','↓','∅' + +Nel = 8 +Σ = vcat(rand([↑],Int(ceil(Nel/2))),rand([↓],Nel - Int(ceil(Nel/2)))) + +pos = [-70.,-50.,-30.,-10.,10.,30.,50.,70.] +trans = [λ("r -> atan(r+70.0)"),λ("r -> atan(r+50.0)"),λ("r -> atan(r+30.0)"),λ("r -> atan(r+10.0)"),λ("r -> atan(r-10.0)"),λ("r -> atan(r-30.0)"),λ("r -> atan(r-50.0)"),λ("r -> atan(r-70.0)")] +tpos = reduce(vcat,pos) +pos = reduce(vcat,pos) +M = length(pos) +MaxDeg = [6, 6, 6] + +polys = Polynomials4ML.legendre_basis(maximum(MaxDeg)) +wf = ACEpsi.BFwf(Nel, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) + +X = rand(Nel) +ACEpsi.evaluate(U,X,Σ) +ACEpsi.gradient(U,X,Σ) \ No newline at end of file diff --git a/profile/profile_bflow.jl b/profile/profile_bflow.jl new file mode 100644 index 0000000..494d9ce --- /dev/null +++ b/profile/profile_bflow.jl @@ -0,0 +1,47 @@ +using Polynomials4ML, ACEcore, ACEpsi, ACEbase, Printf +using ACEpsi: BFwf, gradient, evaluate, laplacian +using LinearAlgebra +using BenchmarkTools + +## + +N = 8 +Σ = vcat(rand([↑],Int(ceil(N/2))),rand([↓],N - Int(ceil(N/2)))) + +pos = [-70.,-50.,-30.,-10.,10.,30.,50.,70.] +trans = [λ("r -> atan(r+70.0)"),λ("r -> atan(r+50.0)"),λ("r -> atan(r+30.0)"),λ("r -> atan(r+10.0)"),λ("r -> atan(r-10.0)"),λ("r -> atan(r-30.0)"),λ("r -> atan(r-50.0)"),λ("r -> atan(r-70.0)")] +tpos = reduce(vcat,pos) +pos = reduce(vcat,pos) +M = length(pos) +MaxDeg = [6, 6, 6] + +polys = Polynomials4ML.legendre_basis(maximum(MaxDeg)) +wf = BFwf(N, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) + +## + +X = 2 * rand(Nel) .- 1 +Σ = rand([↑, ↓], Nel) +wf(X, Σ) +gradient(wf, X, Σ) +laplacian(wf, X, Σ) + +## + +@info("evaluate") +@btime wf($X, Σ) + +@info("gradient") +@btime gradient($wf, $X, $Σ) + +@info("laplacian") +@btime laplacian($wf, $X, $Σ) + + +## + +# @profview let wf=wf, X=X +# for nrun = 1:50_000 +# laplacian(wf, X) +# end +# end diff --git a/profile/profile_bflow1.jl b/profile/profile_bflows.jl similarity index 61% rename from profile/profile_bflow1.jl rename to profile/profile_bflows.jl index f3c3f42..b04030f 100644 --- a/profile/profile_bflow1.jl +++ b/profile/profile_bflows.jl @@ -1,5 +1,3 @@ - - using Polynomials4ML, ACEcore, ACEpsi, ACEbase, Printf using ACEpsi: BFwf, gradient, evaluate, laplacian using LinearAlgebra @@ -7,28 +5,31 @@ using BenchmarkTools ## -Nel = 10 -polys = legendre_basis(15) +const ↑, ↓, ∅ = '↑','↓','∅' +Nel = 8 +polys = legendre_basis(6) wf = BFwf(Nel, polys; ν=3) ## X = 2 * rand(Nel) .- 1 -wf(X) -gradient(wf, X) -laplacian(wf, X) +Σ = rand([↑, ↓], Nel) +wf(X, Σ) +gradient(wf, X, Σ) +laplacian(wf, X, Σ) ## @info("evaluate") -@btime $wf($X) +@btime wf($X, Σ) @info("gradient") -@btime gradient($wf, $X) +@btime gradient($wf, $X, $Σ) @info("laplacian") -@btime laplacian($wf, $X) +@btime laplacian($wf, $X, $Σ) + ## diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl index 75aa048..f8a4374 100644 --- a/src/bflow-mts.jl +++ b/src/bflow-mts.jl @@ -163,7 +163,7 @@ function evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) Φ = wf.Φ mul!(Φ, parent(AA), wf.W) # Φ = AA * W, nX x nX - Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* BB # the resulting matrix should contains two block each comes from each spin + Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] .* BB release!(AA) return 2 * logabsdet(Φ)[1] end @@ -193,7 +193,7 @@ end function gradp_evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) # =================== evaluate Φ============================= # - nX = length(X) # 电子数 + nX = length(X) # number of electrons BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) A = assemble_A(wf, X, Σ) AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) diff --git a/src/bflow.jl b/src/bflow.jl index 26b8718..187b021 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -139,7 +139,9 @@ function displayspec(wf::BFwf) return nicespec end - +""" +Construct the A basis (basis after pooling) +""" function assemble_A(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) @@ -334,6 +336,9 @@ function laplacian(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) return Δψ end +""" +Construct A, ∇A, ΔA for evaluating laplacian +""" function _assemble_A_∇A_ΔA(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) TX = eltype(X) lenA = length(wf.pooling) @@ -372,6 +377,9 @@ function _assemble_A_∇A_ΔA(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) return A, ∇A, ΔA end +""" +Construct AA, ∇AA, ΔAA for evaluating laplacian +""" function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwf) nX = size(A, 1) AA = zeros(nX, length(wf.corr)) @@ -404,7 +412,9 @@ function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwf) return AA, ∇AA, ΔAA end - +""" +Evaluate laplacian from AA, ∇AA, ΔAA +""" function _laplacian_inner(AA, ∇AA, ΔAA, wf::BFwf, Σ) # Δψ = Φ⁻ᵀ : ΔΦ - ∑ᵢ (Φ⁻ᵀ * Φᵢ)ᵀ : (Φ⁻ᵀ * Φᵢ) diff --git a/test/runtests.jl b/test/runtests.jl index 62daed3..27e4459 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,6 @@ using ACEpsi using Test @testset "ACEpsi.jl" begin - - @testset "BFwf" begin include("test_bflow.jl") end - + @testset "BFwf" begin include("test_bflows.jl") end end diff --git a/test/test_bflow.jl b/test/test_bflow.jl index 29511b3..2e24337 100644 --- a/test/test_bflow.jl +++ b/test/test_bflow.jl @@ -220,6 +220,6 @@ wf2 = ACEpsi.set_params!(wf2, param1) ## -@warn("removed compac test since json file is missing") +#@warn("removed compac test since json file is missing") # @info("Test compatibility with ACESchrodinger") # Jerry: Not sure if this should be kept in the same file # include("compare_bflow.jl") diff --git a/test/test_bflows.jl b/test/test_bflows.jl new file mode 100644 index 0000000..c48f479 --- /dev/null +++ b/test/test_bflows.jl @@ -0,0 +1,228 @@ + +using Polynomials4ML, ACEcore, ACEbase, Printf, ACEpsi +using ACEpsi: BFwf, gradient, evaluate, laplacian +using LinearAlgebra +using ACE: λ + +## +function lap_test(f, Δf, X) + F = f(X) + ΔF = Δf(X) + nX = length(X) + n1, nF = size(F) + U = randn(n1) + V = randn(nF) ./ (1:nF).^2 + f0 = U' * F * V + Δf0 = U' * ΔF * V + EE = Matrix(I, (nX, nX)) + for h in sqrt(0.1).^((5:12)) + Δfh = 0.0 + for i = 1:nX + Δfh += (U'*f(X + h * EE[:, i])*V - f0) / h^2 + Δfh += (U'*f(X - h * EE[:, i])*V - f0) / h^2 + end + @printf(" %.1e | %.2e \n", h, abs(Δfh - Δf0)) + end +end + +function grad_test2(f, df, X::AbstractVector) + F = f(X) + ∇F = df(X) + nX = length(X) + EE = Matrix(I, (nX, nX)) + + for h in 0.1.^(3:12) + gh = [ (f(X + h * EE[:, i]) - F) / h for i = 1:nX ] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇F, Inf)) + end +end + +function grad_test3(f, df, X::Float64) + F = f(X) + ∇F = df(X) # return as vector of size 1 + for h in 0.1.^(3:10) + gh = [ (f(X + h) - F) / h] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇F, Inf)) + end +end + + +function grad_test(f, df, X) + F = f(X) + ∇F = df(X) + nX, nF = size(F) + U = randn(nX) + V = randn(nF) ./ (1:nF).^2 + f0 = U' * F * V + ∇f0 = [ U' * ∇F[i, :, :] * V for i = 1:nX ] + EE = Matrix(I, (Nel, Nel)) + for h in 0.1.^(2:10) + gh = [ (U'*f(X + h * EE[:, i])*V - f0) / h for i = 1:Nel ] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇f0, Inf)) + end +end + +"This function should be removed later to test in a nicer way..." +function fdtest(F, Σ, dF, x::AbstractVector; h0 = 1.0, verbose=true) + errors = Float64[] + E = F(x, Σ) + dE = dF + # loop through finite-difference step-lengths + verbose && @printf("---------|----------- \n") + verbose && @printf(" h | error \n") + verbose && @printf("---------|----------- \n") + for p = 2:11 + h = 0.1^p + dEh = copy(dE) + for n = 1:length(dE) + x[n] += h + dEh[n] = (F(x, Σ) - E) / h + x[n] -= h + end + push!(errors, norm(dE - dEh, Inf)) + verbose && @printf(" %1.1e | %4.2e \n", h, errors[end]) + end + verbose && @printf("---------|----------- \n") + if minimum(errors) <= 1e-3 * maximum(errors) + verbose && println("passed") + return true + else + @warn("""It seems the finite-difference test has failed, which indicates + that there is an inconsistency between the function and gradient + evaluation. Please double-check this manually / visually. (It is + also possible that the function being tested is poorly scaled.)""") + return false + end + end +## + +const ↑, ↓, ∅ = '↑','↓','∅' + +N = 8 +Σ = vcat(rand([↑],Int(ceil(N/2))),rand([↓],N - Int(ceil(N/2)))) + +pos = [-70.,-50.,-30.,-10.,10.,30.,50.,70.] +trans = [λ("r -> atan(r+70.0)"),λ("r -> atan(r+50.0)"),λ("r -> atan(r+30.0)"),λ("r -> atan(r+10.0)"),λ("r -> atan(r-10.0)"),λ("r -> atan(r-30.0)"),λ("r -> atan(r-50.0)"),λ("r -> atan(r-70.0)")] +tpos = reduce(vcat,pos) +pos = reduce(vcat,pos) +M = length(pos) +MaxDeg = [6, 6, 6] + +polys = Polynomials4ML.legendre_basis(maximum(MaxDeg)) +wf = BFwf(N, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) + +X = rand(N) +evaluate(U,X,Σ) +g = gradient(U,X,Σ) + + + + +## + +using LinearAlgebra +using Printf +#using ACEbase.Testing: fdtest + +@info("Fd test of gradient w.r.t. X") +fdtest(wf, Σ, g, X) + +## + +A, ∇A, ΔA = ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ) + +@info("test ∇A") +grad_test(X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[1], + X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[2], + X) + +@info("test ΔA") +lap_test(X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[1], + X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[3], + X) + + +## + +function f_AA(X, Σ) + A, ∇A, ΔA = ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ) + AA, ∇AA, ΔAA = ACEpsi._assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) + return AA, ∇AA, ΔAA +end + +@info("test ∇A") +grad_test(X -> f_AA(X, Σ)[1], X -> f_AA(X, Σ)[2], X) + +@info("test ΔA") +lap_test(X -> f_AA(X, Σ)[1], X -> f_AA(X, Σ)[3], X) + + +## + +@info("test Δψ") +lap_test(X -> [wf(X, Σ);;], X -> [ACEpsi.laplacian(wf, X, Σ);;], X) + + +## + +@info("Test ∇ψ w.r.t. parameters") + +ACEpsi.gradp_evaluate(wf, X, Σ) + + +W0 = copy(wf.W) +w0 = W0[:] +Fp = w -> ( wf.W[:] .= w[:]; wf(X, Σ)) +dFp = w -> ( wf.W[:] .= w[:]; ACEpsi.gradp_evaluate(wf, X, Σ)[1][:] ) + +grad_test2(Fp, dFp, w0) + +## + +# BFwfs has no seperate env implementated +# @info("test ∇env w.r.t. parameter") +# Ξ0 = copy(wf.envelope.ξ) +# ξ0 = Ξ0 + +# Envp = w -> (wf.envelope.ξ = w; wf(X, Σ)) +# dEnvp = w -> (wf.envelope.ξ = w; ACEpsi.gradp_evaluate(wf, X, Σ)[2]) + +# grad_test3(Envp, dEnvp, ξ0) + +## + +# BFwfs has no gradp_laplacian implementated +# @info("Test ∇Δψ w.r.t. parameters") + +# Fp = w -> ( wf.W[:] .= w[:]; ACEpsi.laplacian(wf, X, Σ)) +# dFp = w -> ( wf.W[:] .= w[:]; ACEpsi.gradp_laplacian(wf, X, Σ)[1][:] ) + +# grad_test2(Fp, dFp, w0) + +## + +# BFwfs has no seperate env implementated +# @info("Test ∇Δenv w.r.t. parameters") + +# Ξ0 = copy(wf.envelope.ξ) +# ξ0 = Ξ0 + + +# Fp = w -> ( wf.envelope.ξ = w; ACEpsi.laplacian(wf, X, Σ)) +# dFp = w -> (wf.envelope.ξ = w; ACEpsi.gradp_laplacian(wf, X, Σ)[2][:] ) + +# grad_test3(Fp, dFp, ξ0) + +## + +@info("Test getting/setting parameters") + + +wf1 = BFwf(N, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) +wf2 = BFwf(N, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) +@printf(" wf1 - wf2: %f \n", abs(wf1(X, Σ) - wf2(X, Σ))) +param1 = ACEpsi.get_params(wf1) +wf2 = ACEpsi.set_params!(wf2, param1) +@printf(" wf1 - wf2 with parameter wf1: %f \n", abs(wf1(X, Σ) - wf2(X, Σ))) + +## From 40abf0b97187698e63e0b6e631d3e3bd07e391d4 Mon Sep 17 00:00:00 2001 From: jerryho Date: Wed, 8 Mar 2023 00:28:42 -0500 Subject: [PATCH 05/10] fix corr --- src/bflow.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bflow.jl b/src/bflow.jl index 187b021..bb11cfd 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -57,9 +57,11 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T; totdeg = length(polys), # further restrict spec = [t for t in spec if sd_admissible([spec1p[t[j]] for j = 1:length(t)])] + corr1 = SparseSymmProd(spec; T = Float64) - corr = corr1.dag - + corr = corr1.dag + corr = SparseSymmProdDAG{Float64}(corr.nodes[corr1.proj],corr.num1,length(corr.nodes[corr1.proj]),corr.projection,corr.pool_AA,corr.bpool_AA) + # initial guess for weights Q, _ = qr(randn(T, length(corr), Nel)) W = Matrix(Q) @@ -374,7 +376,7 @@ function _assemble_A_∇A_ΔA(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) end end end - return A, ∇A, ΔA + return A, ∇A, ΔA end """ From 4c524da81f0f624f3cf0f389dbd5bc6ea7522dc5 Mon Sep 17 00:00:00 2001 From: jerryho Date: Wed, 8 Mar 2023 00:32:09 -0500 Subject: [PATCH 06/10] minor fix --- src/bflow-mts.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl index f8a4374..3161fed 100644 --- a/src/bflow-mts.jl +++ b/src/bflow-mts.jl @@ -74,7 +74,8 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T, envelope::Function; totdeg corr1 = SparseSymmProd(spec; T = Float64) corr = corr1.dag - + corr = SparseSymmProdDAG{Float64}(corr.nodes[corr1.proj],corr.num1,length(corr.nodes[corr1.proj]),corr.projection,corr.pool_AA,corr.bpool_AA) + # initial guess for weights Q, _ = qr(randn(T, length(corr), Nel)) W = Matrix(Q) From 5c4d0c881deeaedb4c68a3cf4657eb83b7001048 Mon Sep 17 00:00:00 2001 From: jerryho Date: Thu, 9 Mar 2023 18:45:32 -0500 Subject: [PATCH 07/10] try using ord N only --- src/bflow-mts.jl | 33 +++++++++++++++++++++++---------- src/bflow.jl | 25 ++++++++++++++----------- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/bflow-mts.jl b/src/bflow-mts.jl index 3161fed..825cf75 100644 --- a/src/bflow-mts.jl +++ b/src/bflow-mts.jl @@ -74,10 +74,9 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T, envelope::Function; totdeg corr1 = SparseSymmProd(spec; T = Float64) corr = corr1.dag - corr = SparseSymmProdDAG{Float64}(corr.nodes[corr1.proj],corr.num1,length(corr.nodes[corr1.proj]),corr.projection,corr.pool_AA,corr.bpool_AA) - + # initial guess for weights - Q, _ = qr(randn(T, length(corr), Nel)) + Q, _ = qr(randn(T, length(corr1), Nel)) W = Matrix(Q) return BFwfs(pos, tpos, trans, polys, pooling, corr, W, envelope, spec, @@ -91,7 +90,7 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T, envelope::Function; totdeg zeros(T, length(pooling)), zeros(T, length(pooling)), zeros(Bool, Nel, 3), - zeros(T, Nel, length(corr)), + zeros(T, Nel, length(corr1)), zeros(T, Nel, 3), zeros(T, Nel, Nel, length(corr)), ones(T, Nel, length(trans)), @@ -103,6 +102,17 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T, envelope::Function; totdeg ) end +function displayspec(wf::BFwfs) + K = length(wf.polys) + spec1p = [ (k, σ, m) for σ in [1, 2, 3] for k in 1:K for m in 1:M] # (1, 2, 3) = (∅, ↑, ↓); + spec1p = sort(spec1p, by = b -> b[1]) + _getnicespec = l -> (l[1], ACEpsi.num2spin(l[2]),l[3]) + nicespec = [] + for k = 1:length(wf.spec) + push!(nicespec, _getnicespec.([spec1p[wf.spec[k][j]] for j = 1:length(wf.spec[k])])) + end + return nicespec +end function assemble_A(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) @@ -156,11 +166,10 @@ end end) end - function evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) - AA = ACEcore.evaluate(wf.corr, A) + AA = ACEcore.evaluate(wf.corr, A)[:,wf.corr.projection] BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) Φ = wf.Φ mul!(Φ, parent(AA), wf.W) # Φ = AA * W, nX x nX @@ -197,7 +206,7 @@ function gradp_evaluate(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) nX = length(X) # number of electrons BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) A = assemble_A(wf, X, Σ) - AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + AA = ACEcore.evaluate(wf.corr, A)[:,wf.corr.projection] # nX x length(wf.corr) Φ = wf.Φ mul!(Φ, parent(AA), wf.W) _Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] @@ -276,7 +285,8 @@ function gradient(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) end # n-correlations - AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + DD = ACEcore.evaluate(wf.corr, A) + AA = DD[:,wf.corr.projection] # nX x length(wf.corr) # =================== evaluate =============================== # # generalized orbitals BB = _BB(wf.pos, wf.Ta, wf.Tb, wf.envelope, X) @@ -304,8 +314,11 @@ function gradient(wf::BFwfs, X::AbstractVector, Σ::Vector{Char}) # ∂A = ∂ψ/∂A = ∂ψ/∂AA * ∂AA/∂A -> use custom pullback ∂A = wf.∂A # zeros(size(A)) - ACEcore.pullback_arg!(∂A, ∂AA, wf.corr, parent(AA)) + ∂DD = zeros(size(∂AA,1),length(wf.corr)) + ∂DD[:,wf.corr.projection] = ∂AA + ACEcore.pullback_arg!(∂A, ∂DD, wf.corr, parent(DD)) release!(AA) + release!(DD) # ∂P = ∂ψ/∂P = ∂ψ/∂A * ∂A/∂P -> use custom pullback # but need to do some work here since multiple @@ -483,7 +496,7 @@ function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwfs) ΔAA[i, iAA] = L end end - return AA, ∇AA, ΔAA + return AA[:,wf.corr.projection], ∇AA[:,:,wf.corr.projection], ΔAA[:,wf.corr.projection] end function Δenv(pos, Ta, Tb, f, X::Float64, i::Int) diff --git a/src/bflow.jl b/src/bflow.jl index bb11cfd..5acff92 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -60,10 +60,9 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T; totdeg = length(polys), corr1 = SparseSymmProd(spec; T = Float64) corr = corr1.dag - corr = SparseSymmProdDAG{Float64}(corr.nodes[corr1.proj],corr.num1,length(corr.nodes[corr1.proj]),corr.projection,corr.pool_AA,corr.bpool_AA) - + # initial guess for weights - Q, _ = qr(randn(T, length(corr), Nel)) + Q, _ = qr(randn(T, length(corr1), Nel)) W = Matrix(Q) return BFwf(trans, polys, pooling, corr, W, envelope, spec, @@ -77,7 +76,7 @@ function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T; totdeg = length(polys), zeros(T, length(pooling)), zeros(T, length(pooling)), zeros(Bool, Nel, 3), - zeros(T, Nel, length(corr)), + zeros(T, Nel, length(corr1)), zeros(T, Nel, 3), zeros(T, Nel, Nel, length(corr)) ) @@ -167,7 +166,7 @@ end function evaluate(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) - AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + AA = ACEcore.evaluate(wf.corr, A)[:,wf.corr.projection] # nX x length(wf.corr) Φ = wf.Φ mul!(Φ, parent(AA), wf.W) # nX x nX Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] # the resulting matrix should contains two block each comes from each spin @@ -182,7 +181,7 @@ function gradp_evaluate(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) nX = length(X) A = assemble_A(wf, X, Σ) - AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + AA = ACEcore.evaluate(wf.corr, A)[:,wf.corr.projection] # nX x length(wf.corr) Φ = wf.Φ mul!(Φ, parent(AA), wf.W) Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] # the resulting matrix should contains two block each comes from each spin @@ -254,9 +253,9 @@ function gradient(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) ACEcore.evalpool!(Ai, wf.pooling, (parent(P), Si)) A[i, :] .= Ai end - # n-correlations - AA = ACEcore.evaluate(wf.corr, A) # nX x length(wf.corr) + BB = ACEcore.evaluate(wf.corr, A) + AA = BB[:,wf.corr.projection] # nX x length(wf.corr) # generalized orbitals Φ = wf.Φ @@ -264,7 +263,7 @@ function gradient(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) # the resulting matrix should contains two block each comes from each spin Φ = Φ .* [Σ[i] == Σ[j] for j = 1:nX, i = 1:nX] - + # envelope env = wf.envelope(X) @@ -277,12 +276,16 @@ function gradient(wf::BFwf, X::AbstractVector, Σ::Vector{Char}) # ∂AA = ∂ψ/∂AA = ∂ψ/∂Φ * ∂Φ/∂AA = ∂Φ * wf.W' ∂AA = wf.∂AA + mul!(∂AA, ∂Φ, transpose(wf.W)) # ∂A = ∂ψ/∂A = ∂ψ/∂AA * ∂AA/∂A -> use custom pullback ∂A = wf.∂A # zeros(size(A)) - ACEcore.pullback_arg!(∂A, ∂AA, wf.corr, parent(AA)) + ∂BB = zeros(size(∂AA,1),length(wf.corr)) + ∂BB[:,wf.corr.projection] = ∂AA + ACEcore.pullback_arg!(∂A, ∂BB, wf.corr, parent(BB)) release!(AA) + release!(BB) # ∂P = ∂ψ/∂P = ∂ψ/∂A * ∂A/∂P -> use custom pullback # but need to do some work here since multiple @@ -411,7 +414,7 @@ function _assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf::BFwf) ΔAA[i, iAA] = L end end - return AA, ∇AA, ΔAA + return AA[:,wf.corr.projection], ∇AA[:,:,wf.corr.projection], ΔAA[:,wf.corr.projection] end """ From 3344735af8a83c6dc0769e1eefb68d4b6b775595 Mon Sep 17 00:00:00 2001 From: "Bernie (Po Cheng) Hsu" Date: Mon, 22 May 2023 08:00:24 -0700 Subject: [PATCH 08/10] Include RTrigs as input to BFwf --- Project.toml | 2 +- src/bflow.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index a8e4139..f94234e 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ ACEcore = "0.0.1" BenchmarkTools = "1" ForwardDiff = "0.10" ObjectPools = "0.0.2" -Polynomials4ML = "0.0.2" +Polynomials4ML = "^0.1" julia = "1" JSON = "0.21" ACEbase = "0.2" diff --git a/src/bflow.jl b/src/bflow.jl index 5acff92..5141f53 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -31,7 +31,7 @@ end (Φ::BFwf)(args...) = evaluate(Φ, args...) -function BFwf(Nel::Integer, polys::OrthPolyBasis1D3T; totdeg = length(polys), +function BFwf(Nel::Integer, polys::PolyBasis4ML; totdeg = length(polys), ν = 3, T = Float64, trans = identity, sd_admissible = bb -> (true), From 3d12a7947ace742ccaa74e90740fa0c890871c09 Mon Sep 17 00:00:00 2001 From: cortner Date: Thu, 25 May 2023 10:34:37 -0700 Subject: [PATCH 09/10] fix pkg compatibility --- Project.toml | 8 ++++---- src/bflow.jl | 2 +- test/test_bflows.jl | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index f94234e..2397840 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Christoph Ortner and contributors"] version = "0.0.1" [deps] +ACE = "3e8ccfd2-c8b0-11ea-32f1-f3a5990fd77a" ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e" ACEcore = "44c1e890-45d1-48ea-94d6-c2ea5b573f71" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" @@ -15,15 +16,14 @@ Polynomials4ML = "03c4bcba-a943-47e9-bfa1-b1661fc2974f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] +ACEbase = "0.2" ACEcore = "0.0.1" BenchmarkTools = "1" ForwardDiff = "0.10" +JSON = "0.21" ObjectPools = "0.0.2" -Polynomials4ML = "^0.1" julia = "1" -JSON = "0.21" -ACEbase = "0.2" - +Polynomials4ML = "0.0.6" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/bflow.jl b/src/bflow.jl index 5141f53..721aaeb 100644 --- a/src/bflow.jl +++ b/src/bflow.jl @@ -31,7 +31,7 @@ end (Φ::BFwf)(args...) = evaluate(Φ, args...) -function BFwf(Nel::Integer, polys::PolyBasis4ML; totdeg = length(polys), +function BFwf(Nel::Integer, polys; totdeg = length(polys), ν = 3, T = Float64, trans = identity, sd_admissible = bb -> (true), diff --git a/test/test_bflows.jl b/test/test_bflows.jl index c48f479..5d2378b 100644 --- a/test/test_bflows.jl +++ b/test/test_bflows.jl @@ -112,8 +112,9 @@ polys = Polynomials4ML.legendre_basis(maximum(MaxDeg)) wf = BFwf(N, polys, x -> sqrt(1+x^2); pos = pos, tpos = tpos, ν=length(MaxDeg[1]), totdeg = maximum(MaxDeg), trans = trans,sd_admissible = bb -> (length(bb) == 0 || all([bb[i][1] <= MaxDeg[length(bb)] for i = 1:length(bb)]))) X = rand(N) -evaluate(U,X,Σ) -g = gradient(U,X,Σ) + +evaluate(wf,X,Σ) +g = gradient(wf,X,Σ) From be6de7e100372aed101d3094f6b28ec164e0f83e Mon Sep 17 00:00:00 2001 From: cortner Date: Thu, 25 May 2023 10:41:32 -0700 Subject: [PATCH 10/10] add test with periodic BC --- test/test_periodic.jl | 229 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 test/test_periodic.jl diff --git a/test/test_periodic.jl b/test/test_periodic.jl new file mode 100644 index 0000000..71e63ae --- /dev/null +++ b/test/test_periodic.jl @@ -0,0 +1,229 @@ + +using Polynomials4ML, ACEcore, ACEbase, Printf, ACEpsi +using ACEpsi: BFwf, gradient, evaluate, laplacian +using LinearAlgebra +#using Random +#Random.seed!(123) +## +function lap_test(f, Δf, X) + F = f(X) + ΔF = Δf(X) + nX = length(X) + n1, nF = size(F) + U = randn(n1) + V = randn(nF) ./ (1:nF).^2 + f0 = U' * F * V + Δf0 = U' * ΔF * V + EE = Matrix(I, (nX, nX)) + for h in sqrt(0.1).^((5:12)) + Δfh = 0.0 + for i = 1:nX + Δfh += (U'*f(X + h * EE[:, i])*V - f0) / h^2 + Δfh += (U'*f(X - h * EE[:, i])*V - f0) / h^2 + end + @printf(" %.1e | %.2e \n", h, abs(Δfh - Δf0)) + end +end + +function grad_test2(f, df, X::AbstractVector) + F = f(X) + ∇F = df(X) + nX = length(X) + EE = Matrix(I, (nX, nX)) + + for h in 0.1.^(3:12) + gh = [ (f(X + h * EE[:, i]) - F) / h for i = 1:nX ] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇F, Inf)) + end +end + +function grad_test3(f, df, X::Float64) + F = f(X) + ∇F = df(X) # return as vector of size 1 + for h in 0.1.^(3:10) + gh = [ (f(X + h) - F) / h] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇F, Inf)) + end +end + + +function grad_test(f, df, X) + F = f(X) + ∇F = df(X) + nX, nF = size(F) + U = randn(nX) + V = randn(nF) ./ (1:nF).^2 + f0 = U' * F * V + ∇f0 = [ U' * ∇F[i, :, :] * V for i = 1:nX ] + EE = Matrix(I, (Nel, Nel)) + for h in 0.1.^(2:10) + gh = [ (U'*f(X + h * EE[:, i])*V - f0) / h for i = 1:Nel ] + @printf(" %.1e | %.2e \n", h, norm(gh - ∇f0, Inf)) + end +end + +"This function should be removed later to test in a nicer way..." +function fdtest(F, Σ, dF, x::AbstractVector; h0 = 1.0, verbose=true) + errors = Float64[] + E = F(x, Σ) + dE = dF + # loop through finite-difference step-lengths + verbose && @printf("---------|----------- \n") + verbose && @printf(" h | error \n") + verbose && @printf("---------|----------- \n") + for p = 2:11 + h = 0.1^p + dEh = copy(dE) + for n = 1:length(dE) + x[n] += h + dEh[n] = (F(x, Σ) - E) / h + x[n] -= h + end + push!(errors, norm(dE - dEh, Inf)) + verbose && @printf(" %1.1e | %4.2e \n", h, errors[end]) + end + verbose && @printf("---------|----------- \n") + if minimum(errors) <= 1e-3 * maximum(errors) + verbose && println("passed") + return true + else + @warn("""It seems the finite-difference test has failed, which indicates + that there is an inconsistency between the function and gradient + evaluation. Please double-check this manually / visually. (It is + also possible that the function being tested is poorly scaled.)""") + return false + end + end +## + +const ↑, ↓, ∅ = '↑','↓','∅' +Nel = 5 +polys = RTrigBasis(4) +wf = BFwf(Nel, polys; ν=3, + envelope = ACEpsi.envelopefcn(x -> 1, rand())) + +X = π * rand(Nel) .- 1 +Σ = rand([↑, ↓], Nel) + +wf(X, Σ) +g = gradient(wf, X, Σ) + +X1 = 2*π .+ X +wf(X, Σ) ≈ wf(X1, Σ) + +## + +using LinearAlgebra +using Printf +#using ACEbase.Testing: fdtest + +@info("Fd test of gradient w.r.t. X") +fdtest(wf, Σ, g, X) + + +# ## + +# todo: move this to a performance benchmark script +# using BenchmarkTools +# @btime $wf($X) +# @btime gradient($wf, $X) + +## + +A, ∇A, ΔA = ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ) + +@info("test ∇A") +grad_test(X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[1], + X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[2], + X) + +@info("test ΔA") +lap_test(X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[1], + X -> ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ)[3], + X) + + +## + +function f_AA(X, Σ) + A, ∇A, ΔA = ACEpsi._assemble_A_∇A_ΔA(wf, X, Σ) + AA, ∇AA, ΔAA = ACEpsi._assemble_AA_∇AA_ΔAA(A, ∇A, ΔA, wf) + return AA, ∇AA, ΔAA +end + +@info("test ∇A") +grad_test(X -> f_AA(X, Σ)[1], X -> f_AA(X, Σ)[2], X) + +@info("test ΔA") +lap_test(X -> f_AA(X, Σ)[1], X -> f_AA(X, Σ)[3], X) + + +## + +@info("test Δψ") +lap_test(X -> [wf(X, Σ);;], X -> [ACEpsi.laplacian(wf, X, Σ);;], X) + + +## + +@info("Test ∇ψ w.r.t. parameters") + +ACEpsi.gradp_evaluate(wf, X, Σ) + + +W0 = copy(wf.W) +w0 = W0[:] +Fp = w -> ( wf.W[:] .= w[:]; wf(X, Σ)) +dFp = w -> ( wf.W[:] .= w[:]; ACEpsi.gradp_evaluate(wf, X, Σ)[1][:] ) + +grad_test2(Fp, dFp, w0) + +## + +@info("test ∇env w.r.t. parameter") +Ξ0 = copy(wf.envelope.ξ) +ξ0 = Ξ0 + +Envp = w -> (wf.envelope.ξ = w; wf(X, Σ)) +dEnvp = w -> (wf.envelope.ξ = w; ACEpsi.gradp_evaluate(wf, X, Σ)[2]) + +grad_test3(Envp, dEnvp, ξ0) + +## + +@info("Test ∇Δψ w.r.t. parameters") + +Fp = w -> ( wf.W[:] .= w[:]; ACEpsi.laplacian(wf, X, Σ)) +dFp = w -> ( wf.W[:] .= w[:]; ACEpsi.gradp_laplacian(wf, X, Σ)[1][:] ) + +grad_test2(Fp, dFp, w0) + +## + +@info("Test ∇Δenv w.r.t. parameters") + +Ξ0 = copy(wf.envelope.ξ) +ξ0 = Ξ0 + + +Fp = w -> ( wf.envelope.ξ = w; ACEpsi.laplacian(wf, X, Σ)) +dFp = w -> (wf.envelope.ξ = w; ACEpsi.gradp_laplacian(wf, X, Σ)[2][:] ) + +grad_test3(Fp, dFp, ξ0) + +## + +@info("Test getting/setting parameters") + +wf1 = BFwf(Nel, polys; ν=3) +wf2 = BFwf(Nel, polys; ν=3) +@printf(" wf1 - wf2: %f \n", abs(wf1(X, Σ) - wf2(X, Σ))) +param1 = ACEpsi.get_params(wf1) +wf2 = ACEpsi.set_params!(wf2, param1) +@printf(" wf1 - wf2 with parameter wf1: %f \n", abs(wf1(X, Σ) - wf2(X, Σ))) + +## + +#@warn("removed compac test since json file is missing") +# @info("Test compatibility with ACESchrodinger") # Jerry: Not sure if this should be kept in the same file +# include("compare_bflow.jl")