Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add relative error check in adaptive time stepping algorithm for rk23… #275

Open
wants to merge 1 commit into
base: 3.11
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cuda/vecnorm.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "float3.h"

// dst = sqrt(ax*ax + ay*ay + az*az)
extern "C" __global__ void
vecnorm(float* __restrict__ dst,
float* __restrict__ ax, float* __restrict__ ay, float* __restrict__ az,
int N) {

int i = ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
if (i < N) {
float3 A = {ax[i], ay[i], az[i]};
dst[i] = sqrtf(dot(A, A));
}
}
18 changes: 18 additions & 0 deletions cuda/vecnorm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package cuda

import (
"github.com/mumax/3/data"
"github.com/mumax/3/util"
)

// dst = sqrt(dot(a, a)),
func VecNorm(dst *data.Slice, a *data.Slice) {
util.Argument(dst.NComp() == 1 && a.NComp() == 3)
util.Argument(dst.Len() == a.Len())

N := dst.Len()
cfg := make1DConf(N)
k_vecnorm_async(dst.DevPtr(0),
a.DevPtr(X), a.DevPtr(Y), a.DevPtr(Z),
N, cfg)
}
45 changes: 38 additions & 7 deletions engine/rk23.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,44 @@ func (rk *RK23) Step() {

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k4)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./3.))
data.Copy(rk.k1, k4) // FSAL
// Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k4)
maxdm := cuda.MaxVecNorm(k4)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k4)
NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./3.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./3.))
}
data.Copy(rk.k1, k4) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./4.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand Down
45 changes: 38 additions & 7 deletions engine/rk45dp.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,44 @@ func (rk *RK45DP) Step() {

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k7)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./5.))
data.Copy(rk.k1, k7) // FSAL
//Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k7)
maxdm := cuda.MaxVecNorm(k7)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k7)
NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./5.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./5.))
}
data.Copy(rk.k1, k7) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand Down
81 changes: 63 additions & 18 deletions engine/rk56.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
)

type RK56 struct {
k1 *data.Slice // torque at end of step is kept for beginning of next step
}

func (rk *RK56) Step() {
Expand All @@ -19,14 +20,24 @@ func (rk *RK56) Step() {
Dt_si = FixDt
}

// upon resize: remove wrongly sized k1
if rk.k1.Size() != m.Size() {
rk.Free()
}

// first step ever: one-time k1 init and eval
if rk.k1 == nil {
rk.k1 = cuda.NewSlice(3, size)
torqueFn(rk.k1)
}

t0 := Time
// backup magnetization
m0 := cuda.Buffer(3, size)
defer cuda.Recycle(m0)
data.Copy(m0, m)

k1, k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
defer cuda.Recycle(k1)
k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
defer cuda.Recycle(k2)
defer cuda.Recycle(k3)
defer cuda.Recycle(k4)
Expand All @@ -39,73 +50,105 @@ func (rk *RK56) Step() {
h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

// stage 1
torqueFn(k1)
torqueFn(rk.k1)
Copy link
Contributor

@JonathanMaes JonathanMaes Oct 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to re-calculate this (at zero temperature, at least) if k2 is stored at the end of the Step anyway.


// stage 2
Time = t0 + (1./6.)*Dt_si
cuda.Madd2(m, m, k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
cuda.Madd2(m, m, rk.k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
M.normalize()
torqueFn(k2)

// stage 3
Time = t0 + (4./15.)*Dt_si
cuda.Madd3(m, m0, k1, k2, 1, (4./75.)*h, (16./75.)*h)
cuda.Madd3(m, m0, rk.k1, k2, 1, (4./75.)*h, (16./75.)*h)
M.normalize()
torqueFn(k3)

// stage 4
Time = t0 + (2./3.)*Dt_si
cuda.Madd4(m, m0, k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
cuda.Madd4(m, m0, rk.k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
M.normalize()
torqueFn(k4)

// stage 5
Time = t0 + (4./5.)*Dt_si
cuda.Madd5(m, m0, k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
cuda.Madd5(m, m0, rk.k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
M.normalize()
torqueFn(k5)

// stage 6
Time = t0 + (1.)*Dt_si
cuda.Madd6(m, m0, k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
cuda.Madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
M.normalize()
torqueFn(k6)

// stage 7
Time = t0
cuda.Madd5(m, m0, k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
cuda.Madd5(m, m0, rk.k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
M.normalize()
torqueFn(k7)

// stage 8
Time = t0 + (1.)*Dt_si
cuda.Madd7(m, m0, k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
cuda.Madd7(m, m0, rk.k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
M.normalize()
torqueFn(k8)

// stage 9: 6th order solution
Time = t0 + (1.)*Dt_si
//madd6(m, m0, k1, k3, k4, k5, k6, 1, (31./384.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h)
cuda.Madd7(m, m0, k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
cuda.Madd7(m, m0, rk.k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
M.normalize()
torqueFn(k2) // re-use k2

// error estimate
Err := cuda.Buffer(3, size)
defer cuda.Recycle(Err)
cuda.Madd4(Err, k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))
cuda.Madd4(Err, rk.k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))

// determine error
err := cuda.MaxVecNorm(Err) * float64(h)

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k2)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./6.))
//Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k2)
maxdm := cuda.MaxVecNorm(k2)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k2)
Copy link
Contributor

@JonathanMaes JonathanMaes Oct 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the value used in setMaxTorque() is inconsistent between different solvers. [NOTE: this was already the case before this PR]

  • Here in RK56 (also RK45dp and RK23), setMaxTorque() is set with the torques acting on the final magnetization state (i.e. the torqueFn() values after the final cuda.Madd<n>(m, m0, ...) is performed).
  • However, in e.g. RK4 (also Heun and Euler I think?), setMaxTorque() is set with the torque acting on the magnetization considered in the last stage of the RK solver (i.e. the torqueFn() values before the final cuda.Madd<n>(m, m0, ...) is performed).

Which of these two possibilities do we consider most correct?

EDIT: the reason for not calculating the final torque in RK4 etc. is that this would significantly decrease performance for simulations at nonzero temperature because then this final torque can not be re-used. Therefore, it seems that the final torque evaluation of k2 in the original RK56 code could be omitted similar to how it was done in the RK4 solver.

NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./6.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
}
data.Copy(rk.k1, k2) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./7.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand All @@ -118,4 +161,6 @@ func (rk *RK56) Step() {
}

func (rk *RK56) Free() {
rk.k1.Free()
rk.k1 = nil
}
3 changes: 3 additions & 0 deletions engine/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ var (
Dt_si float64 = 1e-15 // time step = dt_si (seconds) *dt_mul, which should be nice float32
MinDt, MaxDt float64 // minimum and maximum time step
MaxErr float64 = 1e-5 // maximum error/step
RelErr float64 = 1e-4 // maximum relative error/step
MinSlope float64 = 1e8 // minimum delta per unit time before using relative error to step
Headroom float64 = 0.8 // solver headroom, (Gustafsson, 1992, Control of Error and Convergence in ODE Solvers)
LastErr, PeakErr float64 // error of last step, highest error ever
LastTorque float64 // maxTorque of last time step
Expand All @@ -39,6 +41,7 @@ func init() {
DeclVar("MinDt", &MinDt, "Minimum time step the solver can take (s)")
DeclVar("MaxDt", &MaxDt, "Maximum time step the solver can take (s)")
DeclVar("MaxErr", &MaxErr, "Maximum error per step the solver can tolerate (default = 1e-5)")
DeclVar("RelErr", &RelErr, "Maximum relative error per step the solver can tolerate (default = 1e-4)")
DeclVar("Headroom", &Headroom, "Solver headroom (default = 0.8)")
DeclVar("FixDt", &FixDt, "Set a fixed time step, 0 disables fixed step (which is the default)")
DeclFunc("Exit", Exit, "Exit from the program")
Expand Down