Skip to content

Commit

Permalink
Add relative error check in adaptive time stepping algorithm for rk23…
Browse files Browse the repository at this point in the history
…, rk45dp and rk56 time steppers
  • Loading branch information
xfong committed Jun 6, 2022
1 parent 1413b1f commit 479e821
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 32 deletions.
14 changes: 14 additions & 0 deletions cuda/vecnorm.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "float3.h"

// dst = sqrt(ax*ax + ay*ay + az*az)
extern "C" __global__ void
vecnorm(float* __restrict__ dst,
float* __restrict__ ax, float* __restrict__ ay, float* __restrict__ az,
int N) {

int i = ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
if (i < N) {
float3 A = {ax[i], ay[i], az[i]};
dst[i] = sqrtf(dot(A, A));
}
}
18 changes: 18 additions & 0 deletions cuda/vecnorm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package cuda

import (
"github.com/mumax/3/data"
"github.com/mumax/3/util"
)

// dst = sqrt(dot(a, a)),
func VecNorm(dst *data.Slice, a *data.Slice) {
util.Argument(dst.NComp() == 1 && a.NComp() == 3)
util.Argument(dst.Len() == a.Len())

N := dst.Len()
cfg := make1DConf(N)
k_vecnorm_async(dst.DevPtr(0),
a.DevPtr(X), a.DevPtr(Y), a.DevPtr(Z),
N, cfg)
}
45 changes: 38 additions & 7 deletions engine/rk23.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,44 @@ func (rk *RK23) Step() {

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k4)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./3.))
data.Copy(rk.k1, k4) // FSAL
// Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k4)
maxdm := cuda.MaxVecNorm(k4)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k4)
NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./3.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./3.))
}
data.Copy(rk.k1, k4) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./4.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand Down
45 changes: 38 additions & 7 deletions engine/rk45dp.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,44 @@ func (rk *RK45DP) Step() {

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k7)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./5.))
data.Copy(rk.k1, k7) // FSAL
//Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k7)
maxdm := cuda.MaxVecNorm(k7)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k7)
NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./5.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./5.))
}
data.Copy(rk.k1, k7) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand Down
81 changes: 63 additions & 18 deletions engine/rk56.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
)

type RK56 struct {
k1 *data.Slice // torque at end of step is kept for beginning of next step
}

func (rk *RK56) Step() {
Expand All @@ -19,14 +20,24 @@ func (rk *RK56) Step() {
Dt_si = FixDt
}

// upon resize: remove wrongly sized k1
if rk.k1.Size() != m.Size() {
rk.Free()
}

// first step ever: one-time k1 init and eval
if rk.k1 == nil {
rk.k1 = cuda.NewSlice(3, size)
torqueFn(rk.k1)
}

t0 := Time
// backup magnetization
m0 := cuda.Buffer(3, size)
defer cuda.Recycle(m0)
data.Copy(m0, m)

k1, k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
defer cuda.Recycle(k1)
k2, k3, k4, k5, k6, k7, k8 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
defer cuda.Recycle(k2)
defer cuda.Recycle(k3)
defer cuda.Recycle(k4)
Expand All @@ -39,73 +50,105 @@ func (rk *RK56) Step() {
h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL

// stage 1
torqueFn(k1)
torqueFn(rk.k1)

// stage 2
Time = t0 + (1./6.)*Dt_si
cuda.Madd2(m, m, k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
cuda.Madd2(m, m, rk.k1, 1, (1./6.)*h) // m = m*1 + k1*h/6
M.normalize()
torqueFn(k2)

// stage 3
Time = t0 + (4./15.)*Dt_si
cuda.Madd3(m, m0, k1, k2, 1, (4./75.)*h, (16./75.)*h)
cuda.Madd3(m, m0, rk.k1, k2, 1, (4./75.)*h, (16./75.)*h)
M.normalize()
torqueFn(k3)

// stage 4
Time = t0 + (2./3.)*Dt_si
cuda.Madd4(m, m0, k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
cuda.Madd4(m, m0, rk.k1, k2, k3, 1, (5./6.)*h, (-8./3.)*h, (5./2.)*h)
M.normalize()
torqueFn(k4)

// stage 5
Time = t0 + (4./5.)*Dt_si
cuda.Madd5(m, m0, k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
cuda.Madd5(m, m0, rk.k1, k2, k3, k4, 1, (-8./5.)*h, (144./25.)*h, (-4.)*h, (16./25.)*h)
M.normalize()
torqueFn(k5)

// stage 6
Time = t0 + (1.)*Dt_si
cuda.Madd6(m, m0, k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
cuda.Madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (361./320.)*h, (-18./5.)*h, (407./128.)*h, (-11./80.)*h, (55./128.)*h)
M.normalize()
torqueFn(k6)

// stage 7
Time = t0
cuda.Madd5(m, m0, k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
cuda.Madd5(m, m0, rk.k1, k3, k4, k5, 1, (-11./640.)*h, (11./256.)*h, (-11/160.)*h, (11./256.)*h)
M.normalize()
torqueFn(k7)

// stage 8
Time = t0 + (1.)*Dt_si
cuda.Madd7(m, m0, k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
cuda.Madd7(m, m0, rk.k1, k2, k3, k4, k5, k7, 1, (93./640.)*h, (-18./5.)*h, (803./256.)*h, (-11./160.)*h, (99./256.)*h, (1.)*h)
M.normalize()
torqueFn(k8)

// stage 9: 6th order solution
Time = t0 + (1.)*Dt_si
//madd6(m, m0, k1, k3, k4, k5, k6, 1, (31./384.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h)
cuda.Madd7(m, m0, k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
cuda.Madd7(m, m0, rk.k1, k3, k4, k5, k7, k8, 1, (7./1408.)*h, (1125./2816.)*h, (9./32.)*h, (125./768.)*h, (5./66.)*h, (5./66.)*h)
M.normalize()
torqueFn(k2) // re-use k2

// error estimate
Err := cuda.Buffer(3, size)
defer cuda.Recycle(Err)
cuda.Madd4(Err, k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))
cuda.Madd4(Err, rk.k1, k6, k7, k8, (-5. / 66.), (-5. / 66.), (5. / 66.), (5. / 66.))

// determine error
err := cuda.MaxVecNorm(Err) * float64(h)

// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k2)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./6.))
//Passed absolute error. Check relative error...
errnorm := cuda.Buffer(1, size)
defer cuda.Recycle(errnorm)
cuda.VecNorm(errnorm, Err)
ddtnorm := cuda.Buffer(1, size)
defer cuda.Recycle(ddtnorm)
cuda.VecNorm(ddtnorm, k2)
maxdm := cuda.MaxVecNorm(k2)
fail := 0
rlerr := float64(0.0)
if maxdm < MinSlope { // Only step using relerr if dmdt is big enough. Overcomes equilibrium problem
fail = 0
} else {
cuda.Div(errnorm, errnorm, ddtnorm) //re-use errnorm
rlerr = float64(cuda.MaxAbs(errnorm))
fail = 1
}
if fail == 0 || RelErr <= 0.0 || rlerr < RelErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k2)
NSteps++
Time = t0 + Dt_si
if fail == 0 {
adaptDt(math.Pow(MaxErr/err, 1./6.))
} else {
adaptDt(math.Pow(RelErr/rlerr, 1./6.))
}
data.Copy(rk.k1, k2) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(RelErr/rlerr, 1./7.))
}
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
Expand All @@ -118,4 +161,6 @@ func (rk *RK56) Step() {
}

func (rk *RK56) Free() {
rk.k1.Free()
rk.k1 = nil
}
3 changes: 3 additions & 0 deletions engine/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ var (
Dt_si float64 = 1e-15 // time step = dt_si (seconds) *dt_mul, which should be nice float32
MinDt, MaxDt float64 // minimum and maximum time step
MaxErr float64 = 1e-5 // maximum error/step
RelErr float64 = 1e-4 // maximum relative error/step
MinSlope float64 = 1e8 // minimum delta per unit time before using relative error to step
Headroom float64 = 0.8 // solver headroom, (Gustafsson, 1992, Control of Error and Convergence in ODE Solvers)
LastErr, PeakErr float64 // error of last step, highest error ever
LastTorque float64 // maxTorque of last time step
Expand All @@ -39,6 +41,7 @@ func init() {
DeclVar("MinDt", &MinDt, "Minimum time step the solver can take (s)")
DeclVar("MaxDt", &MaxDt, "Maximum time step the solver can take (s)")
DeclVar("MaxErr", &MaxErr, "Maximum error per step the solver can tolerate (default = 1e-5)")
DeclVar("RelErr", &RelErr, "Maximum relative error per step the solver can tolerate (default = 1e-4)")
DeclVar("Headroom", &Headroom, "Solver headroom (default = 0.8)")
DeclVar("FixDt", &FixDt, "Set a fixed time step, 0 disables fixed step (which is the default)")
DeclFunc("Exit", Exit, "Exit from the program")
Expand Down

0 comments on commit 479e821

Please sign in to comment.