Skip to content

Commit

Permalink
astyle
Browse files Browse the repository at this point in the history
  • Loading branch information
barnex committed Sep 30, 2016
1 parent 9859625 commit 6df8d0a
Show file tree
Hide file tree
Showing 9 changed files with 3,426 additions and 3,782 deletions.
214 changes: 107 additions & 107 deletions cuda/dmi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,123 +16,123 @@ adddmi(float* __restrict__ Hx, float* __restrict__ Hy, float* __restrict__ Hz,
float* __restrict__ aLUT2d, float* __restrict__ dLUT2d, uint8_t* __restrict__ regions,
float cx, float cy, float cz, int Nx, int Ny, int Nz, uint8_t PBC) {

int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
int iz = blockIdx.z * blockDim.z + threadIdx.z;
int ix = blockIdx.x * blockDim.x + threadIdx.x;
int iy = blockIdx.y * blockDim.y + threadIdx.y;
int iz = blockIdx.z * blockDim.z + threadIdx.z;

if (ix >= Nx || iy >= Ny || iz >= Nz) {
return;
}
if (ix >= Nx || iy >= Ny || iz >= Nz) {
return;
}

int I = idx(ix, iy, iz); // central cell index
float3 h = make_float3(Hx[I], Hy[I], Hz[I]); // add to H
float3 m0 = make_float3(mx[I], my[I], mz[I]); // central m
uint8_t r0 = regions[I];
int i_; // neighbor index
int I = idx(ix, iy, iz); // central cell index
float3 h = make_float3(Hx[I], Hy[I], Hz[I]); // add to H
float3 m0 = make_float3(mx[I], my[I], mz[I]); // central m
uint8_t r0 = regions[I];
int i_; // neighbor index

if(is0(m0)) {
return;
}
if(is0(m0)) {
return;
}

// x derivatives (along length)
{
float3 m1 = make_float3(0.0f, 0.0f, 0.0f); // left neighbor
i_ = idx(lclampx(ix-1), iy, iz); // load neighbor m if inside grid, keep 0 otherwise
if (ix-1 >= 0 || PBCx) {
m1 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A1 = aLUT2d[symidx(r0, regions[i_])]; // inter-region Aex
float D1 = dLUT2d[symidx(r0, regions[i_])]; // inter-region Dex
if (is0(m1)) { // neighbor missing
m1.x = m0.x - (-cx * (0.5f*D1/A1) * m0.z); // extrapolate missing m from BC's
m1.y = m0.y;
m1.z = m0.z + (-cx * (0.5f*D1/A1) * m0.x);
}
h += (2.0f*A1/(cx*cx)) * (m1 - m0); // exchange
h.x += (D1/cx)*(m0.z - m1.z); // DM (first 1/2 contribution, 2*D * deltaM / (2*c))
h.z -= (D1/cx)*(m0.x - m1.x);
}
// x derivatives (along length)
{
float3 m1 = make_float3(0.0f, 0.0f, 0.0f); // left neighbor
i_ = idx(lclampx(ix-1), iy, iz); // load neighbor m if inside grid, keep 0 otherwise
if (ix-1 >= 0 || PBCx) {
m1 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A1 = aLUT2d[symidx(r0, regions[i_])]; // inter-region Aex
float D1 = dLUT2d[symidx(r0, regions[i_])]; // inter-region Dex
if (is0(m1)) { // neighbor missing
m1.x = m0.x - (-cx * (0.5f*D1/A1) * m0.z); // extrapolate missing m from BC's
m1.y = m0.y;
m1.z = m0.z + (-cx * (0.5f*D1/A1) * m0.x);
}
h += (2.0f*A1/(cx*cx)) * (m1 - m0); // exchange
h.x += (D1/cx)*(m0.z - m1.z); // DM (first 1/2 contribution, 2*D * deltaM / (2*c))
h.z -= (D1/cx)*(m0.x - m1.x);
}

{
float3 m2 = make_float3(0.0f, 0.0f, 0.0f); // right neighbor
i_ = idx(hclampx(ix+1), iy, iz);
if (ix+1 < Nx || PBCx) {
m2 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A2 = aLUT2d[symidx(r0, regions[i_])];
float D2 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m2)) {
m2.x = m0.x - (cx * (0.5f*D2/A2) * m0.z);
m2.y = m0.y;
m2.z = m0.z + (cx * (0.5f*D2/A2) * m0.x);
}
h += (2.0f*A2/(cx*cx)) * (m2 - m0);
h.x += (D2/cx)*(m2.z - m0.z);
h.z -= (D2/cx)*(m2.x - m0.x);
}
{
float3 m2 = make_float3(0.0f, 0.0f, 0.0f); // right neighbor
i_ = idx(hclampx(ix+1), iy, iz);
if (ix+1 < Nx || PBCx) {
m2 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A2 = aLUT2d[symidx(r0, regions[i_])];
float D2 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m2)) {
m2.x = m0.x - (cx * (0.5f*D2/A2) * m0.z);
m2.y = m0.y;
m2.z = m0.z + (cx * (0.5f*D2/A2) * m0.x);
}
h += (2.0f*A2/(cx*cx)) * (m2 - m0);
h.x += (D2/cx)*(m2.z - m0.z);
h.z -= (D2/cx)*(m2.x - m0.x);
}

// y derivatives (along height)
{
float3 m1 = make_float3(0.0f, 0.0f, 0.0f);
i_ = idx(ix, lclampy(iy-1), iz);
if (iy-1 >= 0 || PBCy) {
m1 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A1 = aLUT2d[symidx(r0, regions[i_])];
float D1 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m1)) {
m1.x = m0.x;
m1.y = m0.y - (-cy * (0.5f*D1/A1) * m0.z);
m1.z = m0.z + (-cy * (0.5f*D1/A1) * m0.y);
}
h += (2.0f*A1/(cy*cy)) * (m1 - m0);
h.y += (D1/cy)*(m0.z - m1.z);
h.z -= (D1/cy)*(m0.y - m1.y);
}
// y derivatives (along height)
{
float3 m1 = make_float3(0.0f, 0.0f, 0.0f);
i_ = idx(ix, lclampy(iy-1), iz);
if (iy-1 >= 0 || PBCy) {
m1 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A1 = aLUT2d[symidx(r0, regions[i_])];
float D1 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m1)) {
m1.x = m0.x;
m1.y = m0.y - (-cy * (0.5f*D1/A1) * m0.z);
m1.z = m0.z + (-cy * (0.5f*D1/A1) * m0.y);
}
h += (2.0f*A1/(cy*cy)) * (m1 - m0);
h.y += (D1/cy)*(m0.z - m1.z);
h.z -= (D1/cy)*(m0.y - m1.y);
}

{
float3 m2 = make_float3(0.0f, 0.0f, 0.0f);
i_ = idx(ix, hclampy(iy+1), iz);
if (iy+1 < Ny || PBCy) {
m2 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A2 = aLUT2d[symidx(r0, regions[i_])];
float D2 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m2)) {
m2.x = m0.x;
m2.y = m0.y - (cy * (0.5f*D2/A2) * m0.z);
m2.z = m0.z + (cy * (0.5f*D2/A2) * m0.y);
}
h += (2.0f*A2/(cy*cy)) * (m2 - m0);
h.y += (D2/cy)*(m2.z - m0.z);
h.z -= (D2/cy)*(m2.y - m0.y);
}
{
float3 m2 = make_float3(0.0f, 0.0f, 0.0f);
i_ = idx(ix, hclampy(iy+1), iz);
if (iy+1 < Ny || PBCy) {
m2 = make_float3(mx[i_], my[i_], mz[i_]);
}
float A2 = aLUT2d[symidx(r0, regions[i_])];
float D2 = dLUT2d[symidx(r0, regions[i_])];
if (is0(m2)) {
m2.x = m0.x;
m2.y = m0.y - (cy * (0.5f*D2/A2) * m0.z);
m2.z = m0.z + (cy * (0.5f*D2/A2) * m0.y);
}
h += (2.0f*A2/(cy*cy)) * (m2 - m0);
h.y += (D2/cy)*(m2.z - m0.z);
h.z -= (D2/cy)*(m2.y - m0.y);
}

// only take vertical derivative for 3D sim
if (Nz != 1) {
// bottom neighbor
{
i_ = idx(ix, iy, lclampz(iz-1));
float3 m1 = make_float3(mx[i_], my[i_], mz[i_]);
m1 = ( is0(m1)? m0: m1 ); // Neumann BC
float A1 = aLUT2d[symidx(r0, regions[i_])];
h += (2.0f*A1/(cz*cz)) * (m1 - m0); // Exchange only
}
// only take vertical derivative for 3D sim
if (Nz != 1) {
// bottom neighbor
{
i_ = idx(ix, iy, lclampz(iz-1));
float3 m1 = make_float3(mx[i_], my[i_], mz[i_]);
m1 = ( is0(m1)? m0: m1 ); // Neumann BC
float A1 = aLUT2d[symidx(r0, regions[i_])];
h += (2.0f*A1/(cz*cz)) * (m1 - m0); // Exchange only
}

// top neighbor
{
i_ = idx(ix, iy, hclampz(iz+1));
float3 m2 = make_float3(mx[i_], my[i_], mz[i_]);
m2 = ( is0(m2)? m0: m2 );
float A2 = aLUT2d[symidx(r0, regions[i_])];
h += (2.0f*A2/(cz*cz)) * (m2 - m0);
}
}
// top neighbor
{
i_ = idx(ix, iy, hclampz(iz+1));
float3 m2 = make_float3(mx[i_], my[i_], mz[i_]);
m2 = ( is0(m2)? m0: m2 );
float A2 = aLUT2d[symidx(r0, regions[i_])];
h += (2.0f*A2/(cz*cz)) * (m2 - m0);
}
}

// write back, result is H + Hdmi + Hex
Hx[I] = h.x;
Hy[I] = h.y;
Hz[I] = h.z;
// write back, result is H + Hdmi + Hex
Hx[I] = h.x;
Hy[I] = h.y;
Hz[I] = h.z;
}

// Note on boundary conditions.
Expand Down
Loading

0 comments on commit 6df8d0a

Please sign in to comment.