-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdiff_3D_linstep_multixpu.jl
167 lines (156 loc) · 7.38 KB
/
diff_3D_linstep_multixpu.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
const use_return = haskey(ENV, "USE_RETURN" ) ? parse(Bool, ENV["USE_RETURN"] ) : false
const USE_GPU = haskey(ENV, "USE_GPU" ) ? parse(Bool, ENV["USE_GPU"] ) : false
const do_viz = haskey(ENV, "DO_VIZ" ) ? parse(Bool, ENV["DO_VIZ"] ) : true
const do_save = haskey(ENV, "DO_SAVE" ) ? parse(Bool, ENV["DO_SAVE"] ) : false
const do_save_viz = haskey(ENV, "DO_SAVE_VIZ") ? parse(Bool, ENV["DO_SAVE_VIZ"]) : false
const nx = haskey(ENV, "NX" ) ? parse(Int , ENV["NX"] ) : 64
const ny = haskey(ENV, "NY" ) ? parse(Int , ENV["NY"] ) : 64
const nz = haskey(ENV, "NZ" ) ? parse(Int , ENV["NZ"] ) : 64
###
using ParallelStencil
using ParallelStencil.FiniteDifferences3D
@static if USE_GPU
@init_parallel_stencil(CUDA, Float64, 3)
else
@init_parallel_stencil(Threads, Float64, 3)
end
using ImplicitGlobalGrid, Plots, Printf, LinearAlgebra, MAT
import MPI
norm_g(A) = (sum2_l = sum(A.^2); sqrt(MPI.Allreduce(sum2_l, MPI.SUM, MPI.COMM_WORLD)))
@views inn(A) = A[2:end-1,2:end-1,2:end-1]
@parallel function compute_iter_params!(dτ_ρ, D, Re, Vpdτ, max_lxyz)
@all(dτ_ρ) = Vpdτ * max_lxyz / @maxloc(D) / Re
return
end
@parallel function compute_flux!(qHx, qHy, qHz, qHx2, qHy2, qHz2, H, D, θr_dτ, dx, dy, dz)
@all(qHx) = (@all(qHx) * θr_dτ - @av_xi(D) * @d_xi(H) / dx) / (1.0 + θr_dτ)
@all(qHy) = (@all(qHy) * θr_dτ - @av_yi(D) * @d_yi(H) / dy) / (1.0 + θr_dτ)
@all(qHz) = (@all(qHz) * θr_dτ - @av_zi(D) * @d_zi(H) / dz) / (1.0 + θr_dτ)
@all(qHx2) = -@av_xi(D) * @d_xi(H) / dx
@all(qHy2) = -@av_yi(D) * @d_yi(H) / dy
@all(qHz2) = -@av_zi(D) * @d_zi(H) / dz
return
end
@parallel function compute_update!(H, Hold, qHx, qHy, qHz, dτ_ρ, dt, dx, dy, dz)
@inn(H) = (@inn(H) + @all(dτ_ρ) * (@inn(Hold) / dt - (@d_xa(qHx) / dx + @d_ya(qHy) / dy + @d_za(qHz)/dz))) / (1.0 + @all(dτ_ρ) / dt)
return
end
@parallel function check_res!(ResH, H, Hold, qHx2, qHy2, qHz2, dt, dx, dy, dz)
@all(ResH) = -(@inn(H) - @inn(Hold)) / dt - (@d_xa(qHx2) / dx + @d_ya(qHy2) / dy + @d_za(qHz2) / dz)
return
end
@parallel_indices (iy,iz) function bc_x!(A)
A[1 , iy, iz] = A[2 , iy, iz]
A[end, iy, iz] = A[end-1, iy, iz]
return
end
@parallel_indices (ix,iz) function bc_y!(A)
A[ix, 1 , iz] = A[ix, 2 , iz]
A[ix, end, iz] = A[ix, end-1, iz]
return
end
@parallel_indices (ix,iy) function bc_z!(A)
A[ix, iy, 1 ] = A[ix, iy, 2 ]
A[ix, iy, end] = A[ix, iy, end-1]
return
end
@views function diffusion_3D_()
# Physics
lx, ly, lz = 10.0, 10.0, 10.0 # domain size
D1 = 1.0 # diffusion coefficient
D2 = 1e-4 # diffusion coefficient
ttot = 1.0 # total simulation time
dt = 0.2 # physical time step
# Numerics
tol = 1e-8 # tolerance
itMax = 1e5 # max number of iterations
nout = 10 # tol check
CFL = 1/sqrt(3) # CFL number
me, dims = init_global_grid(nx, ny, nz) # MPI initialisation
b_width = (8, 4, 4) # boundary width for comm/comp overlap
# Derived numerics
dx, dy, dz = lx/nx_g(), ly/ny_g(), lz/nz_g() # cell sizes
Vpdτ = CFL * min(dx, dy, dz)
max_lxyz = max(lx, ly, lz)
Re = π + sqrt(π^2 + (max_lxyz^2 / max(D1,D2)) / dt)
θr_dτ = max_lxyz / Vpdτ / Re
xc, yc, zc = LinRange(dx/2, lx-dx/2, nx), LinRange(dy/2, ly-dy/2, ny), LinRange(dz/2, lz-dz/2, nz)
# Array allocation
qHx = @zeros(nx-1,ny-2,nz-2)
qHy = @zeros(nx-2,ny-1,nz-2)
qHz = @zeros(nx-2,ny-2,nz-1)
qHx2 = @zeros(nx-1,ny-2,nz-2)
qHy2 = @zeros(nx-2,ny-1,nz-2)
qHz2 = @zeros(nx-2,ny-2,nz-1)
ResH = @zeros(nx-2,ny-2,nz-2)
dτ_ρ = @zeros(nx-2,ny-2,nz-2)
# Initial condition
H0 = zeros(nx,ny,nz)
D = D1*ones(nx,ny,nz)
Tmp = [x_g(ix,dx,H0) for ix=1:size(H0,1), iy=1:size(H0,2), iz=1:size(H0,3)]
D[Tmp.<lx/2.2] .= D2
Tmp = [y_g(iy,dy,H0) for ix=1:size(H0,1), iy=1:size(H0,2), iz=1:size(H0,3)]
D[Tmp.<ly/2.2] .= D2
D = Data.Array(D)
H0 = Data.Array([exp(-(x_g(ix,dx,H0)-0.5*lx+dx/2)*(x_g(ix,dx,H0)-0.5*lx+dx/2) - (y_g(iy,dy,H0)-0.5*ly+dy/2)*(y_g(iy,dy,H0)-0.5*ly+dy/2) - (z_g(iz,dz,H0)-0.5*lz+dz/2)*(z_g(iz,dz,H0)-0.5*lz+dz/2)) for ix=1:size(H0,1), iy=1:size(H0,2), iz=1:size(H0,3)])
Hold = @ones(nx,ny,nz) .* H0
H = @ones(nx,ny,nz) .* H0
@parallel compute_iter_params!(dτ_ρ, D, Re, Vpdτ, max_lxyz)
len_ResH_g = ((nx-2-2)*dims[1]+2)*((ny-2-2)*dims[2]+2)*((nz-2-2)*dims[3]+2)
if do_viz || do_save_viz
if (me==0) ENV["GKSwstype"]="nul"; if do_viz !ispath("../../figures") && mkdir("../../figures") end; end
nx_v, ny_v, nz_v = (nx-2)*dims[1], (ny-2)*dims[2], (nz-2)*dims[3]
if (nx_v*ny_v*nz_v*sizeof(Data.Number) > 0.8*Sys.free_memory()) error("Not enough memory for visualization.") end
H_v = zeros(nx_v, ny_v, nz_v) # global array for visu
H_inn = zeros(nx-2, ny-2, nz-2) # no halo local array for visu
z_sl = Int(ceil(nz_g()/2)) # Central z-slice
Xi_g, Yi_g = dx+dx/2:dx:lx-dx-dx/2, dy+dy/2:dy:ly-dy-dy/2 # inner points only
end
t = 0.0; it = 0; ittot = 0; nt = Int(ceil(ttot/dt))
# Physical time loop
while it<nt
iter = 0; err = 2 * tol
# Pseudo-transient iteration
while err>tol && iter<itMax
@parallel compute_flux!(qHx, qHy, qHz, qHx2, qHy2, qHz2, H, D, θr_dτ, dx, dy, dz)
@hide_communication b_width begin # communication/computation overlap
@parallel compute_update!(H, Hold, qHx, qHy, qHz, dτ_ρ, dt, dx, dy, dz)
update_halo!(H)
end
iter += 1
if iter % nout == 0
@parallel check_res!(ResH, H, Hold, qHx2, qHy2, qHz2, dt, dx, dy, dz)
err = norm_g(ResH) / sqrt(len_ResH_g)
end
end
ittot += iter; it += 1; t += dt
Hold .= H
if isnan(err) error("NaN") end
end
if (me==0) @printf("Total time = %1.2f, time steps = %d, nx = %d, iterations tot = %d \n", round(ttot, sigdigits=2), it, nx_g(), ittot) end
# Visualise
if do_viz || do_save_viz
H_inn .= Array(inn(H)); gather!(H_inn, H_v)
if me==0 && do_viz
heatmap(Xi_g, Yi_g, H_v[:,:,z_sl]', dpi=150, aspect_ratio=1, framestyle=:box, xlims=(Xi_g[1],Xi_g[end]), ylims=(Yi_g[1],Yi_g[end]), xlabel="lx", ylabel="ly", c=:viridis, clims=(0,1), title="linear step diffusion (nt=$it, iters=$ittot)")
savefig("../../figures/diff_3D_linstep_$(nx_g()).png")
end
end
if me==0 && do_save
!ispath("../../output") && mkdir("../../output")
open("../../output/out_diff_3D_linstep.txt","a") do io
println(io, "$(nx_g()) $(ny_g()) $(nz_g()) $(ittot) $(nt)")
end
end
if me==0 && do_save_viz
!ispath("../../out_visu") && mkdir("../../out_visu")
matwrite("../../out_visu/diff_3D_linstep.mat", Dict("H_3D"=> Array(H_v), "xc_3D"=> Array(xc), "yc_3D"=> Array(yc), "zc_3D"=> Array(zc)); compress = true)
end
finalize_global_grid()
return xc, yc, zc, H
end
if use_return
xc, yc, zc, H = diffusion_3D_();
else
diffusion_3D = begin diffusion_3D_(); return; end
end