diff --git a/src/TrixiGPU.jl b/src/TrixiGPU.jl index 2f8c341..0efcfa7 100644 --- a/src/TrixiGPU.jl +++ b/src/TrixiGPU.jl @@ -13,7 +13,7 @@ using Trixi: AbstractEquations, TreeMesh, DGSEM, True, False, wrap_array, compute_coefficients, have_nonconservative_terms, boundary_condition_periodic, - set_log_type, set_sqrt_type + set_log_type!, set_sqrt_type! import Trixi: get_node_vars, get_node_coords, get_surface_node_vars @@ -27,8 +27,8 @@ using StaticArrays: SVector include("auxiliary/auxiliary.jl") include("solvers/solvers.jl") -set_log_type("log_Base") -set_sqrt_type("sqrt_Base") +set_log_type!("log_Base") +set_sqrt_type!("sqrt_Base") # Export the public APIs export semidiscretize_gpu diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index d8b41f5..1938552 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -5,7 +5,7 @@ # the device (i.e., GPU). # Kernel for calculating fluxes along normal direction -function flux_kernel!(flux_arr, u, equations::AbstractEquations{1}, flux::Function) +function flux_kernel!(flux_arr, u, equations::AbstractEquations{1}, flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -43,7 +43,7 @@ end # Kernel for calculating volume fluxes function volume_flux_kernel!(volume_flux_arr, u, equations::AbstractEquations{1}, - volume_flux::Function) + volume_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -68,8 +68,8 @@ end # Kernel for calculating symmetric and nonconservative fluxes function symmetric_noncons_flux_kernel!(symmetric_flux_arr, noncons_flux_arr, u, derivative_split, - equations::AbstractEquations{1}, symmetric_flux::Function, - nonconservative_flux::Function) + equations::AbstractEquations{1}, symmetric_flux::Any, + nonconservative_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -574,8 +574,6 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup indices_arr = CuArray{Int64}([firsts[1], firsts[2]]) boundary_conditions_callable = replace_boundary_conditions(boundary_conditions) - size_arr = CuArray{Float64}(undef, length(boundary_arr)) - boundary_flux_kernel = @cuda launch=false boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr, indices_arr, @@ -587,7 +585,7 @@ function cuda_boundary_flux!(t, mesh::TreeMesh{1}, boundary_conditions::NamedTup boundary_flux_kernel(surface_flux_values, boundaries_u, node_coordinates, t, boundary_arr, indices_arr, neighbor_ids, neighbor_sides, orientations, boundary_conditions_callable, equations, surface_flux; - configurator_1d(boundary_flux_kernel, size_arr)...) + configurator_1d(boundary_flux_kernel, boundary_arr)...) cache.elements.surface_flux_values = surface_flux_values # copy back to host automatically @@ -602,13 +600,11 @@ function cuda_surface_integral!(du, mesh::TreeMesh{1}, equations, dg::DGSEM, cac ]) surface_flux_values = CuArray{Float64}(cache.elements.surface_flux_values) - size_arr = CuArray{Float64}(undef, size(du, 1), size(du, 2), size(du, 3)) - surface_integral_kernel = @cuda launch=false surface_integral_kernel!(du, factor_arr, surface_flux_values, equations) surface_integral_kernel(du, factor_arr, surface_flux_values, equations; - configurator_3d(surface_integral_kernel, size_arr)...) + configurator_3d(surface_integral_kernel, du)...) return nothing end @@ -645,8 +641,8 @@ end # Put everything together into a single function # Ref: `rhs!` function in Trixi.jl -function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, initial_condition, - boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} +function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{1}, equations, boundary_conditions, + source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) cuda_volume_integral!(du, u, mesh, have_nonconservative_terms(equations), equations, diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 5648297..8f318e0 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -5,7 +5,7 @@ # the device (i.e., GPU). # Kernel for calculating fluxes along normal directions -function flux_kernel!(flux_arr1, flux_arr2, u, equations::AbstractEquations{2}, flux::Function) +function flux_kernel!(flux_arr1, flux_arr2, u, equations::AbstractEquations{2}, flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -52,7 +52,7 @@ end # Kernel for calculating volume fluxes function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, u, equations::AbstractEquations{2}, - volume_flux::Function) + volume_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -82,8 +82,8 @@ end # Kernel for calculating symmetric and nonconservative fluxes function symmetric_noncons_flux_kernel!(symmetric_flux_arr1, symmetric_flux_arr2, noncons_flux_arr1, noncons_flux_arr2, u, derivative_split, - equations::AbstractEquations{2}, symmetric_flux::Function, - nonconservative_flux::Function) + equations::AbstractEquations{2}, symmetric_flux::Any, + nonconservative_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -590,7 +590,7 @@ end # CUDA kernel for calculating source terms function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEquations{2}, - source_terms::Function) + source_terms::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -1061,8 +1061,8 @@ end # Put everything together into a single function # Ref: `rhs!` function in Trixi.jl -function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, initial_condition, - boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} +function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{2}, equations, boundary_conditions, + source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) cuda_volume_integral!(du, u, mesh, have_nonconservative_terms(equations), equations, diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 5c9f45d..37546cc 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -6,7 +6,7 @@ # Kernel for calculating fluxes along normal directions function flux_kernel!(flux_arr1, flux_arr2, flux_arr3, u, equations::AbstractEquations{3}, - flux::Function) + flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -58,7 +58,7 @@ end # CUDA kernel for calculating volume fluxes function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flux_arr3, u, - equations::AbstractEquations{3}, volume_flux::Function) + equations::AbstractEquations{3}, volume_flux::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -679,7 +679,7 @@ end # Kernel for calculating source terms function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEquations{3}, - source_terms::Function) + source_terms::Any) j = (blockIdx().x - 1) * blockDim().x + threadIdx().x k = (blockIdx().y - 1) * blockDim().y + threadIdx().y @@ -1113,8 +1113,8 @@ end # Put everything together into a single function # Ref: `rhs!` function in Trixi.jl -function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, initial_condition, - boundary_conditions, source_terms::Source, dg::DGSEM, cache) where {Source} +function rhs_gpu!(du_cpu, u_cpu, t, mesh::TreeMesh{3}, equations, boundary_conditions, + source_terms::Source, dg::DGSEM, cache) where {Source} du, u = copy_to_device!(du_cpu, u_cpu) cuda_volume_integral!(du, u, mesh, have_nonconservative_terms(equations), equations, diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 3b1cb52..26db714 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -10,8 +10,7 @@ function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) - rhs_gpu!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, - solver, cache) + rhs_gpu!(du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) return nothing end diff --git a/test/test_adevction_mortar.jl b/test/test_adevction_mortar.jl index 6b106b9..affb435 100644 --- a/test/test_adevction_mortar.jl +++ b/test/test_adevction_mortar.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Linear Advection Mortar" begin @testset "Linear Advection 2D" begin diff --git a/test/test_advection_basic.jl b/test/test_advection_basic.jl index 7d8de9b..bde97bf 100644 --- a/test/test_advection_basic.jl +++ b/test/test_advection_basic.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Linear Advection Basic" begin @testset "Linear Advection 1D" begin diff --git a/test/test_euler_ec.jl b/test/test_euler_ec.jl index a8c9a25..5ebb761 100644 --- a/test/test_euler_ec.jl +++ b/test/test_euler_ec.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Compressible Euler Flux Differencing" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_euler_source_terms.jl b/test/test_euler_source_terms.jl index 7a62255..6f410ea 100644 --- a/test/test_euler_source_terms.jl +++ b/test/test_euler_source_terms.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Compressible Euler Source Terms" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_hypdiff_nonperiodic.jl b/test/test_hypdiff_nonperiodic.jl index a6e9b21..d6263ab 100644 --- a/test/test_hypdiff_nonperiodic.jl +++ b/test/test_hypdiff_nonperiodic.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Hyperbolic Diffusion Boundary Conditions" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_shallowwater_well_balanced.jl b/test/test_shallowwater_well_balanced.jl index 9ab2fca..d90b3d7 100644 --- a/test/test_shallowwater_well_balanced.jl +++ b/test/test_shallowwater_well_balanced.jl @@ -13,10 +13,6 @@ isdir(outdir) && rm(outdir, recursive = true) # CPU kernels, which corresponds to requiring equality of about half of the significant digits # (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). -# Basically, this heuristic method first checks whether this error bound (sometimes it is further -# relaxed) is satisfied. Any new methods and optimizations introduced later should at least satisfy -# this error bound. - # Test precision of the semidiscretization process @testset "Test Shallow Water Well Balanced" begin @testset "Shallow Water 1D" begin