diff --git a/common/src/KokkosFFT_Helpers.hpp b/common/src/KokkosFFT_Helpers.hpp index 4edc5cda..631b51ca 100644 --- a/common/src/KokkosFFT_Helpers.hpp +++ b/common/src/KokkosFFT_Helpers.hpp @@ -85,8 +85,8 @@ void _roll(const ExecutionSpace& exec_space, ViewType& inout, using point_type = typename range_type::point_type; range_type range( - point_type{{0, 0}}, point_type{{len0, len1}}, tile_type{{4, 4}} - // [TO DO] Choose optimal tile sizes for each device + exec_space, point_type{{0, 0}}, point_type{{len0, len1}}, + tile_type{{4, 4}} // [TO DO] Choose optimal tile sizes for each device ); axis_type<2> shift0 = {0}, shift1 = {0}, shift2 = {n0 / 2, n1 / 2}; diff --git a/common/src/KokkosFFT_padding.hpp b/common/src/KokkosFFT_padding.hpp index ac1a4310..4e286be1 100644 --- a/common/src/KokkosFFT_padding.hpp +++ b/common/src/KokkosFFT_padding.hpp @@ -133,8 +133,9 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0}}, point_type{{n0, n1}}, tile_type{{4, 4}} - // [TO DO] Choose optimal tile sizes for each device + range_type range( + exec_space, point_type{{0, 0}}, point_type{{n0, n1}}, tile_type{{4, 4}} + // [TO DO] Choose optimal tile sizes for each device ); Kokkos::parallel_for( @@ -160,8 +161,8 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using point_type = typename range_type::point_type; range_type range( - point_type{{0, 0, 0}}, point_type{{n0, n1, n2}}, tile_type{{4, 4, 4}} - // [TO DO] Choose optimal tile sizes for each device + exec_space, point_type{{0, 0, 0}}, point_type{{n0, n1, n2}}, + tile_type{{4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device ); Kokkos::parallel_for( @@ -189,8 +190,8 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0, 0, 0}}, point_type{{n0, n1, n2, n3}}, - tile_type{{4, 4, 4, 4}} + range_type range(exec_space, point_type{{0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3}}, tile_type{{4, 4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device ); @@ -220,7 +221,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4}}, tile_type{{4, 4, 4, 4, 1}} // [TO DO] Choose optimal tile sizes for each device ); @@ -252,7 +253,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device @@ -286,7 +287,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device @@ -323,7 +324,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device diff --git a/common/src/KokkosFFT_transpose.hpp b/common/src/KokkosFFT_transpose.hpp index 18217308..832bebb5 100644 --- a/common/src/KokkosFFT_transpose.hpp +++ b/common/src/KokkosFFT_transpose.hpp @@ -131,8 +131,9 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, int n0 = in.extent(0), n1 = in.extent(1); - range_type range(point_type{{0, 0}}, point_type{{n0, n1}}, tile_type{{4, 4}} - // [TO DO] Choose optimal tile sizes for each device + range_type range( + exec_space, point_type{{0, 0}}, point_type{{n0, n1}}, tile_type{{4, 4}} + // [TO DO] Choose optimal tile sizes for each device ); _prep_transpose_view(in, out, _map); @@ -157,8 +158,8 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2); range_type range( - point_type{{0, 0, 0}}, point_type{{n0, n1, n2}}, tile_type{{4, 4, 4}} - // [TO DO] Choose optimal tile sizes for each device + exec_space, point_type{{0, 0, 0}}, point_type{{n0, n1, n2}}, + tile_type{{4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device ); _prep_transpose_view(in, out, _map); @@ -191,8 +192,8 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), n3 = in.extent(3); - range_type range(point_type{{0, 0, 0, 0}}, point_type{{n0, n1, n2, n3}}, - tile_type{{4, 4, 4, 4}} + range_type range(exec_space, point_type{{0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3}}, tile_type{{4, 4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device ); @@ -228,7 +229,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, n3 = in.extent(3); int n4 = in.extent(4); - range_type range(point_type{{0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4}}, tile_type{{4, 4, 4, 4, 1}} // [TO DO] Choose optimal tile sizes for each device ); @@ -266,7 +267,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, n3 = in.extent(3); int n4 = in.extent(4), n5 = in.extent(5); - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device @@ -307,7 +308,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, n3 = in.extent(3); int n4 = in.extent(4), n5 = in.extent(5), n6 = in.extent(6); - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device @@ -354,7 +355,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, int n4 = in.extent(4), n5 = in.extent(5), n6 = in.extent(6), n7 = in.extent(7); - range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + range_type range(exec_space, point_type{{0, 0, 0, 0, 0, 0}}, point_type{{n0, n1, n2, n3, n4, n5}}, tile_type{{4, 4, 4, 4, 1, 1}} // [TO DO] Choose optimal tile sizes for each device