diff --git a/common/src/KokkosFFT_padding.hpp b/common/src/KokkosFFT_padding.hpp index 334c4f28..af6971e6 100644 --- a/common/src/KokkosFFT_padding.hpp +++ b/common/src/KokkosFFT_padding.hpp @@ -10,10 +10,10 @@ namespace Impl { template auto get_modified_shape(const ViewType& view, shape_type shape) { static_assert(ViewType::rank() >= DIM, - "KokkosFFT::get_modified_shape: Rank of View must be larger " + "get_modified_shape: Rank of View must be larger " "than or equal to the Rank of new shape"); static_assert(DIM > 0, - "KokkosFFT::get_modified_shape: Rank of FFT axes must be " + "get_modified_shape: Rank of FFT axes must be " "larger than or equal to 1"); // [TO DO] Add a is_C2R arg. If is_C2R is true, then shape should be shape/2+1 @@ -41,7 +41,7 @@ template auto is_crop_or_pad_needed(const ViewType& view, const shape_type& modified_shape) { static_assert(ViewType::rank() == DIM, - "KokkosFFT::_crop_or_pad: Rank of View must be equal to Rank " + "is_crop_or_pad_needed: Rank of View must be equal to Rank " "of extended shape."); // [TO DO] Add a is_C2R arg. If is_C2R is true, then shape should be shape/2+1 @@ -61,11 +61,6 @@ auto is_crop_or_pad_needed(const ViewType& view, template void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, ViewType& out, shape_type<1> s) { - constexpr std::size_t DIM = 1; - static_assert(ViewType::rank() == DIM, - "KokkosFFT::_crop_or_pad: Rank of View must be equal to Rank " - "of extended shape."); - auto _n0 = s.at(0); out = ViewType("out", _n0); @@ -81,9 +76,6 @@ template void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, ViewType& out, shape_type<2> s) { constexpr std::size_t DIM = 2; - static_assert(ViewType::rank() == DIM, - "KokkosFFT::_crop_or_pad: Rank of View must be equal to Rank " - "of extended shape."); auto [_n0, _n1] = s; out = ViewType("out", _n0, _n1); @@ -93,7 +85,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, using range_type = Kokkos::MDRangePolicy< ExecutionSpace, - Kokkos::Rank<2, Kokkos::Iterate::Default, Kokkos::Iterate::Default>>; + Kokkos::Rank>; using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; @@ -109,9 +101,6 @@ template void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, ViewType& out, shape_type<3> s) { constexpr std::size_t DIM = 3; - static_assert(ViewType::rank() == DIM, - "KokkosFFT::_crop_or_pad: Rank of View must be equal to Rank " - "of extended shape."); auto [_n0, _n1, _n2] = s; out = ViewType("out", _n0, _n1, _n2); @@ -122,7 +111,7 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, using range_type = Kokkos::MDRangePolicy< ExecutionSpace, - Kokkos::Rank<3, Kokkos::Iterate::Default, Kokkos::Iterate::Default>>; + Kokkos::Rank>; using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; @@ -137,9 +126,182 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } +template +void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, + ViewType& out, shape_type<4> s) { + constexpr std::size_t DIM = 4; + + auto [_n0, _n1, _n2, _n3] = s; + out = ViewType("out", _n0, _n1, _n2, _n3); + + int n0 = std::min(_n0, in.extent(0)); + int n1 = std::min(_n1, in.extent(1)); + int n2 = std::min(_n2, in.extent(2)); + int n3 = std::min(_n3, in.extent(3)); + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank>; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + range_type range(point_type{{0, 0, 0, 0}}, point_type{{n0, n1, n2, n3}}, + tile_type{{4, 4, 4, 4}} + // [TO DO] Choose optimal tile sizes for each device + ); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) { + out(i0, i1, i2, i3) = in(i0, i1, i2, i3); + }); +} + +template +void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, + ViewType& out, shape_type<5> s) { + constexpr std::size_t DIM = 5; + + auto [_n0, _n1, _n2, _n3, _n4] = s; + out = ViewType("out", _n0, _n1, _n2, _n3, _n4); + + int n0 = std::min(_n0, in.extent(0)); + int n1 = std::min(_n1, in.extent(1)); + int n2 = std::min(_n2, in.extent(2)); + int n3 = std::min(_n3, in.extent(3)); + int n4 = std::min(_n4, in.extent(4)); + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank>; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + range_type range(point_type{{0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4}}, tile_type{{4, 4, 4, 4, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) { + out(i0, i1, i2, i3, i4) = in(i0, i1, i2, i3, i4); + }); +} + +template +void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, + ViewType& out, shape_type<6> s) { + constexpr std::size_t DIM = 6; + + auto [_n0, _n1, _n2, _n3, _n4, _n5] = s; + out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5); + + int n0 = std::min(_n0, in.extent(0)); + int n1 = std::min(_n1, in.extent(1)); + int n2 = std::min(_n2, in.extent(2)); + int n3 = std::min(_n3, in.extent(3)); + int n4 = std::min(_n4, in.extent(4)); + int n5 = std::min(_n5, in.extent(5)); + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank>; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + out(i0, i1, i2, i3, i4, i5) = in(i0, i1, i2, i3, i4, i5); + }); +} + +template +void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, + ViewType& out, shape_type<7> s) { + constexpr std::size_t DIM = 6; + + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = s; + out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6); + + int n0 = std::min(_n0, in.extent(0)); + int n1 = std::min(_n1, in.extent(1)); + int n2 = std::min(_n2, in.extent(2)); + int n3 = std::min(_n3, in.extent(3)); + int n4 = std::min(_n4, in.extent(4)); + int n5 = std::min(_n5, in.extent(5)); + int n6 = std::min(_n6, in.extent(6)); + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank>; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + for (int i6 = 0; i6 < n6; i6++) { + out(i0, i1, i2, i3, i4, i5, i6) = in(i0, i1, i2, i3, i4, i5, i6); + } + }); +} + +template +void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, + ViewType& out, shape_type<8> s) { + constexpr std::size_t DIM = 6; + + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = s; + out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); + + int n0 = std::min(_n0, in.extent(0)); + int n1 = std::min(_n1, in.extent(1)); + int n2 = std::min(_n2, in.extent(2)); + int n3 = std::min(_n3, in.extent(3)); + int n4 = std::min(_n4, in.extent(4)); + int n5 = std::min(_n5, in.extent(5)); + int n6 = std::min(_n6, in.extent(6)); + int n7 = std::min(_n7, in.extent(7)); + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank>; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + for (int i6 = 0; i6 < n6; i6++) { + for (int i7 = 0; i7 < n7; i7++) { + out(i0, i1, i2, i3, i4, i5, i6, i7) = + in(i0, i1, i2, i3, i4, i5, i6, i7); + } + } + }); +} + template void crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, ViewType& out, shape_type s) { + static_assert(ViewType::rank() == DIM, + "crop_or_pad: Rank of View must be equal to Rank " + "of extended shape."); _crop_or_pad(exec_space, in, out, s); } } // namespace Impl diff --git a/common/src/KokkosFFT_transpose.hpp b/common/src/KokkosFFT_transpose.hpp index 44658dea..068baf84 100644 --- a/common/src/KokkosFFT_transpose.hpp +++ b/common/src/KokkosFFT_transpose.hpp @@ -11,10 +11,10 @@ namespace Impl { template auto get_map_axes(const ViewType& view, axis_type _axes) { static_assert(ViewType::rank() >= DIM, - "KokkosFFT::get_map_axes: Rank of View must be larger thane or " + "get_map_axes: Rank of View must be larger thane or " "equal to the Rank of FFT axes."); static_assert(DIM > 0, - "KokkosFFT::get_map_axes: Rank of FFT axes must be larger than " + "get_map_axes: Rank of FFT axes must be larger than " "or equal to 1."); constexpr int rank = static_cast(ViewType::rank()); @@ -90,11 +90,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, template void _transpose(const ExecutionSpace& exec_space, InViewType& in, OutViewType& out, axis_type<2> _map) { - constexpr std::size_t DIM = 2; - static_assert(InViewType::rank() == DIM, - "KokkosFFT::_transpose: Rank of View must be equal to Rank of " - "transpose axes."); - + constexpr std::size_t DIM = 2; constexpr std::size_t rank = InViewType::rank(); using array_layout_type = typename InViewType::array_layout; @@ -131,10 +127,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, template void _transpose(const ExecutionSpace& exec_space, InViewType& in, OutViewType& out, axis_type<3> _map) { - constexpr std::size_t DIM = 3; - static_assert(InViewType::rank() == DIM, - "KokkosFFT::_transpose: Rank of View must be equal to Rank of " - "transpose axes."); + constexpr std::size_t DIM = 3; constexpr std::size_t rank = InViewType::rank(); using array_layout_type = typename InViewType::array_layout; @@ -169,29 +162,290 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, Kokkos::Array map = {_map[0], _map[1], _map[2]}; Kokkos::parallel_for( range, KOKKOS_LAMBDA(int i0, int i1, int i2) { - int _i0 = i0, _i1 = i1, _i2 = i2; - if (map[0] == 0 && map[1] == 2 && map[2] == 1) { - _i1 = i2; - _i2 = i1; - } else if (map[0] == 1 && map[1] == 0 && map[2] == 2) { - _i0 = i1; - _i1 = i0; - } else if (map[0] == 1 && map[1] == 2 && map[2] == 0) { - _i0 = i1; - _i1 = i2; - _i2 = i0; - } else if (map[0] == 2 && map[1] == 1 && map[2] == 0) { - _i0 = i2; - _i2 = i0; - } else if (map[0] == 2 && map[1] == 0 && map[2] == 1) { - _i0 = i2; - _i1 = i0; - _i2 = i1; - } + int _indices[rank] = {i0, i1, i2}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + out(_i0, _i1, _i2) = in(i0, i1, i2); }); } +template +void _transpose(const ExecutionSpace& exec_space, InViewType& in, + OutViewType& out, axis_type<4> _map) { + constexpr std::size_t DIM = 4; + constexpr std::size_t rank = InViewType::rank(); + using array_layout_type = typename InViewType::array_layout; + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank >; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), + n3 = in.extent(3); + + range_type range(point_type{{0, 0, 0, 0}}, point_type{{n0, n1, n2, n3}}, + tile_type{{4, 4, 4, 4}} + // [TO DO] Choose optimal tile sizes for each device + ); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + auto [_n0, _n1, _n2, _n3] = out_extents; + out = OutViewType("out", _n0, _n1, _n2, _n3); + } + + Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3]}; + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) { + int _indices[rank] = {i0, i1, i2, i3}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + int _i3 = _indices[map[3]]; + + out(_i0, _i1, _i2, _i3) = in(i0, i1, i2, i3); + }); +} + +template +void _transpose(const ExecutionSpace& exec_space, InViewType& in, + OutViewType& out, axis_type<5> _map) { + constexpr std::size_t DIM = 5; + constexpr std::size_t rank = InViewType::rank(); + using array_layout_type = typename InViewType::array_layout; + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank >; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), + n3 = in.extent(3); + int n4 = in.extent(4); + + range_type range(point_type{{0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4}}, tile_type{{4, 4, 4, 4, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + auto [_n0, _n1, _n2, _n3, _n4] = out_extents; + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4); + } + + Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], _map[4]}; + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) { + int _indices[rank] = {i0, i1, i2, i3, i4}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + int _i3 = _indices[map[3]]; + int _i4 = _indices[map[4]]; + + out(_i0, _i1, _i2, _i3, _i4) = in(i0, i1, i2, i3, i4); + }); +} + +template +void _transpose(const ExecutionSpace& exec_space, InViewType& in, + OutViewType& out, axis_type<6> _map) { + constexpr std::size_t DIM = 6; + constexpr std::size_t rank = InViewType::rank(); + using array_layout_type = typename InViewType::array_layout; + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank >; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), + n3 = in.extent(3); + int n4 = in.extent(4), n5 = in.extent(5); + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + auto [_n0, _n1, _n2, _n3, _n4, _n5] = out_extents; + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5); + } + + Kokkos::Array map = {_map[0], _map[1], _map[2], + _map[3], _map[4], _map[5]}; + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + int _indices[rank] = {i0, i1, i2, i3, i4, i5}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + int _i3 = _indices[map[3]]; + int _i4 = _indices[map[4]]; + int _i5 = _indices[map[5]]; + + out(_i0, _i1, _i2, _i3, _i4, _i5) = in(i0, i1, i2, i3, i4, i5); + }); +} + +template +void _transpose(const ExecutionSpace& exec_space, InViewType& in, + OutViewType& out, axis_type<7> _map) { + constexpr std::size_t DIM = 6; + constexpr std::size_t rank = InViewType::rank(); + using array_layout_type = typename InViewType::array_layout; + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank >; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), + n3 = in.extent(3); + int n4 = in.extent(4), n5 = in.extent(5), n6 = in.extent(6); + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = out_extents; + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6); + } + + Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], + _map[4], _map[5], _map[6]}; + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + for (int i6 = 0; i6 < n6; i6++) { + int _indices[rank] = {i0, i1, i2, i3, i4, i5, i6}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + int _i3 = _indices[map[3]]; + int _i4 = _indices[map[4]]; + int _i5 = _indices[map[5]]; + int _i6 = _indices[map[6]]; + + out(_i0, _i1, _i2, _i3, _i4, _i5, _i6) = + in(i0, i1, i2, i3, i4, i5, i6); + } + }); +} + +template +void _transpose(const ExecutionSpace& exec_space, InViewType& in, + OutViewType& out, axis_type<8> _map) { + constexpr std::size_t DIM = 6; + + constexpr std::size_t rank = InViewType::rank(); + using array_layout_type = typename InViewType::array_layout; + + using range_type = Kokkos::MDRangePolicy< + ExecutionSpace, + Kokkos::Rank >; + using tile_type = typename range_type::tile_type; + using point_type = typename range_type::point_type; + + int n0 = in.extent(0), n1 = in.extent(1), n2 = in.extent(2), + n3 = in.extent(3); + int n4 = in.extent(4), n5 = in.extent(5), n6 = in.extent(6), + n7 = in.extent(7); + + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{n0, n1, n2, n3, n4, n5}}, + tile_type{{4, 4, 4, 4, 1, 1}} + // [TO DO] Choose optimal tile sizes for each device + ); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = out_extents; + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); + } + + Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], + _map[4], _map[5], _map[6], _map[7]}; + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) { + for (int i6 = 0; i6 < n6; i6++) { + for (int i7 = 0; i7 < n7; i7++) { + int _indices[rank] = {i0, i1, i2, i3, i4, i5, i6, i7}; + int _i0 = _indices[map[0]]; + int _i1 = _indices[map[1]]; + int _i2 = _indices[map[2]]; + int _i3 = _indices[map[3]]; + int _i4 = _indices[map[4]]; + int _i5 = _indices[map[5]]; + int _i6 = _indices[map[6]]; + int _i7 = _indices[map[7]]; + + out(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7) = + in(i0, i1, i2, i3, i4, i5, i6, i7); + } + } + }); +} + /* Make the axis direction to the inner most direction axis should be the range in [-(rank-1), rank-1], where negative number is interpreted as rank + axis. @@ -220,16 +474,20 @@ void transpose(const ExecutionSpace& exec_space, InViewType& in, using array_layout_type = typename InViewType::array_layout; static_assert(Kokkos::is_view::value, - "KokkosFFT::transpose: InViewType is not a Kokkos::View."); + "transpose: InViewType is not a Kokkos::View."); static_assert(Kokkos::is_view::value, - "KokkosFFT::transpose: OutViewType is not a Kokkos::View."); + "transpose: OutViewType is not a Kokkos::View."); static_assert(InViewType::rank() == OutViewType::rank(), - "KokkosFFT::transpose: InViewType and OutViewType must have " + "transpose: InViewType and OutViewType must have " "the same rank."); + static_assert(InViewType::rank() == DIM, + "transpose: Rank of View must be equal to Rank of " + "transpose axes."); + if (!KokkosFFT::Impl::is_transpose_needed(_map)) { - throw std::runtime_error("KokkosFFT::transpose: transpose not necessary"); + throw std::runtime_error("transpose: transpose not necessary"); } _transpose(exec_space, in, out, _map); diff --git a/common/unit_test/Test_Padding.cpp b/common/unit_test/Test_Padding.cpp index 877e3fc1..6e23d1b3 100644 --- a/common/unit_test/Test_Padding.cpp +++ b/common/unit_test/Test_Padding.cpp @@ -691,12 +691,12 @@ TEST(CropOrPad3D, View3D) { auto h_x = Kokkos::create_mirror_view(x); Kokkos::deep_copy(h_x, x); - for (int i0 = -1; i0 <= 1; i0++) { - for (int i1 = -1; i1 <= 1; i1++) { - for (int i2 = -1; i2 <= 1; i2++) { - std::size_t n0_new = static_cast(n0 + i0); - std::size_t n1_new = static_cast(n1 + i1); - std::size_t n2_new = static_cast(n2 + i2); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); shape_type<3> shape_new = {n0_new, n1_new, n2_new}; View3D _x; @@ -714,6 +714,299 @@ TEST(CropOrPad3D, View3D) { } } + Kokkos::deep_copy(ref_x, h_ref_x); + KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); + } + } + } +} + +TEST(CropOrPad4D, View4D) { + const int n0 = 30, n1 = 15, n2 = 8, n3 = 7; + View4D x("x", n0, n1, n2, n3); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto rand_engine = std::mt19937(0); + auto rand_dist = std::uniform_int_distribution(-1, 1); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + int d3 = rand_dist(rand_engine); + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); + std::size_t n3_new = static_cast(n3 + d3); + shape_type<4> shape_new = {n0_new, n1_new, n2_new, n3_new}; + + View4D _x; + View4D ref_x("ref_x", n0_new, n1_new, n2_new, n3_new); + + auto h_ref_x = Kokkos::create_mirror_view(ref_x); + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + if (i0 >= h_ref_x.extent(0) || i1 >= h_ref_x.extent(1) || + i2 >= h_ref_x.extent(2) || i3 >= h_ref_x.extent(3)) + continue; + h_ref_x(i0, i1, i2, i3) = h_x(i0, i1, i2, i3); + } + } + } + } + + Kokkos::deep_copy(ref_x, h_ref_x); + KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); + } + } + } +} + +TEST(CropOrPad5D, View5D) { + const int n0 = 30, n1 = 15, n2 = 8, n3 = 7, n4 = 3; + View5D x("x", n0, n1, n2, n3, n4); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto rand_engine = std::mt19937(0); + auto rand_dist = std::uniform_int_distribution(-1, 1); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + int d3 = rand_dist(rand_engine); + int d4 = rand_dist(rand_engine); + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); + std::size_t n3_new = static_cast(n3 + d3); + std::size_t n4_new = static_cast(n4 + d4); + shape_type<5> shape_new = {n0_new, n1_new, n2_new, n3_new, n4_new}; + + View5D _x; + View5D ref_x("ref_x", n0_new, n1_new, n2_new, n3_new, n4_new); + + auto h_ref_x = Kokkos::create_mirror_view(ref_x); + for (int i4 = 0; i4 < n4; i4++) { + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + if (i0 >= h_ref_x.extent(0) || i1 >= h_ref_x.extent(1) || + i2 >= h_ref_x.extent(2) || i3 >= h_ref_x.extent(3) || + i4 >= h_ref_x.extent(4)) + continue; + h_ref_x(i0, i1, i2, i3, i4) = h_x(i0, i1, i2, i3, i4); + } + } + } + } + } + + Kokkos::deep_copy(ref_x, h_ref_x); + KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); + } + } + } +} + +TEST(CropOrPad6D, View6D) { + const int n0 = 10, n1 = 15, n2 = 8, n3 = 7, n4 = 3, n5 = 4; + View6D x("x", n0, n1, n2, n3, n4, n5); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto rand_engine = std::mt19937(0); + auto rand_dist = std::uniform_int_distribution(-1, 1); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + int d3 = rand_dist(rand_engine); + int d4 = rand_dist(rand_engine); + int d5 = rand_dist(rand_engine); + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); + std::size_t n3_new = static_cast(n3 + d3); + std::size_t n4_new = static_cast(n4 + d4); + std::size_t n5_new = static_cast(n5 + d5); + shape_type<6> shape_new = {n0_new, n1_new, n2_new, + n3_new, n4_new, n5_new}; + + View6D _x; + View6D ref_x("ref_x", n0_new, n1_new, n2_new, n3_new, n4_new, + n5_new); + + auto h_ref_x = Kokkos::create_mirror_view(ref_x); + for (int i5 = 0; i5 < n5; i5++) { + for (int i4 = 0; i4 < n4; i4++) { + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + if (i0 >= h_ref_x.extent(0) || i1 >= h_ref_x.extent(1) || + i2 >= h_ref_x.extent(2) || i3 >= h_ref_x.extent(3) || + i4 >= h_ref_x.extent(4) || i5 >= h_ref_x.extent(5)) + continue; + h_ref_x(i0, i1, i2, i3, i4, i5) = + h_x(i0, i1, i2, i3, i4, i5); + } + } + } + } + } + } + + Kokkos::deep_copy(ref_x, h_ref_x); + KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); + } + } + } +} + +TEST(CropOrPad7D, View7D) { + const int n0 = 10, n1 = 15, n2 = 8, n3 = 7, n4 = 3, n5 = 4, n6 = 5; + View7D x("x", n0, n1, n2, n3, n4, n5, n6); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto rand_engine = std::mt19937(0); + auto rand_dist = std::uniform_int_distribution(-1, 1); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + int d3 = rand_dist(rand_engine); + int d4 = rand_dist(rand_engine); + int d5 = rand_dist(rand_engine); + int d6 = rand_dist(rand_engine); + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); + std::size_t n3_new = static_cast(n3 + d3); + std::size_t n4_new = static_cast(n4 + d4); + std::size_t n5_new = static_cast(n5 + d5); + std::size_t n6_new = static_cast(n6 + d6); + shape_type<7> shape_new = {n0_new, n1_new, n2_new, n3_new, + n4_new, n5_new, n6_new}; + + View7D _x; + View7D ref_x("ref_x", n0_new, n1_new, n2_new, n3_new, n4_new, + n5_new, n6_new); + + auto h_ref_x = Kokkos::create_mirror_view(ref_x); + for (int i6 = 0; i6 < n6; i6++) { + for (int i5 = 0; i5 < n5; i5++) { + for (int i4 = 0; i4 < n4; i4++) { + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + if (i0 >= h_ref_x.extent(0) || i1 >= h_ref_x.extent(1) || + i2 >= h_ref_x.extent(2) || i3 >= h_ref_x.extent(3) || + i4 >= h_ref_x.extent(4) || i5 >= h_ref_x.extent(5) || + i6 >= h_ref_x.extent(6)) + continue; + h_ref_x(i0, i1, i2, i3, i4, i5, i6) = + h_x(i0, i1, i2, i3, i4, i5, i6); + } + } + } + } + } + } + } + + Kokkos::deep_copy(ref_x, h_ref_x); + KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); + } + } + } +} + +TEST(CropOrPad8D, View8D) { + const int n0 = 10, n1 = 15, n2 = 8, n3 = 7, n4 = 3, n5 = 4, n6 = 5, n7 = 3; + View8D x("x", n0, n1, n2, n3, n4, n5, n6, n7); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto rand_engine = std::mt19937(0); + auto rand_dist = std::uniform_int_distribution(-1, 1); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + for (int d0 = -1; d0 <= 1; d0++) { + for (int d1 = -1; d1 <= 1; d1++) { + for (int d2 = -1; d2 <= 1; d2++) { + int d3 = rand_dist(rand_engine); + int d4 = rand_dist(rand_engine); + int d5 = rand_dist(rand_engine); + int d6 = rand_dist(rand_engine); + int d7 = rand_dist(rand_engine); + std::size_t n0_new = static_cast(n0 + d0); + std::size_t n1_new = static_cast(n1 + d1); + std::size_t n2_new = static_cast(n2 + d2); + std::size_t n3_new = static_cast(n3 + d3); + std::size_t n4_new = static_cast(n4 + d4); + std::size_t n5_new = static_cast(n5 + d5); + std::size_t n6_new = static_cast(n6 + d6); + std::size_t n7_new = static_cast(n7 + d7); + shape_type<8> shape_new = {n0_new, n1_new, n2_new, n3_new, + n4_new, n5_new, n6_new, n7_new}; + + View8D _x; + View8D ref_x("ref_x", n0_new, n1_new, n2_new, n3_new, n4_new, + n5_new, n6_new, n7_new); + + auto h_ref_x = Kokkos::create_mirror_view(ref_x); + for (int i7 = 0; i7 < n7; i7++) { + for (int i6 = 0; i6 < n6; i6++) { + for (int i5 = 0; i5 < n5; i5++) { + for (int i4 = 0; i4 < n4; i4++) { + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + if (i0 >= h_ref_x.extent(0) || + i1 >= h_ref_x.extent(1) || + i2 >= h_ref_x.extent(2) || + i3 >= h_ref_x.extent(3) || + i4 >= h_ref_x.extent(4) || + i5 >= h_ref_x.extent(5) || + i6 >= h_ref_x.extent(6) || i7 >= h_ref_x.extent(7)) + continue; + h_ref_x(i0, i1, i2, i3, i4, i5, i6, i7) = + h_x(i0, i1, i2, i3, i4, i5, i6, i7); + } + } + } + } + } + } + } + } + Kokkos::deep_copy(ref_x, h_ref_x); KokkosFFT::Impl::crop_or_pad(execution_space(), x, _x, shape_new); EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, 1.e-12)); diff --git a/common/unit_test/Test_Transpose.cpp b/common/unit_test/Test_Transpose.cpp index 25c91eab..982fa3c3 100644 --- a/common/unit_test/Test_Transpose.cpp +++ b/common/unit_test/Test_Transpose.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include "KokkosFFT_transpose.hpp" @@ -334,93 +336,49 @@ void test_transpose_1d() { std::runtime_error); } -TYPED_TEST(Transpose, 1DView) { - using layout_type = typename TestFixture::layout_type; - - test_transpose_1d(); -} - -/* -TEST(Transpose, 1DLeftView) { - test_transpose_1d(); -} - -TEST(Transpose, 1DRightView) { - test_transpose_1d(); -} -*/ - -TEST(Transpose, 2DLeftView) { +template +void test_transpose_2d() { + using RealView2Dtype = Kokkos::View; const int n0 = 3, n1 = 5; - LeftView2D x("x", n0, n1); - LeftView2D xt_axis0, xt_axis1; // views are allocated internally - LeftView2D ref_axis1("ref_axis1", n1, n0); + RealView2Dtype x("x", n0, n1), ref("ref", n1, n0); + RealView2Dtype xt_axis01, xt_axis10; // views are allocated internally Kokkos::Random_XorShift64_Pool<> random_pool(12345); Kokkos::fill_random(x, random_pool, 1.0); // Transposed views - auto h_x = Kokkos::create_mirror_view(x); - auto h_ref_axis1 = Kokkos::create_mirror_view(ref_axis1); + auto h_x = Kokkos::create_mirror_view(x); + auto h_ref = Kokkos::create_mirror_view(ref); Kokkos::deep_copy(h_x, x); for (int i0 = 0; i0 < h_x.extent(0); i0++) { for (int i1 = 0; i1 < h_x.extent(1); i1++) { - h_ref_axis1(i1, i0) = h_x(i0, i1); + h_ref(i1, i0) = h_x(i0, i1); } } - Kokkos::deep_copy(ref_axis1, h_ref_axis1); + Kokkos::deep_copy(ref, h_ref); Kokkos::fence(); EXPECT_THROW( - KokkosFFT::Impl::transpose(execution_space(), x, xt_axis0, + KokkosFFT::Impl::transpose(execution_space(), x, xt_axis01, axes_type<2>({0, 1})), // xt is identical to x std::runtime_error); - KokkosFFT::Impl::transpose(execution_space(), x, xt_axis1, + KokkosFFT::Impl::transpose(execution_space(), x, xt_axis10, axes_type<2>({1, 0})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis1, ref_axis1, 1.e-5, 1.e-12)); + EXPECT_TRUE(allclose(xt_axis10, ref, 1.e-5, 1.e-12)); } -TEST(Transpose, 2DRightView) { - const int n0 = 3, n1 = 5; - RightView2D x("x", n0, n1), ref_axis0("ref_axis0", n1, n0); - RightView2D xt_axis0, xt_axis1; // views are allocated internally - - Kokkos::Random_XorShift64_Pool<> random_pool(12345); - Kokkos::fill_random(x, random_pool, 1.0); - - // Transposed views - auto h_x = Kokkos::create_mirror_view(x); - auto h_ref_axis0 = Kokkos::create_mirror_view(ref_axis0); - Kokkos::deep_copy(h_x, x); - - for (int i0 = 0; i0 < h_x.extent(0); i0++) { - for (int i1 = 0; i1 < h_x.extent(1); i1++) { - h_ref_axis0(i1, i0) = h_x(i0, i1); - } - } - Kokkos::deep_copy(ref_axis0, h_ref_axis0); - Kokkos::fence(); - - KokkosFFT::Impl::transpose(execution_space(), x, xt_axis0, - axes_type<2>({1, 0})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis0, ref_axis0, 1.e-5, 1.e-12)); - - EXPECT_THROW( - KokkosFFT::Impl::transpose(execution_space(), x, xt_axis1, - axes_type<2>({0, 1})), // xt is identical to x - std::runtime_error); -} - -TEST(Transpose, 3DLeftView) { +template +void test_transpose_3d() { + using RealView3Dtype = Kokkos::View; const int n0 = 3, n1 = 5, n2 = 8; - LeftView3D x("x", n0, n1, n2); - LeftView3D xt_axis012, xt_axis021, xt_axis102, xt_axis120, xt_axis201, + RealView3Dtype x("x", n0, n1, n2); + RealView3Dtype xt_axis012, xt_axis021, xt_axis102, xt_axis120, xt_axis201, xt_axis210; // views are allocated internally - LeftView3D ref_axis021("ref_axis021", n0, n2, n1), + RealView3Dtype ref_axis021("ref_axis021", n0, n2, n1), ref_axis102("ref_axis102", n1, n0, n2); - LeftView3D ref_axis120("ref_axis120", n1, n2, n0), + RealView3Dtype ref_axis120("ref_axis120", n1, n2, n0), ref_axis201("ref_axis201", n2, n0, n1), ref_axis210("ref_axis210", n2, n1, n0); @@ -488,77 +446,856 @@ TEST(Transpose, 3DLeftView) { EXPECT_TRUE(allclose(xt_axis210, ref_axis210, 1.e-5, 1.e-12)); } -TEST(Transpose, 3DRightView) { - const int n0 = 3, n1 = 5, n2 = 8; - RightView3D x("x", n0, n1, n2); - RightView3D xt_axis012, xt_axis021, xt_axis102, xt_axis120, - xt_axis201, xt_axis210; // views are allocated internally - RightView3D ref_axis021("ref_axis021", n0, n2, n1), - ref_axis102("ref_axis102", n1, n0, n2); - RightView3D ref_axis120("ref_axis120", n1, n2, n0), - ref_axis201("ref_axis201", n2, n0, n1), - ref_axis210("ref_axis210", n2, n1, n0); +template +void test_transpose_4d() { + using RealView4DType = Kokkos::View; + const int n0 = 2, n1 = 3, n2 = 4, n3 = 5; + constexpr std::size_t DIM = 4; + RealView4DType x("x", n0, n1, n2, n3); Kokkos::Random_XorShift64_Pool<> random_pool(12345); Kokkos::fill_random(x, random_pool, 1.0); + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + // Transposed views - auto h_x = Kokkos::create_mirror_view(x); - auto h_ref_axis021 = Kokkos::create_mirror_view(ref_axis021); - auto h_ref_axis102 = Kokkos::create_mirror_view(ref_axis102); - auto h_ref_axis120 = Kokkos::create_mirror_view(ref_axis120); - auto h_ref_axis201 = Kokkos::create_mirror_view(ref_axis201); - auto h_ref_axis210 = Kokkos::create_mirror_view(ref_axis210); + axes_type default_axes({0, 1, 2, 3}); + + std::vector > list_of_tested_axes = { + axes_type({0, 1, 2, 3}), axes_type({0, 1, 3, 2}), + axes_type({0, 2, 1, 3}), axes_type({0, 2, 3, 1}), + axes_type({0, 3, 1, 2}), axes_type({0, 3, 2, 1}), + axes_type({1, 0, 2, 3}), axes_type({1, 0, 3, 2}), + axes_type({1, 2, 0, 3}), axes_type({1, 2, 3, 0}), + axes_type({1, 3, 0, 2}), axes_type({1, 3, 2, 0}), + axes_type({2, 0, 1, 3}), axes_type({2, 0, 3, 1}), + axes_type({2, 1, 0, 3}), axes_type({2, 1, 3, 0}), + axes_type({2, 3, 0, 1}), axes_type({2, 3, 1, 0}), + axes_type({3, 0, 1, 2}), axes_type({3, 0, 2, 1}), + axes_type({3, 1, 0, 2}), axes_type({3, 1, 2, 0}), + axes_type({3, 2, 0, 1}), axes_type({3, 2, 1, 0})}; + + for (auto& tested_axes : list_of_tested_axes) { + axes_type out_extents; + auto [map, map_inv] = KokkosFFT::Impl::get_map_axes(x, tested_axes); + + // Convert to vector, need to reverse the order for LayoutLeft + std::vector _map(map.begin(), map.end()); + if (std::is_same::value) { + std::reverse(_map.begin(), _map.end()); + } + for (int i = 0; i < DIM; i++) { + out_extents.at(i) = x.extent(_map.at(i)); + } + + auto [_n0, _n1, _n2, _n3] = out_extents; + RealView4DType xt; + RealView4DType ref("ref", _n0, _n1, _n2, _n3); + + // Transposed Views + auto h_ref = Kokkos::create_mirror_view(ref); + + // Filling the transposed View + for (int i0 = 0; i0 < h_x.extent(0); i0++) { + for (int i1 = 0; i1 < h_x.extent(1); i1++) { + for (int i2 = 0; i2 < h_x.extent(2); i2++) { + for (int i3 = 0; i3 < h_x.extent(3); i3++) { + int _i0 = i0, _i1 = i1, _i2 = i2, _i3 = i3; + if (_map[0] == 1) { + _i0 = i1; + } else if (_map[0] == 2) { + _i0 = i2; + } else if (_map[0] == 3) { + _i0 = i3; + } + + if (_map[1] == 0) { + _i1 = i0; + } else if (_map[1] == 2) { + _i1 = i2; + } else if (_map[1] == 3) { + _i1 = i3; + } + + if (_map[2] == 0) { + _i2 = i0; + } else if (_map[2] == 1) { + _i2 = i1; + } else if (_map[2] == 3) { + _i2 = i3; + } + + if (_map[3] == 0) { + _i3 = i0; + } else if (_map[3] == 1) { + _i3 = i1; + } else if (_map[3] == 2) { + _i3 = i2; + } + + h_ref(_i0, _i1, _i2, _i3) = h_x(i0, i1, i2, i3); + } + } + } + } + + Kokkos::deep_copy(ref, h_ref); + Kokkos::fence(); + + if (tested_axes == default_axes) { + EXPECT_THROW( + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes), // xt is identical to x + std::runtime_error); + } else { + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes); // xt is the transpose of x + EXPECT_TRUE(allclose(xt, ref, 1.e-5, 1.e-12)); + } + } +} + +template +void test_transpose_5d() { + using RealView5DType = Kokkos::View; + const int n0 = 2, n1 = 3, n2 = 4, n3 = 5, n4 = 6; + constexpr std::size_t DIM = 5; + RealView5DType x("x", n0, n1, n2, n3, n4); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto h_x = Kokkos::create_mirror_view(x); Kokkos::deep_copy(h_x, x); - for (int i0 = 0; i0 < h_x.extent(0); i0++) { - for (int i1 = 0; i1 < h_x.extent(1); i1++) { - for (int i2 = 0; i2 < h_x.extent(2); i2++) { - h_ref_axis021(i0, i2, i1) = h_x(i0, i1, i2); - h_ref_axis102(i1, i0, i2) = h_x(i0, i1, i2); - h_ref_axis120(i1, i2, i0) = h_x(i0, i1, i2); - h_ref_axis201(i2, i0, i1) = h_x(i0, i1, i2); - h_ref_axis210(i2, i1, i0) = h_x(i0, i1, i2); + // Transposed views + axes_type default_axes({0, 1, 2, 3, 4}); + + // Randomly choosen axes + std::vector > list_of_tested_axes = { + axes_type({0, 1, 2, 3, 4}), axes_type({0, 1, 3, 2, 4}), + axes_type({0, 2, 1, 3, 4}), axes_type({0, 2, 3, 4, 1}), + axes_type({0, 3, 4, 1, 2}), axes_type({0, 3, 2, 1, 4}), + axes_type({0, 4, 3, 1, 2}), axes_type({0, 4, 2, 1, 3}), + axes_type({1, 0, 2, 3, 4}), axes_type({1, 0, 4, 3, 2}), + axes_type({1, 2, 0, 3, 4}), axes_type({1, 2, 3, 4, 0}), + axes_type({1, 3, 0, 4, 2}), axes_type({1, 3, 4, 2, 0}), + axes_type({1, 4, 0, 2, 3}), axes_type({1, 4, 3, 2, 0}), + axes_type({2, 0, 1, 3, 4}), axes_type({2, 0, 4, 3, 1}), + axes_type({2, 1, 0, 3, 4}), axes_type({2, 1, 3, 4, 0}), + axes_type({2, 3, 4, 0, 1}), axes_type({2, 3, 4, 1, 0}), + axes_type({2, 4, 3, 0, 1}), axes_type({2, 4, 1, 0, 3}), + axes_type({3, 0, 1, 2, 4}), axes_type({3, 0, 2, 4, 1}), + axes_type({3, 1, 0, 2, 4}), axes_type({3, 1, 4, 2, 0}), + axes_type({3, 2, 0, 1, 4}), axes_type({3, 2, 1, 4, 0}), + axes_type({3, 4, 0, 1, 2}), axes_type({3, 4, 1, 2, 0}), + axes_type({4, 0, 1, 2, 3}), axes_type({4, 0, 1, 3, 2}), + axes_type({4, 1, 2, 0, 3}), axes_type({4, 1, 2, 3, 0}), + axes_type({4, 2, 3, 1, 0}), axes_type({4, 2, 3, 0, 1}), + axes_type({4, 3, 1, 0, 2}), axes_type({4, 3, 2, 0, 1})}; + + for (auto& tested_axes : list_of_tested_axes) { + axes_type out_extents; + auto [map, map_inv] = KokkosFFT::Impl::get_map_axes(x, tested_axes); + + // Convert to vector, need to reverse the order for LayoutLeft + std::vector _map(map.begin(), map.end()); + if (std::is_same::value) { + std::reverse(_map.begin(), _map.end()); + } + + for (int i = 0; i < DIM; i++) { + out_extents.at(i) = x.extent(_map.at(i)); + } + + auto [_n0, _n1, _n2, _n3, _n4] = out_extents; + RealView5DType xt; + RealView5DType ref("ref", _n0, _n1, _n2, _n3, _n4); + + // Transposed Views + auto h_ref = Kokkos::create_mirror_view(ref); + + // Filling the transposed View + for (int i0 = 0; i0 < h_x.extent(0); i0++) { + for (int i1 = 0; i1 < h_x.extent(1); i1++) { + for (int i2 = 0; i2 < h_x.extent(2); i2++) { + for (int i3 = 0; i3 < h_x.extent(3); i3++) { + for (int i4 = 0; i4 < h_x.extent(4); i4++) { + int _i0 = i0, _i1 = i1, _i2 = i2, _i3 = i3, _i4 = i4; + if (_map[0] == 1) { + _i0 = i1; + } else if (_map[0] == 2) { + _i0 = i2; + } else if (_map[0] == 3) { + _i0 = i3; + } else if (_map[0] == 4) { + _i0 = i4; + } + + if (_map[1] == 0) { + _i1 = i0; + } else if (_map[1] == 2) { + _i1 = i2; + } else if (_map[1] == 3) { + _i1 = i3; + } else if (_map[1] == 4) { + _i1 = i4; + } + + if (_map[2] == 0) { + _i2 = i0; + } else if (_map[2] == 1) { + _i2 = i1; + } else if (_map[2] == 3) { + _i2 = i3; + } else if (_map[2] == 4) { + _i2 = i4; + } + + if (_map[3] == 0) { + _i3 = i0; + } else if (_map[3] == 1) { + _i3 = i1; + } else if (_map[3] == 2) { + _i3 = i2; + } else if (_map[3] == 4) { + _i3 = i4; + } + + if (_map[4] == 0) { + _i4 = i0; + } else if (_map[4] == 1) { + _i4 = i1; + } else if (_map[4] == 2) { + _i4 = i2; + } else if (_map[4] == 3) { + _i4 = i3; + } + + h_ref(_i0, _i1, _i2, _i3, _i4) = h_x(i0, i1, i2, i3, i4); + } + } + } } } + + Kokkos::deep_copy(ref, h_ref); + Kokkos::fence(); + + if (tested_axes == default_axes) { + EXPECT_THROW( + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes), // xt is identical to x + std::runtime_error); + } else { + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes); // xt is the transpose of x + EXPECT_TRUE(allclose(xt, ref, 1.e-5, 1.e-12)); + } } +} - Kokkos::deep_copy(ref_axis021, h_ref_axis021); - Kokkos::deep_copy(ref_axis102, h_ref_axis102); - Kokkos::deep_copy(ref_axis120, h_ref_axis120); - Kokkos::deep_copy(ref_axis201, h_ref_axis201); - Kokkos::deep_copy(ref_axis210, h_ref_axis210); +template +void test_transpose_6d() { + using RealView6DType = + Kokkos::View; + const int n0 = 2, n1 = 3, n2 = 4, n3 = 5, n4 = 6, n5 = 7; + constexpr std::size_t DIM = 6; + RealView6DType x("x", n0, n1, n2, n3, n4, n5); - Kokkos::fence(); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); - EXPECT_THROW(KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis012, - axes_type<3>({0, 1, 2})), // xt is identical to x - std::runtime_error); + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); - KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis021, - axes_type<3>({0, 2, 1})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis021, ref_axis021, 1.e-5, 1.e-12)); + // Transposed views + axes_type default_axes({0, 1, 2, 3, 4, 5}); - KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis102, - axes_type<3>({1, 0, 2})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis102, ref_axis102, 1.e-5, 1.e-12)); + // Too much combinations, choose axes randomly + std::vector > list_of_tested_axes; - KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis120, - axes_type<3>({1, 2, 0})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis120, ref_axis120, 1.e-5, 1.e-12)); + constexpr int nb_trials = 100; + auto rng = std::default_random_engine{}; - KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis201, - axes_type<3>({2, 0, 1})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis201, ref_axis201, 1.e-5, 1.e-12)); + for (int i = 0; i < nb_trials; i++) { + axes_type tmp_axes = default_axes; + std::shuffle(std::begin(tmp_axes), std::end(tmp_axes), rng); + list_of_tested_axes.push_back(tmp_axes); + } - KokkosFFT::Impl::transpose( - execution_space(), x, xt_axis210, - axes_type<3>({2, 1, 0})); // xt is the transpose of x - EXPECT_TRUE(allclose(xt_axis210, ref_axis210, 1.e-5, 1.e-12)); + for (auto& tested_axes : list_of_tested_axes) { + axes_type out_extents; + auto [map, map_inv] = KokkosFFT::Impl::get_map_axes(x, tested_axes); + + // Convert to vector, need to reverse the order for LayoutLeft + std::vector _map(map.begin(), map.end()); + if (std::is_same::value) { + std::reverse(_map.begin(), _map.end()); + } + + for (int i = 0; i < DIM; i++) { + out_extents.at(i) = x.extent(_map.at(i)); + } + + auto [_n0, _n1, _n2, _n3, _n4, _n5] = out_extents; + RealView6DType xt; + RealView6DType ref("ref", _n0, _n1, _n2, _n3, _n4, _n5); + + // Transposed Views + auto h_ref = Kokkos::create_mirror_view(ref); + + // Filling the transposed View + for (int i0 = 0; i0 < h_x.extent(0); i0++) { + for (int i1 = 0; i1 < h_x.extent(1); i1++) { + for (int i2 = 0; i2 < h_x.extent(2); i2++) { + for (int i3 = 0; i3 < h_x.extent(3); i3++) { + for (int i4 = 0; i4 < h_x.extent(4); i4++) { + for (int i5 = 0; i5 < h_x.extent(5); i5++) { + int _i0 = i0, _i1 = i1, _i2 = i2, _i3 = i3, _i4 = i4, _i5 = i5; + if (_map[0] == 1) { + _i0 = i1; + } else if (_map[0] == 2) { + _i0 = i2; + } else if (_map[0] == 3) { + _i0 = i3; + } else if (_map[0] == 4) { + _i0 = i4; + } else if (_map[0] == 5) { + _i0 = i5; + } + + if (_map[1] == 0) { + _i1 = i0; + } else if (_map[1] == 2) { + _i1 = i2; + } else if (_map[1] == 3) { + _i1 = i3; + } else if (_map[1] == 4) { + _i1 = i4; + } else if (_map[1] == 5) { + _i1 = i5; + } + + if (_map[2] == 0) { + _i2 = i0; + } else if (_map[2] == 1) { + _i2 = i1; + } else if (_map[2] == 3) { + _i2 = i3; + } else if (_map[2] == 4) { + _i2 = i4; + } else if (_map[2] == 5) { + _i2 = i5; + } + + if (_map[3] == 0) { + _i3 = i0; + } else if (_map[3] == 1) { + _i3 = i1; + } else if (_map[3] == 2) { + _i3 = i2; + } else if (_map[3] == 4) { + _i3 = i4; + } else if (_map[3] == 5) { + _i3 = i5; + } + + if (_map[4] == 0) { + _i4 = i0; + } else if (_map[4] == 1) { + _i4 = i1; + } else if (_map[4] == 2) { + _i4 = i2; + } else if (_map[4] == 3) { + _i4 = i3; + } else if (_map[4] == 5) { + _i4 = i5; + } + + if (_map[5] == 0) { + _i5 = i0; + } else if (_map[5] == 1) { + _i5 = i1; + } else if (_map[5] == 2) { + _i5 = i2; + } else if (_map[5] == 3) { + _i5 = i3; + } else if (_map[5] == 4) { + _i5 = i4; + } + + h_ref(_i0, _i1, _i2, _i3, _i4, _i5) = + h_x(i0, i1, i2, i3, i4, i5); + } + } + } + } + } + } + + Kokkos::deep_copy(ref, h_ref); + Kokkos::fence(); + + if (tested_axes == default_axes) { + EXPECT_THROW( + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes), // xt is identical to x + std::runtime_error); + } else { + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes); // xt is the transpose of x + EXPECT_TRUE(allclose(xt, ref, 1.e-5, 1.e-12)); + } + } +} + +template +void test_transpose_7d() { + using RealView7DType = + Kokkos::View; + const int n0 = 2, n1 = 3, n2 = 4, n3 = 5, n4 = 6, n5 = 7, n6 = 8; + constexpr std::size_t DIM = 7; + RealView7DType x("x", n0, n1, n2, n3, n4, n5, n6); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + + // Transposed views + axes_type default_axes({0, 1, 2, 3, 4, 5, 6}); + + // Too much combinations, choose axes randomly + std::vector > list_of_tested_axes; + + constexpr int nb_trials = 100; + auto rng = std::default_random_engine{}; + + for (int i = 0; i < nb_trials; i++) { + axes_type tmp_axes = default_axes; + std::shuffle(std::begin(tmp_axes), std::end(tmp_axes), rng); + list_of_tested_axes.push_back(tmp_axes); + } + + for (auto& tested_axes : list_of_tested_axes) { + axes_type out_extents; + auto [map, map_inv] = KokkosFFT::Impl::get_map_axes(x, tested_axes); + + // Convert to vector, need to reverse the order for LayoutLeft + std::vector _map(map.begin(), map.end()); + if (std::is_same::value) { + std::reverse(_map.begin(), _map.end()); + } + + for (int i = 0; i < DIM; i++) { + out_extents.at(i) = x.extent(_map.at(i)); + } + + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = out_extents; + RealView7DType xt; + RealView7DType ref("ref", _n0, _n1, _n2, _n3, _n4, _n5, _n6); + + // Transposed Views + auto h_ref = Kokkos::create_mirror_view(ref); + + // Filling the transposed View + for (int i0 = 0; i0 < h_x.extent(0); i0++) { + for (int i1 = 0; i1 < h_x.extent(1); i1++) { + for (int i2 = 0; i2 < h_x.extent(2); i2++) { + for (int i3 = 0; i3 < h_x.extent(3); i3++) { + for (int i4 = 0; i4 < h_x.extent(4); i4++) { + for (int i5 = 0; i5 < h_x.extent(5); i5++) { + for (int i6 = 0; i6 < h_x.extent(6); i6++) { + int _i0 = i0, _i1 = i1, _i2 = i2, _i3 = i3, _i4 = i4, + _i5 = i5, _i6 = i6; + if (_map[0] == 1) { + _i0 = i1; + } else if (_map[0] == 2) { + _i0 = i2; + } else if (_map[0] == 3) { + _i0 = i3; + } else if (_map[0] == 4) { + _i0 = i4; + } else if (_map[0] == 5) { + _i0 = i5; + } else if (_map[0] == 6) { + _i0 = i6; + } + + if (_map[1] == 0) { + _i1 = i0; + } else if (_map[1] == 2) { + _i1 = i2; + } else if (_map[1] == 3) { + _i1 = i3; + } else if (_map[1] == 4) { + _i1 = i4; + } else if (_map[1] == 5) { + _i1 = i5; + } else if (_map[1] == 6) { + _i1 = i6; + } + + if (_map[2] == 0) { + _i2 = i0; + } else if (_map[2] == 1) { + _i2 = i1; + } else if (_map[2] == 3) { + _i2 = i3; + } else if (_map[2] == 4) { + _i2 = i4; + } else if (_map[2] == 5) { + _i2 = i5; + } else if (_map[2] == 6) { + _i2 = i6; + } + + if (_map[3] == 0) { + _i3 = i0; + } else if (_map[3] == 1) { + _i3 = i1; + } else if (_map[3] == 2) { + _i3 = i2; + } else if (_map[3] == 4) { + _i3 = i4; + } else if (_map[3] == 5) { + _i3 = i5; + } else if (_map[3] == 6) { + _i3 = i6; + } + + if (_map[4] == 0) { + _i4 = i0; + } else if (_map[4] == 1) { + _i4 = i1; + } else if (_map[4] == 2) { + _i4 = i2; + } else if (_map[4] == 3) { + _i4 = i3; + } else if (_map[4] == 5) { + _i4 = i5; + } else if (_map[4] == 6) { + _i4 = i6; + } + + if (_map[5] == 0) { + _i5 = i0; + } else if (_map[5] == 1) { + _i5 = i1; + } else if (_map[5] == 2) { + _i5 = i2; + } else if (_map[5] == 3) { + _i5 = i3; + } else if (_map[5] == 4) { + _i5 = i4; + } else if (_map[5] == 6) { + _i5 = i6; + } + + if (_map[6] == 0) { + _i6 = i0; + } else if (_map[6] == 1) { + _i6 = i1; + } else if (_map[6] == 2) { + _i6 = i2; + } else if (_map[6] == 3) { + _i6 = i3; + } else if (_map[6] == 4) { + _i6 = i4; + } else if (_map[6] == 5) { + _i6 = i5; + } + + h_ref(_i0, _i1, _i2, _i3, _i4, _i5, _i6) = + h_x(i0, i1, i2, i3, i4, i5, i6); + } + } + } + } + } + } + } + + Kokkos::deep_copy(ref, h_ref); + Kokkos::fence(); + + if (tested_axes == default_axes) { + EXPECT_THROW( + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes), // xt is identical to x + std::runtime_error); + } else { + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes); // xt is the transpose of x + EXPECT_TRUE(allclose(xt, ref, 1.e-5, 1.e-12)); + } + } +} + +template +void test_transpose_8d() { + using RealView8DType = + Kokkos::View; + const int n0 = 2, n1 = 3, n2 = 4, n3 = 5, n4 = 6, n5 = 7, n6 = 8, n7 = 9; + constexpr std::size_t DIM = 8; + RealView8DType x("x", n0, n1, n2, n3, n4, n5, n6, n7); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, 1.0); + + auto h_x = Kokkos::create_mirror_view(x); + Kokkos::deep_copy(h_x, x); + + // Transposed views + axes_type default_axes({0, 1, 2, 3, 4, 5, 6, 7}); + + // Too much combinations, choose axes randomly + std::vector > list_of_tested_axes; + + constexpr int nb_trials = 100; + auto rng = std::default_random_engine{}; + + for (int i = 0; i < nb_trials; i++) { + axes_type tmp_axes = default_axes; + std::shuffle(std::begin(tmp_axes), std::end(tmp_axes), rng); + list_of_tested_axes.push_back(tmp_axes); + } + + for (auto& tested_axes : list_of_tested_axes) { + axes_type out_extents; + auto [map, map_inv] = KokkosFFT::Impl::get_map_axes(x, tested_axes); + + // Convert to vector, need to reverse the order for LayoutLeft + std::vector _map(map.begin(), map.end()); + if (std::is_same::value) { + std::reverse(_map.begin(), _map.end()); + } + + for (int i = 0; i < DIM; i++) { + out_extents.at(i) = x.extent(_map.at(i)); + } + + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = out_extents; + RealView8DType xt; + RealView8DType ref("ref", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); + + // Transposed Views + auto h_ref = Kokkos::create_mirror_view(ref); + + // Filling the transposed View + for (int i0 = 0; i0 < h_x.extent(0); i0++) { + for (int i1 = 0; i1 < h_x.extent(1); i1++) { + for (int i2 = 0; i2 < h_x.extent(2); i2++) { + for (int i3 = 0; i3 < h_x.extent(3); i3++) { + for (int i4 = 0; i4 < h_x.extent(4); i4++) { + for (int i5 = 0; i5 < h_x.extent(5); i5++) { + for (int i6 = 0; i6 < h_x.extent(6); i6++) { + for (int i7 = 0; i7 < h_x.extent(7); i7++) { + int _i0 = i0, _i1 = i1, _i2 = i2, _i3 = i3, _i4 = i4, + _i5 = i5, _i6 = i6, _i7 = i7; + if (_map[0] == 1) { + _i0 = i1; + } else if (_map[0] == 2) { + _i0 = i2; + } else if (_map[0] == 3) { + _i0 = i3; + } else if (_map[0] == 4) { + _i0 = i4; + } else if (_map[0] == 5) { + _i0 = i5; + } else if (_map[0] == 6) { + _i0 = i6; + } else if (_map[0] == 7) { + _i0 = i7; + } + + if (_map[1] == 0) { + _i1 = i0; + } else if (_map[1] == 2) { + _i1 = i2; + } else if (_map[1] == 3) { + _i1 = i3; + } else if (_map[1] == 4) { + _i1 = i4; + } else if (_map[1] == 5) { + _i1 = i5; + } else if (_map[1] == 6) { + _i1 = i6; + } else if (_map[1] == 7) { + _i1 = i7; + } + + if (_map[2] == 0) { + _i2 = i0; + } else if (_map[2] == 1) { + _i2 = i1; + } else if (_map[2] == 3) { + _i2 = i3; + } else if (_map[2] == 4) { + _i2 = i4; + } else if (_map[2] == 5) { + _i2 = i5; + } else if (_map[2] == 6) { + _i2 = i6; + } else if (_map[2] == 7) { + _i2 = i7; + } + + if (_map[3] == 0) { + _i3 = i0; + } else if (_map[3] == 1) { + _i3 = i1; + } else if (_map[3] == 2) { + _i3 = i2; + } else if (_map[3] == 4) { + _i3 = i4; + } else if (_map[3] == 5) { + _i3 = i5; + } else if (_map[3] == 6) { + _i3 = i6; + } else if (_map[3] == 7) { + _i3 = i7; + } + + if (_map[4] == 0) { + _i4 = i0; + } else if (_map[4] == 1) { + _i4 = i1; + } else if (_map[4] == 2) { + _i4 = i2; + } else if (_map[4] == 3) { + _i4 = i3; + } else if (_map[4] == 5) { + _i4 = i5; + } else if (_map[4] == 6) { + _i4 = i6; + } else if (_map[4] == 7) { + _i4 = i7; + } + + if (_map[5] == 0) { + _i5 = i0; + } else if (_map[5] == 1) { + _i5 = i1; + } else if (_map[5] == 2) { + _i5 = i2; + } else if (_map[5] == 3) { + _i5 = i3; + } else if (_map[5] == 4) { + _i5 = i4; + } else if (_map[5] == 6) { + _i5 = i6; + } else if (_map[5] == 7) { + _i5 = i7; + } + + if (_map[6] == 0) { + _i6 = i0; + } else if (_map[6] == 1) { + _i6 = i1; + } else if (_map[6] == 2) { + _i6 = i2; + } else if (_map[6] == 3) { + _i6 = i3; + } else if (_map[6] == 4) { + _i6 = i4; + } else if (_map[6] == 5) { + _i6 = i5; + } else if (_map[6] == 7) { + _i6 = i7; + } + + if (_map[7] == 0) { + _i7 = i0; + } else if (_map[7] == 1) { + _i7 = i1; + } else if (_map[7] == 2) { + _i7 = i2; + } else if (_map[7] == 3) { + _i7 = i3; + } else if (_map[7] == 4) { + _i7 = i4; + } else if (_map[7] == 5) { + _i7 = i5; + } else if (_map[7] == 6) { + _i7 = i6; + } + + h_ref(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7) = + h_x(i0, i1, i2, i3, i4, i5, i6, i7); + } + } + } + } + } + } + } + } + + Kokkos::deep_copy(ref, h_ref); + Kokkos::fence(); + + if (tested_axes == default_axes) { + EXPECT_THROW( + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes), // xt is identical to x + std::runtime_error); + } else { + KokkosFFT::Impl::transpose(execution_space(), x, xt, + tested_axes); // xt is the transpose of x + EXPECT_TRUE(allclose(xt, ref, 1.e-5, 1.e-12)); + } + } +} + +TYPED_TEST(Transpose, 1DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_1d(); +} + +TYPED_TEST(Transpose, 2DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_2d(); +} + +TYPED_TEST(Transpose, 3DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_3d(); +} + +TYPED_TEST(Transpose, 4DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_4d(); +} + +TYPED_TEST(Transpose, 5DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_5d(); +} + +TYPED_TEST(Transpose, 6DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_6d(); +} + +TYPED_TEST(Transpose, 7DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_7d(); +} + +TYPED_TEST(Transpose, 8DView) { + using layout_type = typename TestFixture::layout_type; + + test_transpose_8d(); } \ No newline at end of file diff --git a/common/unit_test/Test_Types.hpp b/common/unit_test/Test_Types.hpp index b973593d..0ebd33d4 100644 --- a/common/unit_test/Test_Types.hpp +++ b/common/unit_test/Test_Types.hpp @@ -9,6 +9,16 @@ template using View2D = Kokkos::View; template using View3D = Kokkos::View; +template +using View4D = Kokkos::View; +template +using View5D = Kokkos::View; +template +using View6D = Kokkos::View; +template +using View7D = Kokkos::View; +template +using View8D = Kokkos::View; // Layout Left template diff --git a/fft/unit_test/Test_Transform.cpp b/fft/unit_test/Test_Transform.cpp index e77d17c3..8b2edcdd 100644 --- a/fft/unit_test/Test_Transform.cpp +++ b/fft/unit_test/Test_Transform.cpp @@ -784,6 +784,387 @@ void test_fft1_1dfft_3dview(T atol = 1.e-12) { EXPECT_TRUE(allclose(xr_axis2, ref_xr, 1.e-5, atol)); } +template +void test_fft1_1dfft_4dview(T atol = 1.e-12) { + const int n0 = 10, n1 = 12, n2 = 8, n3 = 5; + using RealView4DType = Kokkos::View; + using ComplexView4DType = + Kokkos::View****, LayoutType, execution_space>; + + ComplexView4DType x("x", n0, n1, n2, n3), ref_x("ref_x", n0, n1, n2, n3); + ComplexView4DType x_axis0("x_axis0", n0, n1, n2, n3), + x_axis1("x_axis1", n0, n1, n2, n3), x_axis2("x_axis2", n0, n1, n2, n3), + x_axis3("x_axis3", n0, n1, n2, n3); + ComplexView4DType out_axis0("out_axis0", n0, n1, n2, n3), + ref_out_axis0("ref_out_axis0", n0, n1, n2, n3); + ComplexView4DType out_axis1("out_axis1", n0, n1, n2, n3), + ref_out_axis1("ref_out_axis1", n0, n1, n2, n3); + ComplexView4DType out_axis2("out_axis2", n0, n1, n2, n3), + ref_out_axis2("ref_out_axis2", n0, n1, n2, n3); + ComplexView4DType out_axis3("out_axis3", n0, n1, n2, n3), + ref_out_axis3("ref_out_axis3", n0, n1, n2, n3); + + RealView4DType xr("xr", n0, n1, n2, n3), ref_xr("ref_xr", n0, n1, n2, n3); + RealView4DType xr_axis0("xr_axis0", n0, n1, n2, n3), + xr_axis1("xr_axis1", n0, n1, n2, n3), + xr_axis2("xr_axis2", n0, n1, n2, n3), + xr_axis3("xr_axis3", n0, n1, n2, n3); + ComplexView4DType outr_axis0("outr_axis0", n0 / 2 + 1, n1, n2, n3), + outr_axis1("outr_axis1", n0, n1 / 2 + 1, n2, n3), + outr_axis2("outr_axis2", n0, n1, n2 / 2 + 1, n3), + outr_axis3("outr_axis3", n0, n1, n2, n3 / 2 + 1); + + const Kokkos::complex I(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, I); + Kokkos::fill_random(xr, random_pool, 1); + + // Since HIP FFT destructs the input data, we need to keep the input data in + // different place + Kokkos::deep_copy(ref_x, x); + Kokkos::deep_copy(ref_xr, xr); + + Kokkos::fence(); + + // Along axis 0 (transpose neeed) + // Perform batched 1D (along 0th axis) FFT sequentially + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + auto sub_x = Kokkos::subview(x, Kokkos::ALL, i1, i2, i3); + auto sub_ref = Kokkos::subview(ref_out_axis0, Kokkos::ALL, i1, i2, i3); + fft1(sub_x, sub_ref); + } + } + } + + KokkosFFT::fft(execution_space(), x, out_axis0, + KokkosFFT::Normalization::BACKWARD, /*axis=*/0); + EXPECT_TRUE(allclose(out_axis0, ref_out_axis0, 1.e-5, atol)); + + KokkosFFT::ifft(execution_space(), out_axis0, x_axis0, + KokkosFFT::Normalization::BACKWARD, /*axis=*/0); + EXPECT_TRUE(allclose(x_axis0, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr_axis0, + KokkosFFT::Normalization::BACKWARD, /*axis=*/0); + KokkosFFT::irfft(execution_space(), outr_axis0, xr_axis0, + KokkosFFT::Normalization::BACKWARD, /*axis=*/0); + + EXPECT_TRUE(allclose(xr_axis0, ref_xr, 1.e-5, atol)); + + // Recover input from reference + Kokkos::deep_copy(x, ref_x); + Kokkos::deep_copy(xr, ref_xr); + + // Along axis 1 (transpose neeed) + // Perform batched 1D (along 1st axis) FFT sequentially + for (int i3 = 0; i3 < n3; i3++) { + for (int i2 = 0; i2 < n2; i2++) { + for (int i0 = 0; i0 < n0; i0++) { + auto sub_x = Kokkos::subview(x, i0, Kokkos::ALL, i2, i3); + auto sub_ref = Kokkos::subview(ref_out_axis1, i0, Kokkos::ALL, i2, i3); + fft1(sub_x, sub_ref); + } + } + } + + KokkosFFT::fft(execution_space(), x, out_axis1, + KokkosFFT::Normalization::BACKWARD, /*axis=*/1); + EXPECT_TRUE(allclose(out_axis1, ref_out_axis1, 1.e-5, atol)); + + KokkosFFT::ifft(execution_space(), out_axis1, x_axis1, + KokkosFFT::Normalization::BACKWARD, /*axis=*/1); + EXPECT_TRUE(allclose(x_axis1, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr_axis1, + KokkosFFT::Normalization::BACKWARD, /*axis=*/1); + KokkosFFT::irfft(execution_space(), outr_axis1, xr_axis1, + KokkosFFT::Normalization::BACKWARD, /*axis=*/1); + + EXPECT_TRUE(allclose(xr_axis1, ref_xr, 1.e-5, atol)); + + // Recover input from reference + Kokkos::deep_copy(x, ref_x); + Kokkos::deep_copy(xr, ref_xr); + + // Along axis 2 + // Perform batched 1D (along 2nd axis) FFT sequentially + for (int i3 = 0; i3 < n3; i3++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + auto sub_x = Kokkos::subview(x, i0, i1, Kokkos::ALL, i3); + auto sub_ref = Kokkos::subview(ref_out_axis2, i0, i1, Kokkos::ALL, i3); + fft1(sub_x, sub_ref); + } + } + } + + KokkosFFT::fft(execution_space(), x, out_axis2, + KokkosFFT::Normalization::BACKWARD, /*axis=*/2); + EXPECT_TRUE(allclose(out_axis2, ref_out_axis2, 1.e-5, atol)); + + KokkosFFT::ifft(execution_space(), out_axis2, x_axis2, + KokkosFFT::Normalization::BACKWARD, /*axis=*/2); + EXPECT_TRUE(allclose(x_axis2, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr_axis2, + KokkosFFT::Normalization::BACKWARD, /*axis=*/2); + KokkosFFT::irfft(execution_space(), outr_axis2, xr_axis2, + KokkosFFT::Normalization::BACKWARD, /*axis=*/2); + + EXPECT_TRUE(allclose(xr_axis2, ref_xr, 1.e-5, atol)); + + // Recover input from reference + Kokkos::deep_copy(x, ref_x); + Kokkos::deep_copy(xr, ref_xr); + + // Along axis 3 + // Perform batched 1D (along 3rd axis) FFT sequentially + for (int i2 = 0; i2 < n2; i2++) { + for (int i1 = 0; i1 < n1; i1++) { + for (int i0 = 0; i0 < n0; i0++) { + auto sub_x = Kokkos::subview(x, i0, i1, i2, Kokkos::ALL); + auto sub_ref = Kokkos::subview(ref_out_axis3, i0, i1, i2, Kokkos::ALL); + fft1(sub_x, sub_ref); + } + } + } + + KokkosFFT::fft(execution_space(), x, out_axis3, + KokkosFFT::Normalization::BACKWARD, /*axis=*/3); + EXPECT_TRUE(allclose(out_axis3, ref_out_axis3, 1.e-5, atol)); + + KokkosFFT::ifft(execution_space(), out_axis3, x_axis3, + KokkosFFT::Normalization::BACKWARD, /*axis=*/3); + EXPECT_TRUE(allclose(x_axis3, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr_axis3, + KokkosFFT::Normalization::BACKWARD, /*axis=*/3); + KokkosFFT::irfft(execution_space(), outr_axis3, xr_axis3, + KokkosFFT::Normalization::BACKWARD, /*axis=*/3); + + EXPECT_TRUE(allclose(xr_axis3, ref_xr, 1.e-5, atol)); +} + +template +void test_fft1_1dfft_5dview(T atol = 1.e-12) { + const int n0 = 10, n1 = 6, n2 = 8, n3 = 5, n4 = 4; + using RealView5DType = Kokkos::View; + using ComplexView5DType = + Kokkos::View*****, LayoutType, execution_space>; + + constexpr int DIM = 5; + std::array shape = {n0, n1, n2, n3, n4}; + ComplexView5DType x("x", n0, n1, n2, n3, n4), + ref_x("ref_x", n0, n1, n2, n3, n4); + + for (int axis = 0; axis < DIM; axis++) { + std::array shape_c2r = shape; + shape_c2r.at(axis) = shape_c2r.at(axis) / 2 + 1; + auto [_n0, _n1, _n2, _n3, _n4] = shape_c2r; + ComplexView5DType _x("_x", n0, n1, n2, n3, n4), + out("out", n0, n1, n2, n3, n4), ref_out("ref_out", n0, n1, n2, n3, n4); + RealView5DType xr("xr", n0, n1, n2, n3, n4), + ref_xr("ref_xr", n0, n1, n2, n3, n4), _xr("_xr", n0, n1, n2, n3, n4); + ComplexView5DType outr("outr", _n0, _n1, _n2, _n3, _n4); + + const Kokkos::complex I(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, I); + Kokkos::fill_random(xr, random_pool, 1); + + // Since HIP FFT destructs the input data, we need to keep the input data in + // different place + Kokkos::deep_copy(ref_x, x); + Kokkos::deep_copy(ref_xr, xr); + + Kokkos::fence(); + + // Along one axis + // Simple identity tests + KokkosFFT::fft(execution_space(), x, out, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::ifft(execution_space(), out, _x, + KokkosFFT::Normalization::BACKWARD, axis); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::irfft(execution_space(), outr, _xr, + KokkosFFT::Normalization::BACKWARD, axis); + + EXPECT_TRUE(allclose(_xr, ref_xr, 1.e-5, atol)); + } +} + +template +void test_fft1_1dfft_6dview(T atol = 1.e-12) { + const int n0 = 10, n1 = 6, n2 = 8, n3 = 5, n4 = 4, n5 = 3; + using RealView6DType = Kokkos::View; + using ComplexView6DType = + Kokkos::View******, LayoutType, execution_space>; + + constexpr int DIM = 6; + std::array shape = {n0, n1, n2, n3, n4, n5}; + ComplexView6DType x("x", n0, n1, n2, n3, n4, n5), + ref_x("ref_x", n0, n1, n2, n3, n4, n5); + + for (int axis = 0; axis < DIM; axis++) { + std::array shape_c2r = shape; + shape_c2r.at(axis) = shape_c2r.at(axis) / 2 + 1; + auto [_n0, _n1, _n2, _n3, _n4, _n5] = shape_c2r; + ComplexView6DType _x("_x", n0, n1, n2, n3, n4, n5), + out("out", n0, n1, n2, n3, n4, n5), + ref_out("ref_out", n0, n1, n2, n3, n4, n5); + RealView6DType xr("xr", n0, n1, n2, n3, n4, n5), + ref_xr("ref_xr", n0, n1, n2, n3, n4, n5), + _xr("_xr", n0, n1, n2, n3, n4, n5); + ComplexView6DType outr("outr", _n0, _n1, _n2, _n3, _n4, _n5); + + const Kokkos::complex I(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, I); + Kokkos::fill_random(xr, random_pool, 1); + + // Since HIP FFT destructs the input data, we need to keep the input data in + // different place + Kokkos::deep_copy(ref_x, x); + Kokkos::deep_copy(ref_xr, xr); + + Kokkos::fence(); + + // Along one axis + // Simple identity tests + KokkosFFT::fft(execution_space(), x, out, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::ifft(execution_space(), out, _x, + KokkosFFT::Normalization::BACKWARD, axis); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::irfft(execution_space(), outr, _xr, + KokkosFFT::Normalization::BACKWARD, axis); + + EXPECT_TRUE(allclose(_xr, ref_xr, 1.e-5, atol)); + } +} + +template +void test_fft1_1dfft_7dview(T atol = 1.e-12) { + const int n0 = 2, n1 = 6, n2 = 8, n3 = 5, n4 = 4, n5 = 3, n6 = 4; + using RealView7DType = Kokkos::View; + using ComplexView7DType = + Kokkos::View*******, LayoutType, execution_space>; + + constexpr int DIM = 7; + std::array shape = {n0, n1, n2, n3, n4, n5, n6}; + ComplexView7DType x("x", n0, n1, n2, n3, n4, n5, n6), + ref_x("ref_x", n0, n1, n2, n3, n4, n5, n6); + + for (int axis = 0; axis < DIM; axis++) { + std::array shape_c2r = shape; + shape_c2r.at(axis) = shape_c2r.at(axis) / 2 + 1; + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = shape_c2r; + ComplexView7DType _x("_x", n0, n1, n2, n3, n4, n5, n6), + out("out", n0, n1, n2, n3, n4, n5, n6), + ref_out("ref_out", n0, n1, n2, n3, n4, n5, n6); + RealView7DType xr("xr", n0, n1, n2, n3, n4, n5, n6), + ref_xr("ref_xr", n0, n1, n2, n3, n4, n5, n6), + _xr("_xr", n0, n1, n2, n3, n4, n5, n6); + ComplexView7DType outr("outr", _n0, _n1, _n2, _n3, _n4, _n5, _n6); + + const Kokkos::complex I(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, I); + Kokkos::fill_random(xr, random_pool, 1); + + // Since HIP FFT destructs the input data, we need to keep the input data in + // different place + Kokkos::deep_copy(ref_x, x); + Kokkos::deep_copy(ref_xr, xr); + + Kokkos::fence(); + + // Along one axis + // Simple identity tests + KokkosFFT::fft(execution_space(), x, out, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::ifft(execution_space(), out, _x, + KokkosFFT::Normalization::BACKWARD, axis); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::irfft(execution_space(), outr, _xr, + KokkosFFT::Normalization::BACKWARD, axis); + + EXPECT_TRUE(allclose(_xr, ref_xr, 1.e-5, atol)); + } +} + +template +void test_fft1_1dfft_8dview(T atol = 1.e-12) { + const int n0 = 2, n1 = 6, n2 = 8, n3 = 5, n4 = 4, n5 = 3, n6 = 4, n7 = 3; + using RealView8DType = Kokkos::View; + using ComplexView8DType = + Kokkos::View********, LayoutType, execution_space>; + + constexpr int DIM = 8; + std::array shape = {n0, n1, n2, n3, n4, n5, n6, n7}; + ComplexView8DType x("x", n0, n1, n2, n3, n4, n5, n6, n7), + ref_x("ref_x", n0, n1, n2, n3, n4, n5, n6, n7); + + for (int axis = 0; axis < DIM; axis++) { + std::array shape_c2r = shape; + shape_c2r.at(axis) = shape_c2r.at(axis) / 2 + 1; + auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = shape_c2r; + ComplexView8DType _x("_x", n0, n1, n2, n3, n4, n5, n6, n7), + out("out", n0, n1, n2, n3, n4, n5, n6, n7), + ref_out("ref_out", n0, n1, n2, n3, n4, n5, n6, n7); + RealView8DType xr("xr", n0, n1, n2, n3, n4, n5, n6, n7), + ref_xr("ref_xr", n0, n1, n2, n3, n4, n5, n6, n7), + _xr("_xr", n0, n1, n2, n3, n4, n5, n6, n7); + ComplexView8DType outr("outr", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); + + const Kokkos::complex I(1.0, 1.0); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::fill_random(x, random_pool, I); + Kokkos::fill_random(xr, random_pool, 1); + + // Since HIP FFT destructs the input data, we need to keep the input data in + // different place + Kokkos::deep_copy(ref_x, x); + Kokkos::deep_copy(ref_xr, xr); + + Kokkos::fence(); + + // Along one axis + // Simple identity tests + KokkosFFT::fft(execution_space(), x, out, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::ifft(execution_space(), out, _x, + KokkosFFT::Normalization::BACKWARD, axis); + EXPECT_TRUE(allclose(_x, ref_x, 1.e-5, atol)); + + // Simple identity tests for r2c and c2r transforms + KokkosFFT::rfft(execution_space(), xr, outr, + KokkosFFT::Normalization::BACKWARD, axis); + KokkosFFT::irfft(execution_space(), outr, _xr, + KokkosFFT::Normalization::BACKWARD, axis); + + EXPECT_TRUE(allclose(_xr, ref_xr, 1.e-5, atol)); + } +} + // Identity tests on 1D Views TYPED_TEST(FFT1D, Identity_1DView) { using float_type = typename TestFixture::float_type; @@ -852,6 +1233,51 @@ TYPED_TEST(FFT1D, FFT_batched_3DView) { test_fft1_1dfft_3dview(atol); } +// batced fft1 on 4D Views +TYPED_TEST(FFT1D, FFT_batched_4DView) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_1dfft_4dview(atol); +} + +// batced fft1 on 5D Views +TYPED_TEST(FFT1D, FFT_batched_5DView) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_1dfft_5dview(atol); +} + +// batced fft1 on 6D Views +TYPED_TEST(FFT1D, FFT_batched_6DView) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_1dfft_6dview(atol); +} + +// batced fft1 on 7D Views +TYPED_TEST(FFT1D, FFT_batched_7DView) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_1dfft_7dview(atol); +} + +// batced fft1 on 8D Views +TYPED_TEST(FFT1D, FFT_batched_8DView) { + using float_type = typename TestFixture::float_type; + using layout_type = typename TestFixture::layout_type; + + float_type atol = std::is_same_v ? 1.0e-6 : 1.0e-12; + test_fft1_1dfft_8dview(atol); +} + // Tests for FFT2 template void test_fft2_2dfft_2dview() {