diff --git a/common/src/KokkosFFT_padding.hpp b/common/src/KokkosFFT_padding.hpp index 1252b372..b20ec9d5 100644 --- a/common/src/KokkosFFT_padding.hpp +++ b/common/src/KokkosFFT_padding.hpp @@ -80,11 +80,11 @@ auto is_crop_or_pad_needed(const ViewType& view, return not_same; } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<1> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<1> s) { auto _n0 = s.at(0); - out = ViewType("out", _n0); + out = OutViewType("out", _n0); auto n0 = std::min(_n0, in.extent(0)); @@ -94,13 +94,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, KOKKOS_LAMBDA(int i0) { out(i0) = in(i0); }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<2> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<2> s) { constexpr std::size_t DIM = 2; auto [_n0, _n1] = s; - out = ViewType("out", _n0, _n1); + out = OutViewType("out", _n0, _n1); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -119,13 +119,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, range, KOKKOS_LAMBDA(int i0, int i1) { out(i0, i1) = in(i0, i1); }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<3> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<3> s) { constexpr std::size_t DIM = 3; auto [_n0, _n1, _n2] = s; - out = ViewType("out", _n0, _n1, _n2); + out = OutViewType("out", _n0, _n1, _n2); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -148,13 +148,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<4> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<4> s) { constexpr std::size_t DIM = 4; auto [_n0, _n1, _n2, _n3] = s; - out = ViewType("out", _n0, _n1, _n2, _n3); + out = OutViewType("out", _n0, _n1, _n2, _n3); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -178,13 +178,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<5> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<5> s) { constexpr std::size_t DIM = 5; auto [_n0, _n1, _n2, _n3, _n4] = s; - out = ViewType("out", _n0, _n1, _n2, _n3, _n4); + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -209,13 +209,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<6> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<6> s) { constexpr std::size_t DIM = 6; auto [_n0, _n1, _n2, _n3, _n4, _n5] = s; - out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5); + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -242,13 +242,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<7> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<7> s) { constexpr std::size_t DIM = 6; auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = s; - out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6); + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -278,13 +278,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type<8> s) { +template +void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type<8> s) { constexpr std::size_t DIM = 6; auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = s; - out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); + out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); int n0 = std::min(_n0, in.extent(0)); int n1 = std::min(_n1, in.extent(1)); @@ -318,10 +318,14 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, }); } -template -void crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, - ViewType& out, shape_type s) { - static_assert(ViewType::rank() == DIM, +template +void crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in, + OutViewType& out, shape_type s) { + static_assert(InViewType::rank() == DIM, + "crop_or_pad: Rank of View must be equal to Rank " + "of extended shape."); + static_assert(OutViewType::rank() == DIM, "crop_or_pad: Rank of View must be equal to Rank " "of extended shape."); _crop_or_pad(exec_space, in, out, s); @@ -329,4 +333,4 @@ void crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in, } // namespace Impl } // namespace KokkosFFT -#endif \ No newline at end of file +#endif diff --git a/common/src/KokkosFFT_transpose.hpp b/common/src/KokkosFFT_transpose.hpp index 3bef843d..18217308 100644 --- a/common/src/KokkosFFT_transpose.hpp +++ b/common/src/KokkosFFT_transpose.hpp @@ -86,6 +86,31 @@ auto get_map_axes(const ViewType& view, int axis) { return get_map_axes(view, axis_type<1>({axis})); } +template +void _prep_transpose_view(InViewType& in, OutViewType& out, + axis_type _map) { + constexpr std::size_t rank = OutViewType::rank(); + + // Assign a View if not a shallow copy + bool is_out_view_ready = true; + std::array out_extents; + for (int i = 0; i < rank; i++) { + out_extents.at(i) = in.extent(_map.at(i)); + if (out_extents.at(i) != out.extent(i)) { + is_out_view_ready = false; + } + } + + if (!is_out_view_ready) { + if constexpr (!OutViewType::memory_traits::is_unmanaged) { + KokkosFFT::Impl::create_view(out, "out", out_extents); + } else { + // try to reshape out if it currently has enough memory available + KokkosFFT::Impl::reshape_view(out, out_extents); + } + } +} + template = nullptr> void _transpose(const ExecutionSpace& exec_space, InViewType& in, @@ -110,19 +135,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1] = out_extents; - out = OutViewType("out", _n0, _n1); - } + _prep_transpose_view(in, out, _map); Kokkos::parallel_for( range, KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); }); @@ -148,20 +161,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2] = out_extents; - out = OutViewType("out", _n0, _n1, _n2); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2]}; Kokkos::parallel_for( @@ -196,20 +196,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2, _n3] = out_extents; - out = OutViewType("out", _n0, _n1, _n2, _n3); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3]}; Kokkos::parallel_for( @@ -246,20 +233,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2, _n3, _n4] = out_extents; - out = OutViewType("out", _n0, _n1, _n2, _n3, _n4); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], _map[4]}; Kokkos::parallel_for( @@ -298,20 +272,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2, _n3, _n4, _n5] = out_extents; - out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], _map[4], _map[5]}; @@ -352,20 +313,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = out_extents; - out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], _map[4], _map[5], _map[6]}; @@ -412,20 +360,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in, // [TO DO] Choose optimal tile sizes for each device ); - // Assign a View if not a shallow copy - bool is_out_view_ready = true; - std::array out_extents; - for (int i = 0; i < rank; i++) { - out_extents.at(i) = in.extent(_map.at(i)); - if (out_extents.at(i) != out.extent(i)) { - is_out_view_ready = false; - } - } - - if (!is_out_view_ready) { - auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = out_extents; - out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7); - } + _prep_transpose_view(in, out, _map); Kokkos::Array map = {_map[0], _map[1], _map[2], _map[3], _map[4], _map[5], _map[6], _map[7]}; @@ -499,4 +434,4 @@ void transpose(const ExecutionSpace& exec_space, InViewType& in, } // namespace Impl } // namespace KokkosFFT -#endif \ No newline at end of file +#endif diff --git a/common/src/KokkosFFT_utils.hpp b/common/src/KokkosFFT_utils.hpp index b26b75a3..0661baab 100644 --- a/common/src/KokkosFFT_utils.hpp +++ b/common/src/KokkosFFT_utils.hpp @@ -23,6 +23,14 @@ struct real_type> { using type = T; }; +template +struct managable_view_type { + using type = Kokkos::View>; +}; + template using real_type_t = typename real_type::type; @@ -199,7 +207,150 @@ auto extract_extents(const ViewType& view) { return extents; } +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2], extents[3]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2], extents[3], + extents[4]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5], extents[6]); +} + +template +void create_view(ViewType& out, const Label& label, + const std::array& extents) { + out = ViewType(label, extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5], extents[6], extents[7]); +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0])) { + out = ViewType(out.data(), extents[0]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1])) { + out = ViewType(out.data(), extents[0], extents[1]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2], + extents[3])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2], extents[3]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2], + extents[3], extents[4])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2], extents[3], + extents[4]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2], + extents[3], extents[4], extents[5])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2], + extents[3], extents[4], extents[5], + extents[6])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5], extents[6]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + +template +void reshape_view(ViewType& out, const std::array& extents) { + if (ViewType::required_allocation_size(out.layout()) >= + ViewType::required_allocation_size(extents[0], extents[1], extents[2], + extents[3], extents[4], extents[5], + extents[6], extents[7])) { + out = ViewType(out.data(), extents[0], extents[1], extents[2], extents[3], + extents[4], extents[5], extents[6], extents[7]); + } else { + throw std::runtime_error("reshape_view: insufficient memory"); + } +} + } // namespace Impl } // namespace KokkosFFT -#endif \ No newline at end of file +#endif diff --git a/common/unit_test/CMakeLists.txt b/common/unit_test/CMakeLists.txt index 952c5047..2032dc7a 100644 --- a/common/unit_test/CMakeLists.txt +++ b/common/unit_test/CMakeLists.txt @@ -10,6 +10,7 @@ add_executable(unit-tests-kokkos-fft-common Test_Layouts.cpp Test_Padding.cpp Test_Helpers.cpp + Test_prep_transpose_view.cpp ) target_compile_features(unit-tests-kokkos-fft-common PUBLIC cxx_std_17) diff --git a/common/unit_test/Test_prep_transpose_view.cpp b/common/unit_test/Test_prep_transpose_view.cpp new file mode 100644 index 00000000..4b0eab62 --- /dev/null +++ b/common/unit_test/Test_prep_transpose_view.cpp @@ -0,0 +1,258 @@ +// SPDX-FileCopyrightText: (C) The Kokkos-FFT development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include "KokkosFFT_transpose.hpp" +#include "Test_Types.hpp" +#include "Test_Utils.hpp" + +template +void test_managed_prep_transpose_view() { + constexpr std::size_t DIMS = InViewType::rank(); + static_assert(InViewType::rank() == OutViewType::rank(), + "input and output have different ranks"); + + using InManagedViewType = + typename KokkosFFT::Impl::managable_view_type::type; + using OutManagedViewType = + typename KokkosFFT::Impl::managable_view_type::type; + static_assert(!InManagedViewType::memory_traits::is_unmanaged, + "Unable to get managed input view type"); + static_assert(!OutManagedViewType::memory_traits::is_unmanaged, + "Unable to get managed output view type"); + + static_assert(!InViewType::memory_traits::is_unmanaged, + "Unable to get managed input view type"); + static_assert(!OutViewType::memory_traits::is_unmanaged, + "Unable to get managed output view type"); + using LayoutType = typename InViewType::array_layout; + + // no need to allocate + { + LayoutType layout; + KokkosFFT::axis_type map; + for (int i = 0; i < DIMS; ++i) { + layout.dimension[i] = 5; + map[i] = i; + } + InViewType in("in", layout); + OutViewType out("out", layout); + auto data_prev = out.data(); + KokkosFFT::Impl::_prep_transpose_view(in, out, map); + // ensure no allocation + EXPECT_EQ(data_prev, out.data()); + // check shape + for (int i = 0; i < DIMS; ++i) { + EXPECT_EQ(out.extent(i), 5); + } + } + // allocate + { + LayoutType layout; + KokkosFFT::axis_type map; + for (int i = 0; i < DIMS; ++i) { + layout.dimension[i] = 5; + map[i] = i; + } + InViewType in("in", layout); + OutViewType out; + KokkosFFT::Impl::_prep_transpose_view(in, out, map); + // check shape + for (int i = 0; i < DIMS; ++i) { + EXPECT_EQ(out.extent(i), 5); + } + } +} + +template +void test_unmanaged_prep_transpose_view() { + constexpr std::size_t DIMS = InViewType::rank(); + static_assert(InViewType::rank() == OutViewType::rank(), + "input and output have different ranks"); + + using InManagedViewType = + typename KokkosFFT::Impl::managable_view_type::type; + using OutManagedViewType = + typename KokkosFFT::Impl::managable_view_type::type; + static_assert(!InManagedViewType::memory_traits::is_unmanaged, + "Unable to get managed input view type"); + static_assert(!OutManagedViewType::memory_traits::is_unmanaged, + "Unable to get managed output view type"); + using LayoutType = typename InViewType::array_layout; + + // no need to reshape + { + LayoutType layout; + KokkosFFT::axis_type map; + for (int i = 0; i < DIMS; ++i) { + layout.dimension[i] = 5; + map[i] = i; + } + InManagedViewType in("in", layout); + OutManagedViewType out("out", layout); + OutViewType u_out(out.data(), layout); + auto data_prev = out.data(); + KokkosFFT::Impl::_prep_transpose_view(in, u_out, map); + EXPECT_EQ(data_prev, u_out.data()); + // check shape + for (int i = 0; i < DIMS; ++i) { + EXPECT_EQ(u_out.extent(i), 5); + } + } + // reshape success + { + LayoutType layout; + KokkosFFT::axis_type map; + for (int i = 0; i < DIMS; ++i) { + layout.dimension[i] = 5; + map[i] = i; + } + + LayoutType layout_orig; + layout_orig.dimension[0] = 5; + if (DIMS == 0) { + // give the 1D version a larger original shape so it will reshape down to + // a portion of the allocation + layout_orig.dimension[0] = 10; + } + for (int i = 1; i < DIMS; ++i) { + layout_orig.dimension[0] *= 5; + layout_orig.dimension[i] = 1; + } + InManagedViewType in("in", layout); + OutManagedViewType out("out", layout_orig); + OutViewType u_out(out.data(), layout_orig); + KokkosFFT::Impl::_prep_transpose_view(in, u_out, map); + // check shape + for (int i = 0; i < DIMS; ++i) { + EXPECT_EQ(u_out.extent(i), 5); + } + } + // reshape failure + { + LayoutType layout; + KokkosFFT::axis_type map; + for (int i = 0; i < DIMS; ++i) { + layout.dimension[i] = 5; + map[i] = i; + } + + LayoutType layout_orig; + for (int i = 0; i < DIMS; ++i) { + layout_orig.dimension[i] = 1; + } + InManagedViewType in("in", layout); + OutManagedViewType out("out", layout_orig); + OutViewType u_out(out.data(), layout_orig); + EXPECT_THROW(KokkosFFT::Impl::_prep_transpose_view(in, u_out, map), + std::runtime_error); + } +} + +TEST(prep_transpose_view, 1DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 1DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 2DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 2DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 3DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 3DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 4DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 4DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 5DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 5DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 6DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 6DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 7DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 7DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} + +TEST(prep_transpose_view, 8DManaged) { + test_managed_prep_transpose_view< + Kokkos::View, + Kokkos::View>(); +} + +TEST(prep_transpose_view, 8DUnmanaged) { + test_unmanaged_prep_transpose_view< + Kokkos::View, + Kokkos::View>>(); +} diff --git a/examples/03_NDFFT/03_NDFFT.cpp b/examples/03_NDFFT/03_NDFFT.cpp index ea282d52..65b70e32 100644 --- a/examples/03_NDFFT/03_NDFFT.cpp +++ b/examples/03_NDFFT/03_NDFFT.cpp @@ -54,4 +54,4 @@ int main(int argc, char* argv[]) { Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/examples/07_unmanaged_views/07_unmanaged_views.cpp b/examples/07_unmanaged_views/07_unmanaged_views.cpp new file mode 100644 index 00000000..50580caa --- /dev/null +++ b/examples/07_unmanaged_views/07_unmanaged_views.cpp @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: (C) The Kokkos-FFT development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +using execution_space = Kokkos::DefaultExecutionSpace; +template +using View1D = Kokkos::View; + +template +using View3D = Kokkos::View; + +template +using UView3D = Kokkos::View>; + +template +using axis_type = KokkosFFT::axis_type; +template +using shape_type = KokkosFFT::shape_type; + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + { + constexpr int n0 = 128, n1 = 128, n2 = 16; + const Kokkos::complex I(1.0, 1.0); + + shape_type<3> shape; + shape[0] = n0; + shape[1] = n1; + shape[2] = n2; + + // any combination of inputs/outputs can be managed or unmanaged views + // should work on all functions in any number of dimensions + // 3D C2C FFT (Forward and Backward) + + // combined storage buffer for xc2c and xc2c_inv + View1D> storage( + "storage", (UView3D>::required_allocation_size( + n0, n1, n2) + + sizeof(Kokkos::complex)) / + sizeof(Kokkos::complex) * 2); + UView3D> xc2c(storage.data(), n0, n1, n2); + View3D> xc2c_hat("xc2c_hat", n0, n1, n2); + UView3D> xc2c_inv( + storage.data() + + (UView3D>::required_allocation_size(n0, n1, + n2) + + sizeof(Kokkos::complex)) / + sizeof(Kokkos::complex), + n0, n1, n2); + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + execution_space exec; + Kokkos::fill_random(exec, xc2c, random_pool, I); + exec.fence(); + + KokkosFFT::fftn(exec, xc2c, xc2c_hat, axis_type<3>{-3, -2, -1}, + KokkosFFT::Normalization::backward, shape); + KokkosFFT::ifftn(exec, xc2c_hat, xc2c_inv, axis_type<3>{-3, -2, -1}, + KokkosFFT::Normalization::backward, shape); + exec.fence(); + } + Kokkos::finalize(); + + return 0; +} diff --git a/examples/07_unmanaged_views/CMakeLists.txt b/examples/07_unmanaged_views/CMakeLists.txt new file mode 100644 index 00000000..77421d4c --- /dev/null +++ b/examples/07_unmanaged_views/CMakeLists.txt @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: (C) The Kokkos-FFT development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +add_executable(07_unmanaged_views 07_unmanaged_views.cpp) +target_link_libraries(07_unmanaged_views PUBLIC KokkosFFT::fft) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 58cdbcd4..f587f4c2 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -7,4 +7,5 @@ add_subdirectory(02_2DFFT) add_subdirectory(03_NDFFT) add_subdirectory(04_batchedFFT) add_subdirectory(05_1DFFT_HOST_DEVICE) -add_subdirectory(06_1DFFT_reuse_plans) \ No newline at end of file +add_subdirectory(06_1DFFT_reuse_plans) +add_subdirectory(07_unmanaged_views) diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 2b5e8be3..573c01bf 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -40,6 +40,8 @@ #include "KokkosFFT_OpenMP_transform.hpp" #endif +#include + // General Transform Interface namespace KokkosFFT { namespace Impl { @@ -129,17 +131,23 @@ void fft_exec_impl( plan.template good(in, out); const auto exec_space = plan.exec_space(); + using ManagableInViewType = + typename KokkosFFT::Impl::managable_view_type::type; + using ManagableOutViewType = + typename KokkosFFT::Impl::managable_view_type::type; + ManagableInViewType _in_s; InViewType _in; if (plan.is_crop_or_pad_needed()) { auto new_shape = plan.shape(); - KokkosFFT::Impl::crop_or_pad(exec_space, in, _in, new_shape); + KokkosFFT::Impl::crop_or_pad(exec_space, in, _in_s, new_shape); + _in = _in_s; } else { _in = in; } if (plan.is_transpose_needed()) { - InViewType in_T; - OutViewType out_T; + ManagableInViewType in_T; + ManagableOutViewType out_T; KokkosFFT::Impl::transpose(exec_space, _in, in_T, plan.map()); KokkosFFT::Impl::transpose(exec_space, out, out_T, plan.map()); @@ -828,4 +836,4 @@ void irfftn(const ExecutionSpace& exec_space, const InViewType& in, } // namespace KokkosFFT -#endif \ No newline at end of file +#endif