Skip to content

Commit

Permalink
Rework internal allocations (#167)
Browse files Browse the repository at this point in the history
* Rework internal allocations

* Update common/src/KokkosFFT_transpose.hpp

Co-authored-by: yasahi-hpc <[email protected]>

* Revert example

---------

Co-authored-by: yasahi-hpc <[email protected]>
  • Loading branch information
tpadioleau and yasahi-hpc authored Oct 11, 2024
1 parent 770a301 commit f3a3385
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 489 deletions.
85 changes: 27 additions & 58 deletions common/src/KokkosFFT_transpose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,41 +75,24 @@ auto get_map_axes(const ViewType& view, int axis) {
return get_map_axes(view, axis_type<1>({axis}));
}

template <class InViewType, class OutViewType, std::size_t DIMS>
void prep_transpose_view(InViewType& in, OutViewType& out,
axis_type<DIMS> map) {
constexpr int rank = OutViewType::rank();

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(map.at(i));
if (static_cast<std::size_t>(out_extents.at(i)) != out.extent(i)) {
is_out_view_ready = false;
}
template <class ViewType>
axis_type<ViewType::rank()> compute_transpose_extents(
ViewType const& view, axis_type<ViewType::rank()> const& map) {
static_assert(Kokkos::is_view_v<ViewType>,
"compute_transpose_extents: ViewType must be a Kokkos::View.");
constexpr std::size_t rank = ViewType::rank();

axis_type<rank> out_extents;
for (std::size_t i = 0; i < rank; ++i) {
out_extents.at(i) = view.extent(map.at(i));
}

if constexpr (std::is_const_v<OutViewType>) {
KOKKOSFFT_THROW_IF(
!is_out_view_ready,
"prep_transpose_view: OutViewType is const, but does not "
"have the required extents");
} else {
if (!is_out_view_ready) {
if constexpr (!OutViewType::memory_traits::is_unmanaged) {
KokkosFFT::Impl::create_view(out, "out", out_extents);
} else {
// try to reshape out if it currently has enough memory available
KokkosFFT::Impl::reshape_view(out, out_extents);
}
}
}
return out_extents;
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<2> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<2> /*_map*/) {
constexpr std::size_t DIM = 2;

using range_type = Kokkos::MDRangePolicy<
Expand All @@ -125,16 +108,14 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::parallel_for(
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); });
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<3> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<3> _map) {
constexpr std::size_t DIM = 3;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -151,8 +132,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
tile_type{{4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, 3> map = {_map[0], _map[1], _map[2]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
Expand All @@ -166,8 +145,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<4> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<4> _map) {
constexpr std::size_t DIM = 4;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -185,8 +164,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range,
Expand All @@ -202,8 +179,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<5> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<5> _map) {
constexpr std::size_t DIM = 5;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -222,8 +199,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3], _map[4]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range,
Expand All @@ -240,8 +215,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<6> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<6> _map) {
constexpr std::size_t DIM = 6;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -261,8 +236,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2],
_map[3], _map[4], _map[5]};
Kokkos::parallel_for(
Expand All @@ -281,8 +254,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<7> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<7> _map) {
constexpr std::size_t DIM = 6;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -302,8 +275,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6]};
Kokkos::parallel_for(
Expand All @@ -326,8 +297,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<8> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<8> _map) {
constexpr std::size_t DIM = 6;

constexpr std::size_t rank = InViewType::rank();
Expand All @@ -349,8 +320,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6], _map[7]};
Kokkos::parallel_for(
Expand Down Expand Up @@ -396,8 +365,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
*/
template <typename ExecutionSpace, typename InViewType, typename OutViewType,
std::size_t DIM = 1>
void transpose(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<DIM> map) {
void transpose(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<DIM> map) {
static_assert(
KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
OutViewType>,
Expand Down
139 changes: 9 additions & 130 deletions common/src/KokkosFFT_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,136 +204,15 @@ auto extract_extents(const ViewType& view) {
return extents;
}

/// \brief Recursion helper for create_view.
///
/// Appends extents[I] to the argument pack and recurses until all N entries
/// have been gathered, then assigns `ViewType(label, dims...)` to \p out.
/// Callers leave \p I at its default of 0 and pass no \p dims.
///
/// \tparam I Index of the next extent to append to the pack.
/// \tparam ViewType Type of the object being (re)constructed.
/// \tparam Label Type of the label forwarded to the ViewType constructor.
/// \tparam N Number of extents.
/// \tparam Dims Types of the extents gathered so far (all int).
template <std::size_t I = 0, typename ViewType, typename Label, std::size_t N,
          typename... Dims>
void create_view_unpack(ViewType& out, const Label& label,
                        const std::array<int, N>& extents, Dims... dims) {
  if constexpr (I == N) {
    // Every extent has been collected: build the view in one constructor call.
    out = ViewType(label, dims...);
  } else {
    create_view_unpack<I + 1>(out, label, extents, dims..., extents[I]);
  }
}

/// \brief Replaces \p out with a ViewType constructed from a label and extents.
///
/// Equivalent to `out = ViewType(label, extents[0], ..., extents[N-1])`.
/// A single template generic in N replaces the former one-overload-per-rank
/// implementation; the extents are passed to the constructor in array order.
///
/// \tparam ViewType Type constructible from (label, int...) and assignable.
/// \tparam Label Type of the label accepted by the ViewType constructor.
/// \tparam N Number of extents.
///
/// \param out [out] Object that receives the newly constructed ViewType.
/// \param label [in] Label forwarded as the first constructor argument.
/// \param extents [in] Extents forwarded, in order, after the label.
template <typename ViewType, typename Label, std::size_t N>
void create_view(ViewType& out, const Label& label,
                 const std::array<int, N>& extents) {
  create_view_unpack(out, label, extents);
}

/// \brief Recursion helper for reshape_view.
///
/// Appends extents[I] to the argument pack and recurses until all N entries
/// have been gathered, then performs the capacity check and the assignment.
/// Callers leave \p I at its default of 0 and pass no \p dims.
///
/// \tparam I Index of the next extent to append to the pack.
/// \tparam ViewType Type of the view being reshaped.
/// \tparam N Number of extents.
/// \tparam Dims Types of the extents gathered so far (all int).
template <std::size_t I = 0, typename ViewType, std::size_t N,
          typename... Dims>
void reshape_view_unpack(ViewType& out, const std::array<int, N>& extents,
                         Dims... dims) {
  if constexpr (I == N) {
    // Refuse to reshape when the size required by the current layout is
    // smaller than the size required by the requested extents.
    KOKKOSFFT_THROW_IF(ViewType::required_allocation_size(out.layout()) <
                           ViewType::required_allocation_size(dims...),
                       "reshape_view: insufficient memory");
    // Re-wrap the same data pointer with the new extents.
    out = ViewType(out.data(), dims...);
  } else {
    reshape_view_unpack<I + 1>(out, extents, dims..., extents[I]);
  }
}

/// \brief Re-wraps the data pointer of \p out in a ViewType with new extents.
///
/// Equivalent to `out = ViewType(out.data(), extents[0], ..., extents[N-1])`,
/// guarded by KOKKOSFFT_THROW_IF when
/// `ViewType::required_allocation_size(out.layout())` is less than
/// `ViewType::required_allocation_size(extents...)`. A single template generic
/// in N replaces the former one-overload-per-rank implementation.
///
/// \tparam ViewType View type providing required_allocation_size, layout(),
///         data() and a (pointer, int...) constructor.
/// \tparam N Number of extents.
///
/// \param out [in,out] View whose data pointer is reused with the new extents.
/// \param extents [in] Requested extents, forwarded in array order.
template <typename ViewType, std::size_t N>
void reshape_view(ViewType& out, const std::array<int, N>& extents) {
  reshape_view_unpack(out, extents);
}
/// \brief Builds a Layout whose leading N dimensions are taken from extents.
///
/// The layout is default-constructed and its `dimension` array is then
/// overwritten entry by entry; entries at index N and beyond keep whatever
/// value the default constructor gave them.
///
/// \tparam Layout Either Kokkos::LayoutLeft or Kokkos::LayoutRight.
/// \tparam N Number of extents; must fit in `Layout::dimension`.
///
/// \param extents [in] Extents copied into `layout.dimension[0..N-1]`.
/// \return The populated layout.
template <typename Layout, std::size_t N>
Layout create_layout(const std::array<int, N>& extents) {
  static_assert(std::is_same_v<Layout, Kokkos::LayoutLeft> ||
                    std::is_same_v<Layout, Kokkos::LayoutRight>,
                "create_layout: Layout must be either Kokkos::LayoutLeft or "
                "Kokkos::LayoutRight.");
  // `dimension` is a fixed-size array: reject at compile time any N that
  // would write past its end.
  static_assert(N <= std::extent_v<decltype(Layout::dimension)>,
                "create_layout: the number of extents exceeds the capacity of "
                "Layout::dimension.");
  Layout layout;
  for (std::size_t i = 0; i < N; ++i) {
    // Explicit int -> size_t conversion (the original relied on the implicit
    // conversion performed by std::copy_n).
    layout.dimension[i] = static_cast<std::size_t>(extents[i]);
  }
  return layout;
}

} // namespace Impl
Expand Down
1 change: 0 additions & 1 deletion common/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ add_executable(unit-tests-kokkos-fft-common
Test_Layouts.cpp
Test_Padding.cpp
Test_Helpers.cpp
Test_prep_transpose_view.cpp
)

target_compile_features(unit-tests-kokkos-fft-common PUBLIC cxx_std_17)
Expand Down
Loading

0 comments on commit f3a3385

Please sign in to comment.