Skip to content

Commit

Permalink
Merge pull request #101 from helloworld922/main
Browse files Browse the repository at this point in the history
Implemented passing unmanaged views
  • Loading branch information
yasahi-hpc authored May 25, 2024
2 parents 0edcf54 + 19bd366 commit c7e13b0
Show file tree
Hide file tree
Showing 10 changed files with 576 additions and 142 deletions.
78 changes: 41 additions & 37 deletions common/src/KokkosFFT_padding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,11 @@ auto is_crop_or_pad_needed(const ViewType& view,
return not_same;
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<1> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<1> s) {
auto _n0 = s.at(0);
out = ViewType("out", _n0);
out = OutViewType("out", _n0);

auto n0 = std::min(_n0, in.extent(0));

Expand All @@ -94,13 +94,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
KOKKOS_LAMBDA(int i0) { out(i0) = in(i0); });
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<2> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<2> s) {
constexpr std::size_t DIM = 2;

auto [_n0, _n1] = s;
out = ViewType("out", _n0, _n1);
out = OutViewType("out", _n0, _n1);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand All @@ -119,13 +119,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
range, KOKKOS_LAMBDA(int i0, int i1) { out(i0, i1) = in(i0, i1); });
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<3> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<3> s) {
constexpr std::size_t DIM = 3;

auto [_n0, _n1, _n2] = s;
out = ViewType("out", _n0, _n1, _n2);
out = OutViewType("out", _n0, _n1, _n2);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand All @@ -148,13 +148,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<4> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<4> s) {
constexpr std::size_t DIM = 4;

auto [_n0, _n1, _n2, _n3] = s;
out = ViewType("out", _n0, _n1, _n2, _n3);
out = OutViewType("out", _n0, _n1, _n2, _n3);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand All @@ -178,13 +178,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<5> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<5> s) {
constexpr std::size_t DIM = 5;

auto [_n0, _n1, _n2, _n3, _n4] = s;
out = ViewType("out", _n0, _n1, _n2, _n3, _n4);
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand All @@ -209,13 +209,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<6> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<6> s) {
constexpr std::size_t DIM = 6;

auto [_n0, _n1, _n2, _n3, _n4, _n5] = s;
out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5);
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand All @@ -242,13 +242,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<7> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<7> s) {
constexpr std::size_t DIM = 6;

auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = s;
out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6);
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand Down Expand Up @@ -278,13 +278,13 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<8> s) {
template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void _crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<8> s) {
constexpr std::size_t DIM = 6;

auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = s;
out = ViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7);
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7);

int n0 = std::min(_n0, in.extent(0));
int n1 = std::min(_n1, in.extent(1));
Expand Down Expand Up @@ -318,15 +318,19 @@ void _crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
});
}

template <typename ExecutionSpace, typename ViewType, std::size_t DIM = 1>
void crop_or_pad(const ExecutionSpace& exec_space, const ViewType& in,
ViewType& out, shape_type<DIM> s) {
static_assert(ViewType::rank() == DIM,
template <typename ExecutionSpace, typename InViewType, typename OutViewType,
std::size_t DIM = 1>
void crop_or_pad(const ExecutionSpace& exec_space, const InViewType& in,
OutViewType& out, shape_type<DIM> s) {
static_assert(InViewType::rank() == DIM,
"crop_or_pad: Rank of View must be equal to Rank "
"of extended shape.");
static_assert(OutViewType::rank() == DIM,
"crop_or_pad: Rank of View must be equal to Rank "
"of extended shape.");
_crop_or_pad(exec_space, in, out, s);
}
} // namespace Impl
} // namespace KokkosFFT

#endif
#endif
131 changes: 33 additions & 98 deletions common/src/KokkosFFT_transpose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,31 @@ auto get_map_axes(const ViewType& view, int axis) {
return get_map_axes(view, axis_type<1>({axis}));
}

/// \brief Makes sure `out` has the extents of `in` permuted by `_map` before
///        a transpose kernel writes into it.
///
/// For every output dimension `i`, the expected extent is
/// `in.extent(_map.at(i))`. If all expected extents already match
/// `out.extent(i)`, `out` is left untouched. Otherwise:
///  - when `OutViewType::memory_traits::is_unmanaged` is false, `out` is
///    passed to `KokkosFFT::Impl::create_view` with the label "out" and the
///    expected extents;
///  - when it is true, `out` is passed to `KokkosFFT::Impl::reshape_view`
///    with the expected extents.
///
/// \tparam InViewType  Type of the input view (must provide `extent()`).
/// \tparam OutViewType Type of the output view (must provide `rank()`,
///                     `extent()` and `memory_traits`).
/// \tparam DIMS        Number of entries in the axis map.
///
/// \param in   [in]     View whose extents are permuted.
/// \param out  [in,out] View to be checked and, if needed, re-created or
///                      reshaped.
/// \param _map [in]     Axis map; entry `i` is the dimension of `in` that
///                      becomes dimension `i` of `out`.
template <class InViewType, class OutViewType, std::size_t DIMS>
void _prep_transpose_view(InViewType& in, OutViewType& out,
                          axis_type<DIMS> _map) {
  constexpr std::size_t rank = OutViewType::rank();

  // _map.at(i) is read for every i < rank, so a shorter map could only ever
  // fail at run time; reject it at compile time instead.
  static_assert(DIMS >= rank,
                "_prep_transpose_view: axis map must have at least as many "
                "entries as the rank of the output View.");

  // Assign a View if not a shallow copy
  bool is_out_view_ready = true;
  std::array<int, rank> out_extents;
  for (std::size_t i = 0; i < rank; ++i) {
    // Extents are kept as int because that is what is handed to
    // create_view / reshape_view below; the narrowing is made explicit.
    out_extents.at(i) = static_cast<int>(in.extent(_map.at(i)));

    // Compare in the unsigned domain explicitly (same result as the implicit
    // int -> unsigned conversion, without the signed/unsigned mix).
    if (static_cast<std::size_t>(out_extents.at(i)) != out.extent(i)) {
      is_out_view_ready = false;
    }
  }

  if (!is_out_view_ready) {
    if constexpr (!OutViewType::memory_traits::is_unmanaged) {
      KokkosFFT::Impl::create_view(out, "out", out_extents);
    } else {
      // try to reshape out if it currently has enough memory available
      KokkosFFT::Impl::reshape_view(out, out_extents);
    }
  }
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType,
std::enable_if_t<InViewType::rank() == 1, std::nullptr_t> = nullptr>
void _transpose(const ExecutionSpace& exec_space, InViewType& in,
Expand All @@ -110,19 +135,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1] = out_extents;
out = OutViewType("out", _n0, _n1);
}
_prep_transpose_view(in, out, _map);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); });
Expand All @@ -148,20 +161,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2] = out_extents;
out = OutViewType("out", _n0, _n1, _n2);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, 3> map = {_map[0], _map[1], _map[2]};
Kokkos::parallel_for(
Expand Down Expand Up @@ -196,20 +196,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2, _n3] = out_extents;
out = OutViewType("out", _n0, _n1, _n2, _n3);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3]};
Kokkos::parallel_for(
Expand Down Expand Up @@ -246,20 +233,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2, _n3, _n4] = out_extents;
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3], _map[4]};
Kokkos::parallel_for(
Expand Down Expand Up @@ -298,20 +272,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2, _n3, _n4, _n5] = out_extents;
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2],
_map[3], _map[4], _map[5]};
Expand Down Expand Up @@ -352,20 +313,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6] = out_extents;
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6]};
Expand Down Expand Up @@ -412,20 +360,7 @@ void _transpose(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

// Assign a View if not a shallow copy
bool is_out_view_ready = true;
std::array<int, rank> out_extents;
for (int i = 0; i < rank; i++) {
out_extents.at(i) = in.extent(_map.at(i));
if (out_extents.at(i) != out.extent(i)) {
is_out_view_ready = false;
}
}

if (!is_out_view_ready) {
auto [_n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7] = out_extents;
out = OutViewType("out", _n0, _n1, _n2, _n3, _n4, _n5, _n6, _n7);
}
_prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6], _map[7]};
Expand Down Expand Up @@ -499,4 +434,4 @@ void transpose(const ExecutionSpace& exec_space, InViewType& in,
} // namespace Impl
} // namespace KokkosFFT

#endif
#endif
Loading

0 comments on commit c7e13b0

Please sign in to comment.