From a9c3d0f509d2ffb441ae51111bd2f72ff2d75746 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sun, 27 Oct 2024 14:36:33 +0900 Subject: [PATCH 1/5] implement batched serial iamax Signed-off-by: Yuuichi Asahi --- .../impl/KokkosBatched_Iamax_Serial_Impl.hpp | 35 +++ .../KokkosBatched_Iamax_Serial_Internal.hpp | 59 ++++ batched/dense/src/KokkosBatched_Iamax.hpp | 43 +++ .../dense/unit_test/Test_Batched_Dense.hpp | 1 + .../unit_test/Test_Batched_SerialIamax.hpp | 279 ++++++++++++++++++ 5 files changed, 417 insertions(+) create mode 100644 batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp create mode 100644 batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp create mode 100644 batched/dense/src/KokkosBatched_Iamax.hpp create mode 100644 batched/dense/unit_test/Test_Batched_SerialIamax.hpp diff --git a/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp new file mode 100644 index 0000000000..2f6db1e544 --- /dev/null +++ b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp @@ -0,0 +1,35 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSBATCHED_IAMAX_SERIAL_IMPL_HPP_ +#define KOKKOSBATCHED_IAMAX_SERIAL_IMPL_HPP_ + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +#include "KokkosBatched_Iamax_Serial_Internal.hpp" + +namespace KokkosBatched { + +template +KOKKOS_INLINE_FUNCTION int SerialIamax::invoke(const XViewType &x) { + if (x.extent(0) <= 0) return -1; + if (x.extent(0) == 1) return 0; + return KokkosBatched::Impl::SerialIamaxInternal::invoke(x.extent(0), x.data(), x.stride(0)); +} + +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_IAMAX_SERIAL_IMPL_HPP_ diff --git a/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp new file mode 100644 index 0000000000..7b7d3f251d --- /dev/null +++ b/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp @@ -0,0 +1,59 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSBATCHED_IAMAX_SERIAL_INTERNAL_HPP_ +#define KOKKOSBATCHED_IAMAX_SERIAL_INTERNAL_HPP_ + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +#include +#include "KokkosBatched_Util.hpp" + +namespace KokkosBatched { +namespace Impl { + +/// +/// Serial Internal Impl +/// ======================== + +struct SerialIamaxInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke(const int n, const ValueType *KOKKOS_RESTRICT x, const int xs0); +}; + +template +KOKKOS_INLINE_FUNCTION int SerialIamaxInternal::invoke(const int n, const ValueType *KOKKOS_RESTRICT x, const int xs0) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + + RealType amax = Kokkos::abs(x[0 * xs0]); + int imax = 0; + + for (int i = 1; i < n; ++i) { + const RealType abs_x_i = Kokkos::abs(x[i * xs0]); + if (abs_x_i > amax) { + amax = abs_x_i; + imax = i; + } + } + + return imax; +}; + +} // namespace Impl +} // namespace KokkosBatched + +#endif // KOKKOSBATCHED_IAMAX_SERIAL_INTERNAL_HPP_ diff --git a/batched/dense/src/KokkosBatched_Iamax.hpp b/batched/dense/src/KokkosBatched_Iamax.hpp new file mode 100644 index 0000000000..02f389f48b --- /dev/null +++ b/batched/dense/src/KokkosBatched_Iamax.hpp @@ -0,0 +1,43 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBATCHED_IAMAX_HPP_ +#define KOKKOSBATCHED_IAMAX_HPP_ + +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) + +namespace KokkosBatched { + +/// \brief Serial Batched Iamax: +/// +/// IAMAX finds the index of the first element having maximum absolute value. +/// +/// \tparam XViewType: Input view type, needs to be a 1D view +/// +/// \param X [in]: Input view type +/// +/// \return The index of the first element having maximum absolute value +/// No nested parallel_for is used inside of the function. +/// + +struct SerialIamax { + template + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &x); +}; +} // namespace KokkosBatched + +#include "KokkosBatched_Iamax_Serial_Impl.hpp" + +#endif // KOKKOSBATCHED_IAMAX_HPP_ diff --git a/batched/dense/unit_test/Test_Batched_Dense.hpp b/batched/dense/unit_test/Test_Batched_Dense.hpp index b28f0450c1..2378e5ff01 100644 --- a/batched/dense/unit_test/Test_Batched_Dense.hpp +++ b/batched/dense/unit_test/Test_Batched_Dense.hpp @@ -62,6 +62,7 @@ #include "Test_Batched_SerialPbtrs_Real.hpp" #include "Test_Batched_SerialPbtrs_Complex.hpp" #include "Test_Batched_SerialLaswp.hpp" +#include "Test_Batched_SerialIamax.hpp" // Team Kernels #include "Test_Batched_TeamAxpy.hpp" diff --git a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp new file mode 100644 index 0000000000..ef14c5ff33 --- /dev/null +++ b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp @@ -0,0 +1,279 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +/// \author Yuuichi Asahi (yuuichi.asahi@cea.fr) +#include +#include +#include + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Iamax.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace Iamax { + +template +struct Functor_BatchedSerialIamax { + using execution_space = typename DeviceType::execution_space; + XViewType m_x; + RViewType m_r; + + KOKKOS_INLINE_FUNCTION + Functor_BatchedSerialIamax(const XViewType &x, const RViewType &r) : m_x(x), m_r(r) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int k) const { + auto sub_x = Kokkos::subview(m_x, k, Kokkos::ALL()); + auto iamax = KokkosBatched::SerialIamax::invoke(sub_x); + m_r(k) = iamax; + } + + inline void run() { + using value_type = typename XViewType::non_const_value_type; + std::string name_region("KokkosBatched::Test::SerialIamax"); + std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::RangePolicy policy(0, m_x.extent(0)); + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +/// \brief Implementation details of batched iamax analytical test +/// A0: [1, 2, 0] -> 1 +/// A1: [-5, 4, 3] -> 0 +/// A2: [0, 0, 0] -> 0 +/// A3: [0, -1, -1] -> 1 +/// +/// \param N [in] Batch size of A +template +void impl_test_batched_iamax_analytical(const std::size_t N) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + using View2DType = Kokkos::View; + using StridedView2DType = Kokkos::View; + using MaxView1DType = Kokkos::View; + + View2DType A0("A0", N, 3), A1("A1", N, 3), A2("A2", N, 3), A3("A3", N, 3); + MaxView1DType iamax0("iamax0", N), iamax_ref0("iamax_ref0", N), iamax1("iamax1", N), iamax_ref1("iamax_ref1", N), + iamax2("iamax2", N), 
iamax_ref2("iamax_ref2", N), iamax3("iamax3", N), iamax_ref3("iamax_ref3", N); + + // Testing incx argument with strided views + constexpr std::size_t incx = 2; + Kokkos::LayoutStride layout{N, incx, 3, N * incx}; + StridedView2DType A0_s("A0_s", layout), A1_s("A1_s", layout), A2_s("A2_s", layout), A3_s("A3_s", layout); + MaxView1DType iamax_s0("iamax_s0", N), iamax_s1("iamax_s1", N), iamax_s2("iamax_s2", N), iamax_s3("iamax_s3", N); + + // Initialize A0, A1, A2, A3 + auto h_A0 = Kokkos::create_mirror_view(A0); + auto h_A1 = Kokkos::create_mirror_view(A1); + auto h_A2 = Kokkos::create_mirror_view(A2); + auto h_A3 = Kokkos::create_mirror_view(A3); + + auto h_iamax_ref0 = Kokkos::create_mirror_view(iamax_ref0); + auto h_iamax_ref1 = Kokkos::create_mirror_view(iamax_ref1); + auto h_iamax_ref2 = Kokkos::create_mirror_view(iamax_ref2); + auto h_iamax_ref3 = Kokkos::create_mirror_view(iamax_ref3); + for (std::size_t k = 0; k < N; k++) { + h_A0(k, 0) = 1; + h_A0(k, 1) = 2; + h_A0(k, 2) = 0; + + h_A1(k, 0) = -5; + h_A1(k, 1) = 4; + h_A1(k, 2) = 3; + + h_A2(k, 0) = 0; + h_A2(k, 1) = 0; + h_A2(k, 2) = 0; + + h_A3(k, 0) = 0; + h_A3(k, 1) = -1; + h_A3(k, 2) = -1; + + h_iamax_ref0(k) = 1; + h_iamax_ref1(k) = 0; + h_iamax_ref2(k) = 0; + h_iamax_ref3(k) = 1; + } + Kokkos::deep_copy(A0, h_A0); + Kokkos::deep_copy(A1, h_A1); + Kokkos::deep_copy(A2, h_A2); + Kokkos::deep_copy(A3, h_A3); + + // Strided view can be copied only on the same device + Kokkos::deep_copy(A0_s, A0); + Kokkos::deep_copy(A1_s, A1); + Kokkos::deep_copy(A2_s, A2); + Kokkos::deep_copy(A3_s, A3); + + Functor_BatchedSerialIamax(A0, iamax0).run(); + Functor_BatchedSerialIamax(A1, iamax1).run(); + Functor_BatchedSerialIamax(A2, iamax2).run(); + Functor_BatchedSerialIamax(A3, iamax3).run(); + + // For strided views + Functor_BatchedSerialIamax(A0_s, iamax_s0).run(); + Functor_BatchedSerialIamax(A1_s, iamax_s1).run(); + Functor_BatchedSerialIamax(A2_s, iamax_s2).run(); + Functor_BatchedSerialIamax(A3_s, 
iamax_s3).run(); + + Kokkos::fence(); + + // Copy to host for comparison + auto h_iamax0 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax0); + auto h_iamax1 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax1); + auto h_iamax2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax2); + auto h_iamax3 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax3); + auto h_iamax_s0 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax_s0); + auto h_iamax_s1 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax_s1); + auto h_iamax_s2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax_s2); + auto h_iamax_s3 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax_s3); + + // Check if max index is correct + for (std::size_t k = 0; k < N; k++) { + EXPECT_EQ(h_iamax0(k), h_iamax_ref0(k)); + EXPECT_EQ(h_iamax1(k), h_iamax_ref1(k)); + EXPECT_EQ(h_iamax2(k), h_iamax_ref2(k)); + EXPECT_EQ(h_iamax3(k), h_iamax_ref3(k)); + EXPECT_EQ(h_iamax_s0(k), h_iamax_ref0(k)); + EXPECT_EQ(h_iamax_s1(k), h_iamax_ref1(k)); + EXPECT_EQ(h_iamax_s2(k), h_iamax_ref2(k)); + EXPECT_EQ(h_iamax_s3(k), h_iamax_ref3(k)); + } +} + +/// \brief Implementation details of batched iamax test with random inputs +/// +/// \param N [in] Batch size of A +/// \param BlkSize [in] Block size of matrix A +template +void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; + using View2DType = Kokkos::View; + using StridedView2DType = Kokkos::View; + using MaxView1DType = Kokkos::View; + + View2DType A("A", N, BlkSize); + MaxView1DType iamax("iamax", N), iamax_ref("iamax_ref", N); + + // Testing incx argument with strided views + constexpr std::size_t incx = 2; + Kokkos::LayoutStride layout{N, incx, BlkSize, N * incx}; + StridedView2DType A_s("A_s", layout); + MaxView1DType iamax_s("iamax_s", N); + 
+ // Initialize A_reconst with random matrix + using execution_space = typename DeviceType::execution_space; + Kokkos::Random_XorShift64_Pool rand_pool(13718); + ScalarType randStart, randEnd; + + KokkosKernels::Impl::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(A, rand_pool, randStart, randEnd); + + // Strided view can be copied only on the same device + Kokkos::deep_copy(A_s, A); + + Functor_BatchedSerialIamax(A, iamax).run(); + + // For strided views + Functor_BatchedSerialIamax(A_s, iamax_s).run(); + + Kokkos::fence(); + + // Reference + auto h_iamax_ref = Kokkos::create_mirror_view(iamax_ref); + if (BlkSize == 0) { + // As well as blas, we store -1 (0 in Fortran) for empty matrix + for (std::size_t k = 0; k < N; k++) { + h_iamax_ref(k) = -1; + } + } else { + auto h_A = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A); + for (std::size_t k = 0; k < N; k++) { + RealType amax = Kokkos::abs(h_A(k, 0)); + int iamax = 0; + for (std::size_t i = 1; i < BlkSize; i++) { + const RealType abs_A_i = Kokkos::abs(h_A(k, i)); + if (abs_A_i > amax) { + amax = abs_A_i; + iamax = static_cast(i); + } + } + h_iamax_ref(k) = iamax; + } + } + + // Copy to host for comparison + auto h_iamax = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax); + auto h_iamax_s = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), iamax_s); + + // Check if max index is correct + for (std::size_t k = 0; k < N; k++) { + EXPECT_EQ(h_iamax(k), h_iamax_ref(k)); + EXPECT_EQ(h_iamax_s(k), h_iamax_ref(k)); + } +} + +} // namespace Iamax +} // namespace Test + +template +int test_batched_iamax() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + using LayoutType = Kokkos::LayoutLeft; + Test::Iamax::impl_test_batched_iamax_analytical(1); + Test::Iamax::impl_test_batched_iamax_analytical(2); + for (std::size_t i = 0; i < 10; i++) { + Test::Iamax::impl_test_batched_iamax(1, i); + Test::Iamax::impl_test_batched_iamax(2, i); + } + } +#endif +#if 
defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + using LayoutType = Kokkos::LayoutRight; + Test::Iamax::impl_test_batched_iamax_analytical(1); + Test::Iamax::impl_test_batched_iamax_analytical(2); + for (std::size_t i = 0; i < 10; i++) { + Test::Iamax::impl_test_batched_iamax(1, i); + Test::Iamax::impl_test_batched_iamax(2, i); + } + } +#endif + + return 0; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, test_batched_iamax_float) { test_batched_iamax(); } +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, test_batched_iamax_double) { test_batched_iamax(); } +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) +TEST_F(TestCategory, test_batched_iamax_fcomplex) { test_batched_iamax>(); } +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) +TEST_F(TestCategory, test_batched_iamax_dcomplex) { test_batched_iamax>(); } +#endif From ffc884a7ab495d50abb7beeb8dae8dd0ecb8ee56 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sun, 27 Oct 2024 14:44:39 +0900 Subject: [PATCH 2/5] Add missing static_assertion in iamax Signed-off-by: Yuuichi Asahi --- batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp | 1 + batched/dense/src/KokkosBatched_Iamax.hpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp index 2f6db1e544..c357e65bdc 100644 --- a/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp @@ -25,6 +25,7 @@ namespace KokkosBatched { template KOKKOS_INLINE_FUNCTION int SerialIamax::invoke(const XViewType &x) { + static_assert(Kokkos::is_view_v, "KokkosBatched::iamax: XViewType is not a Kokkos::View."); if (x.extent(0) <= 0) return -1; if (x.extent(0) == 1) return 0; return KokkosBatched::Impl::SerialIamaxInternal::invoke(x.extent(0), x.data(), x.stride(0)); diff --git a/batched/dense/src/KokkosBatched_Iamax.hpp 
b/batched/dense/src/KokkosBatched_Iamax.hpp index 02f389f48b..cb37e6c23a 100644 --- a/batched/dense/src/KokkosBatched_Iamax.hpp +++ b/batched/dense/src/KokkosBatched_Iamax.hpp @@ -21,14 +21,14 @@ namespace KokkosBatched { /// \brief Serial Batched Iamax: -/// -/// IAMAX finds the index of the first element having maximum absolute value. +/// Iamax finds the index of the first element having maximum absolute value. /// /// \tparam XViewType: Input view type, needs to be a 1D view /// /// \param X [in]: Input view type /// /// \return The index of the first element having maximum absolute value +/// As well as Blas, this returns -1 (0 in Fortran) for an empty vector /// No nested parallel_for is used inside of the function. /// From 9e7fc7731dfd676bc8330866af6eb4492da9a6e6 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sun, 27 Oct 2024 15:08:07 +0900 Subject: [PATCH 3/5] fix: CodeQL Signed-off-by: Yuuichi Asahi --- .../dense/unit_test/Test_Batched_SerialIamax.hpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp index ef14c5ff33..0b15c42b8a 100644 --- a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp +++ b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp @@ -63,8 +63,6 @@ struct Functor_BatchedSerialIamax { /// \param N [in] Batch size of A template void impl_test_batched_iamax_analytical(const std::size_t N) { - using ats = typename Kokkos::ArithTraits; - using RealType = typename ats::mag_type; using View2DType = Kokkos::View; using StridedView2DType = Kokkos::View; using MaxView1DType = Kokkos::View; @@ -164,8 +162,6 @@ void impl_test_batched_iamax_analytical(const std::size_t N) { /// \param BlkSize [in] Block size of matrix A template void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { - using ats = typename Kokkos::ArithTraits; - using RealType = typename ats::mag_type; using 
View2DType = Kokkos::View; using StridedView2DType = Kokkos::View; using MaxView1DType = Kokkos::View; @@ -179,7 +175,7 @@ void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { StridedView2DType A_s("A_s", layout); MaxView1DType iamax_s("iamax_s", N); - // Initialize A_reconst with random matrix + // Initialize A with random values using execution_space = typename DeviceType::execution_space; Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarType randStart, randEnd; @@ -208,15 +204,15 @@ void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { auto h_A = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A); for (std::size_t k = 0; k < N; k++) { RealType amax = Kokkos::abs(h_A(k, 0)); - int iamax = 0; + int iamax_tmp = 0; for (std::size_t i = 1; i < BlkSize; i++) { const RealType abs_A_i = Kokkos::abs(h_A(k, i)); if (abs_A_i > amax) { - amax = abs_A_i; - iamax = static_cast(i); + amax = abs_A_i; + iamax_tmp = static_cast(i); } } - h_iamax_ref(k) = iamax; + h_iamax_ref(k) = iamax_tmp; } } From 5b53a2a8b0ef11abc8b77666fe4e53c7d51c02f9 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sun, 27 Oct 2024 15:18:51 +0900 Subject: [PATCH 4/5] fix: reintroduce RealType in impl_test_batched_iamax Signed-off-by: Yuuichi Asahi --- batched/dense/unit_test/Test_Batched_SerialIamax.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp index 0b15c42b8a..85bf1b95ad 100644 --- a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp +++ b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp @@ -162,6 +162,8 @@ void impl_test_batched_iamax_analytical(const std::size_t N) { /// \param BlkSize [in] Block size of matrix A template void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { + using ats = typename Kokkos::ArithTraits; + using RealType = typename ats::mag_type; using View2DType = 
Kokkos::View; using StridedView2DType = Kokkos::View; using MaxView1DType = Kokkos::View; From b0ab2976ab25d7c0792c09e751906a5a62d24f49 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 29 Oct 2024 04:41:52 +0900 Subject: [PATCH 5/5] fix: use view size_type as a return type of iamax Signed-off-by: Yuuichi Asahi --- .../impl/KokkosBatched_Iamax_Serial_Impl.hpp | 9 +++++---- .../impl/KokkosBatched_Iamax_Serial_Internal.hpp | 15 ++++++++------- batched/dense/src/KokkosBatched_Iamax.hpp | 4 ++-- .../dense/unit_test/Test_Batched_SerialIamax.hpp | 6 +++--- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp index c357e65bdc..9c0f99028b 100644 --- a/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_Iamax_Serial_Impl.hpp @@ -24,11 +24,12 @@ namespace KokkosBatched { template -KOKKOS_INLINE_FUNCTION int SerialIamax::invoke(const XViewType &x) { +KOKKOS_INLINE_FUNCTION typename XViewType::size_type SerialIamax::invoke(const XViewType &x) { static_assert(Kokkos::is_view_v, "KokkosBatched::iamax: XViewType is not a Kokkos::View."); - if (x.extent(0) <= 0) return -1; - if (x.extent(0) == 1) return 0; - return KokkosBatched::Impl::SerialIamaxInternal::invoke(x.extent(0), x.data(), x.stride(0)); + if (x.extent(0) <= 1) return 0; + using size_type = typename XViewType::size_type; + using value_type = typename XViewType::non_const_value_type; + return KokkosBatched::Impl::SerialIamaxInternal::invoke(x.extent(0), x.data(), x.stride(0)); } } // namespace KokkosBatched diff --git a/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp index 7b7d3f251d..89aed299ae 100644 --- a/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_Iamax_Serial_Internal.hpp @@ -30,19 +30,20 @@ namespace Impl { /// 
======================== struct SerialIamaxInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke(const int n, const ValueType *KOKKOS_RESTRICT x, const int xs0); + template + KOKKOS_INLINE_FUNCTION static IndexType invoke(const int n, const ValueType *KOKKOS_RESTRICT x, const int xs0); }; -template -KOKKOS_INLINE_FUNCTION int SerialIamaxInternal::invoke(const int n, const ValueType *KOKKOS_RESTRICT x, const int xs0) { +template +KOKKOS_INLINE_FUNCTION IndexType SerialIamaxInternal::invoke(const int n, const ValueType *KOKKOS_RESTRICT x, + const int xs0) { using ats = typename Kokkos::ArithTraits; using RealType = typename ats::mag_type; - RealType amax = Kokkos::abs(x[0 * xs0]); - int imax = 0; + RealType amax = Kokkos::abs(x[0 * xs0]); + IndexType imax = 0; - for (int i = 1; i < n; ++i) { + for (IndexType i = 1; i < static_cast(n); ++i) { const RealType abs_x_i = Kokkos::abs(x[i * xs0]); if (abs_x_i > amax) { amax = abs_x_i; diff --git a/batched/dense/src/KokkosBatched_Iamax.hpp b/batched/dense/src/KokkosBatched_Iamax.hpp index cb37e6c23a..c388ca943f 100644 --- a/batched/dense/src/KokkosBatched_Iamax.hpp +++ b/batched/dense/src/KokkosBatched_Iamax.hpp @@ -28,13 +28,13 @@ namespace KokkosBatched { /// \param X [in]: Input view type /// /// \return The index of the first element having maximum absolute value -/// As well as Blas, this returns -1 (0 in Fortran) for an empty vector +/// Like Fortran BLAS, this returns 0 for an empty vector; note that 0 is also a valid index /// No nested parallel_for is used inside of the function. 
/// struct SerialIamax { template - KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &x); + KOKKOS_INLINE_FUNCTION static typename XViewType::size_type invoke(const XViewType &x); }; } // namespace KokkosBatched diff --git a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp index 85bf1b95ad..38e9e78e04 100644 --- a/batched/dense/unit_test/Test_Batched_SerialIamax.hpp +++ b/batched/dense/unit_test/Test_Batched_SerialIamax.hpp @@ -39,7 +39,7 @@ struct Functor_BatchedSerialIamax { void operator()(const int k) const { auto sub_x = Kokkos::subview(m_x, k, Kokkos::ALL()); auto iamax = KokkosBatched::SerialIamax::invoke(sub_x); - m_r(k) = iamax; + m_r(k) = static_cast(iamax); } inline void run() { @@ -198,9 +198,9 @@ void impl_test_batched_iamax(const std::size_t N, const std::size_t BlkSize) { // Reference auto h_iamax_ref = Kokkos::create_mirror_view(iamax_ref); if (BlkSize == 0) { - // As well as blas, we store -1 (0 in Fortran) for empty matrix + // Like Fortran BLAS, iamax returns 0 for an empty vector, so the reference is 0 for (std::size_t k = 0; k < N; k++) { - h_iamax_ref(k) = -1; + h_iamax_ref(k) = 0; } } else { auto h_A = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A);