From 9549227b53f1cf93bcccbf53dea687fa2ca5ff91 Mon Sep 17 00:00:00 2001 From: "Luc Berger-Vergiat (-EXP)" Date: Tue, 21 Nov 2023 15:39:18 -0700 Subject: [PATCH] BLAS: nrm1 problems with ExecSpace template and lack of Kokkos::Threads Fix issue with Kokkos::Threads and Kokkos::HIP --- blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 89 +++++++++++--------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index 79822b452e..a789e125e5 100644 --- a/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -111,6 +111,17 @@ KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) #endif +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Threads, + Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads, Kokkos::HostSpace) +#endif + } // namespace Impl } // namespace KokkosBlas @@ -156,31 +167,31 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); } -#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ - MEMSPACE) \ +#define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_CUBLAS(SCALAR, LAYOUT, MEMSPACE) \ template <> \ struct Nrm1< \ - EXECSPACE, \ + Kokkos::Cuda, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - EXECSPACE, \ + Kokkos::Cuda, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ - using execution_space = EXECSPACE; \ + using execution_space = Kokkos::Cuda; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ using XV = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ @@ -192,8 +203,8 @@ void cublasAsumWrapper(const ExecutionSpace& space, RViewType& R, cublasAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ - X); \ + nrm1_eti_spec_avail::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ @@ -269,41 +280,42 @@ void rocblasAsumWrapper(const ExecutionSpace& space, RViewType& R, } #define KOKKOSBLAS1_NRM1_TPL_SPEC_DECL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ + template <> \ struct Nrm1< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ using XV = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ - static void nrm1(const ExecSpace& space, RV& R, const XV& X) { \ + static void nrm1(const Kokkos::HIP& space, RV& R, const XV& X) { \ Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS," #SCALAR \ "]"); \ const size_type numElems = X.extent(0); \ if (numElems < static_cast(INT_MAX)) { \ rocblasAsumWrapper(space, R, X); \ } else { \ - Nrm1::value>::nrm1(space, R, \ - X); \ + Nrm1::value>::nrm1(space, R, \ + X); \ } \ Kokkos::Profiling::popRegion(); \ } \ @@ -377,32 +389,33 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, Kokkos::deep_copy(space, R, res); } -#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE, \ - ETI_SPEC_AVAIL) \ +#define KOKKOSBLAS1_NRM1_ONEMKL(SCALAR, LAYOUT, MEMSPACE) \ template <> \ struct Nrm1< \ - EXECSPACE, \ + Kokkos::Experimental::SYCL, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ 1, true, \ nrm1_eti_spec_avail< \ - EXECSPACE, \ + Kokkos::Experimental::SYCL, \ Kokkos::View::mag_type, LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ - using execution_space = EXECSPACE; \ + using execution_space = Kokkos::Experimental::SYCL; \ using RV = Kokkos::View::mag_type, \ LAYOUT, Kokkos::HostSpace, \ Kokkos::MemoryTraits>; \ - using XV = Kokkos::View, \ - Kokkos::MemoryTraits>; \ + using XV = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ using size_type = typename XV::size_type; \ \ static void nrm1(const execution_space& space, RV& R, const XV& X) { \ @@ -413,34 +426,30 @@ void onemklAsumWrapper(const ExecutionSpace& space, RViewType& R, onemklAsumWrapper(space, R, X); \ } else { \ Nrm1::value>::nrm1(space, R, \ - X); \ + nrm1_eti_spec_avail::value>::nrm1(space, R, X); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace) #if defined(KOKKOSKERNELS_INST_MEMSPACE_SYCLSHAREDSPACE) -KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(float, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) -KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCL, +KOKKOSBLAS1_NRM1_ONEMKL(double, Kokkos::LayoutLeft, Kokkos::Experimental::SYCLSharedUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLSharedUSMSpace) KOKKOSBLAS1_NRM1_ONEMKL(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLSharedUSMSpace) #endif