From 5b5c101956543ca4803584e4d27a329ee109b9fb Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 6 Sep 2023 20:30:14 -0600 Subject: [PATCH 01/22] Backup --- .../gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in | 0 .../trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in | 0 .../KokkosBlas_gesv_eti_spec_avail.hpp.in | 0 .../KokkosBlas_trtri_eti_spec_avail.hpp.in | 0 {blas => lapack}/impl/KokkosBlas_gesv_impl.hpp | 0 {blas => lapack}/impl/KokkosBlas_gesv_spec.hpp | 0 {blas => lapack}/impl/KokkosBlas_trtri_impl.hpp | 0 {blas => lapack}/impl/KokkosBlas_trtri_spec.hpp | 0 {blas => lapack}/src/KokkosBlas_gesv.hpp | 0 {blas => lapack}/src/KokkosBlas_trtri.hpp | 0 {blas => lapack}/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp | 0 {blas => lapack}/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp | 0 {blas => lapack}/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp | 0 {blas => lapack}/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp | 0 {blas => lapack}/unit_test/Test_Blas_gesv.hpp | 0 {blas => lapack}/unit_test/Test_Blas_trtri.hpp | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename {blas => lapack}/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in (100%) rename {blas => lapack}/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in (100%) rename {blas => lapack}/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in (100%) rename {blas => lapack}/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in (100%) rename {blas => lapack}/impl/KokkosBlas_gesv_impl.hpp (100%) rename {blas => lapack}/impl/KokkosBlas_gesv_spec.hpp (100%) rename {blas => lapack}/impl/KokkosBlas_trtri_impl.hpp (100%) rename {blas => lapack}/impl/KokkosBlas_trtri_spec.hpp (100%) rename {blas => lapack}/src/KokkosBlas_gesv.hpp (100%) rename {blas => lapack}/src/KokkosBlas_trtri.hpp (100%) rename {blas => lapack}/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp (100%) rename {blas => lapack}/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp (100%) rename {blas => lapack}/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp (100%) rename {blas => lapack}/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp (100%) rename {blas => lapack}/unit_test/Test_Blas_gesv.hpp (100%) rename {blas => lapack}/unit_test/Test_Blas_trtri.hpp (100%) diff --git a/blas/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in similarity index 100% rename from blas/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in rename to lapack/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in diff --git a/blas/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in similarity index 100% rename from blas/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in rename to lapack/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in similarity index 100% rename from blas/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in rename to lapack/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in similarity index 100% rename from blas/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in rename to lapack/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in diff --git a/blas/impl/KokkosBlas_gesv_impl.hpp b/lapack/impl/KokkosBlas_gesv_impl.hpp similarity index 100% rename from blas/impl/KokkosBlas_gesv_impl.hpp rename to lapack/impl/KokkosBlas_gesv_impl.hpp diff --git a/blas/impl/KokkosBlas_gesv_spec.hpp b/lapack/impl/KokkosBlas_gesv_spec.hpp similarity index 100% rename from blas/impl/KokkosBlas_gesv_spec.hpp rename to lapack/impl/KokkosBlas_gesv_spec.hpp diff --git a/blas/impl/KokkosBlas_trtri_impl.hpp b/lapack/impl/KokkosBlas_trtri_impl.hpp similarity index 100% rename from blas/impl/KokkosBlas_trtri_impl.hpp rename to lapack/impl/KokkosBlas_trtri_impl.hpp diff --git a/blas/impl/KokkosBlas_trtri_spec.hpp b/lapack/impl/KokkosBlas_trtri_spec.hpp similarity index 100% rename from blas/impl/KokkosBlas_trtri_spec.hpp rename to lapack/impl/KokkosBlas_trtri_spec.hpp diff --git a/blas/src/KokkosBlas_gesv.hpp b/lapack/src/KokkosBlas_gesv.hpp similarity index 100% rename from blas/src/KokkosBlas_gesv.hpp rename to lapack/src/KokkosBlas_gesv.hpp diff --git a/blas/src/KokkosBlas_trtri.hpp b/lapack/src/KokkosBlas_trtri.hpp similarity index 100% rename from blas/src/KokkosBlas_trtri.hpp rename to lapack/src/KokkosBlas_trtri.hpp diff --git a/blas/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp b/lapack/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp similarity index 100% rename from blas/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp rename to lapack/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp diff --git a/blas/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp b/lapack/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp similarity index 100% rename from blas/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp rename to lapack/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp diff --git a/blas/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp b/lapack/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp similarity index 100% rename from blas/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp rename to lapack/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp diff --git a/blas/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp b/lapack/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp similarity index 100% rename from blas/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp rename to lapack/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp diff --git a/blas/unit_test/Test_Blas_gesv.hpp b/lapack/unit_test/Test_Blas_gesv.hpp similarity index 100% rename from blas/unit_test/Test_Blas_gesv.hpp rename to lapack/unit_test/Test_Blas_gesv.hpp diff --git a/blas/unit_test/Test_Blas_trtri.hpp b/lapack/unit_test/Test_Blas_trtri.hpp similarity index 100% rename from blas/unit_test/Test_Blas_trtri.hpp rename to lapack/unit_test/Test_Blas_trtri.hpp From 8f4914072cdeee50b39e92a6f1c8b596ffe66f53 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 6 Sep 2023 20:48:51 -0600 Subject: [PATCH 02/22] Backup --- CMakeLists.txt | 10 +++++- blas/tpls/KokkosBlas_Host_tpl.hpp | 6 ---- cmake/KokkosKernels_config.h.in | 2 ++ lapack/tpls/KokkosLapack_Host_tpl.hpp | 44 +++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 lapack/tpls/KokkosLapack_Host_tpl.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 893e4239cd..812640374b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,7 @@ IF (KokkosKernels_INSTALL_TESTING) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(batched/dense/unit_test) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(batched/sparse/unit_test) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(blas/unit_test) + KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(graph/unit_test) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(sparse/unit_test) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(ode/unit_test) @@ -192,7 +193,7 @@ ELSE() "ALL" STRING "A list of components to enable in testing and building" - VALID_ENTRIES BATCHED BLAS GRAPH SPARSE ALL + VALID_ENTRIES BATCHED BLAS LAPACK GRAPH SPARSE ALL ) # ================================================================== @@ -243,6 +244,7 @@ ELSE() MESSAGE(" COMMON: ON") MESSAGE(" BATCHED: ${KokkosKernels_ENABLE_COMPONENT_BATCHED}") MESSAGE(" BLAS: ${KokkosKernels_ENABLE_COMPONENT_BLAS}") + MESSAGE(" LAPACK: ${KokkosKernels_ENABLE_COMPONENT_LAPACK}") MESSAGE(" GRAPH: ${KokkosKernels_ENABLE_COMPONENT_GRAPH}") MESSAGE(" SPARSE: ${KokkosKernels_ENABLE_COMPONENT_SPARSE}") MESSAGE(" ODE: ${KokkosKernels_ENABLE_COMPONENT_ODE}") @@ -287,6 +289,9 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_BLAS) INCLUDE(blas/CMakeLists.txt) ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) + INCLUDE(lapack/CMakeLists.txt) + ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) INCLUDE(graph/CMakeLists.txt) ENDIF() @@ -405,6 +410,9 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_BLAS) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(blas/unit_test) ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) + KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) + ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(graph/unit_test) ENDIF() diff --git a/blas/tpls/KokkosBlas_Host_tpl.hpp b/blas/tpls/KokkosBlas_Host_tpl.hpp index 3b0c7f366e..29afff4d62 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -115,12 +115,6 @@ struct HostBlas { const char diag, int m, int n, const T alpha, const T *a, int lda, /* */ T *b, int ldb); - - static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, - int info); - - static int trtri(const char uplo, const char diag, int n, const T *a, - int lda); }; } // namespace Impl } // namespace KokkosBlas diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index b8b66fffbb..621c78bfcc 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -109,6 +109,8 @@ /* BLAS library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_BLAS +/* LAPACK library */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_LAPACK /* MKL library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL /* CUSPARSE */ diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp new file mode 100644 index 0000000000..d74099aaec --- /dev/null +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -0,0 +1,44 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_HOST_TPL_HPP_ +#define KOKKOSLAPACK_HOST_TPL_HPP_ + +/// \file KokkosLapack_Host_tpl.hpp +/// \brief LAPACK wrapper + +#include "KokkosKernels_config.h" +#include "Kokkos_ArithTraits.hpp" + +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) + +namespace KokkosLapack { +namespace Impl { + +template +struct HostLapack { + static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, + int info); + + static int trtri(const char uplo, const char diag, int n, const T *a, + int lda); +}; +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#endif // KOKKOSLAPACK_HOST_TPL_HPP_ From 845f7f2505503427325759496bf4f32f785eb6c9 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 6 Sep 2023 21:19:57 -0600 Subject: [PATCH 03/22] Backup --- ...=> KokkosLapack_gesv_eti_spec_inst.cpp.in} | 6 +- ...> KokkosLapack_trtri_eti_spec_inst.cpp.in} | 6 +- ...> KokkosLapack_gesv_eti_spec_avail.hpp.in} | 8 +- ... KokkosLapack_trtri_eti_spec_avail.hpp.in} | 12 +- ...sv_impl.hpp => KokkosLapack_gesv_impl.hpp} | 14 +- ...sv_spec.hpp => KokkosLapack_gesv_spec.hpp} | 38 +++--- ...i_impl.hpp => KokkosLapack_trtri_impl.hpp} | 12 +- ...i_spec.hpp => KokkosLapack_trtri_spec.hpp} | 32 ++--- ...kosBlas_gesv.hpp => KokkosLapack_gesv.hpp} | 52 ++++---- ...sBlas_trtri.hpp => KokkosLapack_trtri.hpp} | 22 ++-- ...p => KokkosLapack_gesv_tpl_spec_avail.hpp} | 48 +++---- ...pp => KokkosLapack_gesv_tpl_spec_decl.hpp} | 120 +++++++++--------- ... => KokkosLapack_trtri_tpl_spec_avail.hpp} | 76 +++++------ ...p => KokkosLapack_trtri_tpl_spec_decl.hpp} | 106 ++++++++-------- ...est_Blas_gesv.hpp => Test_Lapack_gesv.hpp} | 78 ++++++------ ...t_Blas_trtri.hpp => Test_Lapack_trtri.hpp} | 56 ++++---- 16 files changed, 344 insertions(+), 342 deletions(-) rename lapack/eti/generated_specializations_cpp/gesv/{KokkosBlas_gesv_eti_spec_inst.cpp.in => KokkosLapack_gesv_eti_spec_inst.cpp.in} (88%) rename lapack/eti/generated_specializations_cpp/trtri/{KokkosBlas_trtri_eti_spec_inst.cpp.in => KokkosLapack_trtri_eti_spec_inst.cpp.in} (88%) rename lapack/eti/generated_specializations_hpp/{KokkosBlas_gesv_eti_spec_avail.hpp.in => KokkosLapack_gesv_eti_spec_avail.hpp.in} (80%) rename lapack/eti/generated_specializations_hpp/{KokkosBlas_trtri_eti_spec_avail.hpp.in => KokkosLapack_trtri_eti_spec_avail.hpp.in} (73%) rename lapack/impl/{KokkosBlas_gesv_impl.hpp => KokkosLapack_gesv_impl.hpp} (73%) rename lapack/impl/{KokkosBlas_gesv_spec.hpp => KokkosLapack_gesv_spec.hpp} (83%) rename lapack/impl/{KokkosBlas_trtri_impl.hpp => KokkosLapack_trtri_impl.hpp} (91%) rename lapack/impl/{KokkosBlas_trtri_spec.hpp => KokkosLapack_trtri_spec.hpp} (83%) rename lapack/src/{KokkosBlas_gesv.hpp => KokkosLapack_gesv.hpp} (76%) rename lapack/src/{KokkosBlas_trtri.hpp => KokkosLapack_trtri.hpp} (88%) rename lapack/tpls/{KokkosBlas_gesv_tpl_spec_avail.hpp => KokkosLapack_gesv_tpl_spec_avail.hpp} (70%) rename lapack/tpls/{KokkosBlas_gesv_tpl_spec_decl.hpp => KokkosLapack_gesv_tpl_spec_decl.hpp} (89%) rename lapack/tpls/{KokkosBlas_trtri_tpl_spec_avail.hpp => KokkosLapack_trtri_tpl_spec_avail.hpp} (56%) rename lapack/tpls/{KokkosBlas_trtri_tpl_spec_decl.hpp => KokkosLapack_trtri_tpl_spec_decl.hpp} (73%) rename lapack/unit_test/{Test_Blas_gesv.hpp => Test_Lapack_gesv.hpp} (83%) rename lapack/unit_test/{Test_Blas_trtri.hpp => Test_Lapack_trtri.hpp} (88%) diff --git a/lapack/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/gesv/KokkosLapack_gesv_eti_spec_inst.cpp.in similarity index 88% rename from lapack/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in rename to lapack/eti/generated_specializations_cpp/gesv/KokkosLapack_gesv_eti_spec_inst.cpp.in index 32473be3ad..da521984a4 100644 --- a/lapack/eti/generated_specializations_cpp/gesv/KokkosBlas_gesv_eti_spec_inst.cpp.in +++ b/lapack/eti/generated_specializations_cpp/gesv/KokkosLapack_gesv_eti_spec_inst.cpp.in @@ -17,10 +17,10 @@ #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true #include "KokkosKernels_config.h" -#include "KokkosBlas_gesv_spec.hpp" +#include "KokkosLapack_gesv_spec.hpp" -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -@BLAS_GESV_ETI_INST_BLOCK@ +@LAPACK_GESV_ETI_INST_BLOCK@ } //IMPL } //Kokkos diff --git a/lapack/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/trtri/KokkosLapack_trtri_eti_spec_inst.cpp.in similarity index 88% rename from lapack/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in rename to lapack/eti/generated_specializations_cpp/trtri/KokkosLapack_trtri_eti_spec_inst.cpp.in index 64755f7a54..c4ab12f5a4 100644 --- a/lapack/eti/generated_specializations_cpp/trtri/KokkosBlas_trtri_eti_spec_inst.cpp.in +++ b/lapack/eti/generated_specializations_cpp/trtri/KokkosLapack_trtri_eti_spec_inst.cpp.in @@ -17,10 +17,10 @@ #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true #include "KokkosKernels_config.h" -#include "KokkosBlas_trtri_spec.hpp" +#include "KokkosLapack_trtri_spec.hpp" -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -@BLAS_TRTRI_ETI_INST_BLOCK@ +@LAPACK_TRTRI_ETI_INST_BLOCK@ } //IMPL } //Kokkos diff --git a/lapack/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_gesv_eti_spec_avail.hpp.in similarity index 80% rename from lapack/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in rename to lapack/eti/generated_specializations_hpp/KokkosLapack_gesv_eti_spec_avail.hpp.in index ae262c912e..d1f36e3069 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosBlas_gesv_eti_spec_avail.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_gesv_eti_spec_avail.hpp.in @@ -14,11 +14,11 @@ // //@HEADER -#ifndef KOKKOSBLAS_GESV_ETI_SPEC_AVAIL_HPP_ -#define KOKKOSBLAS_GESV_ETI_SPEC_AVAIL_HPP_ -namespace KokkosBlas { +#ifndef KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { namespace Impl { -@BLAS_GESV_ETI_AVAIL_BLOCK@ +@LAPACK_GESV_ETI_AVAIL_BLOCK@ } //IMPL } //Kokkos #endif diff --git a/lapack/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_trtri_eti_spec_avail.hpp.in similarity index 73% rename from lapack/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in rename to lapack/eti/generated_specializations_hpp/KokkosLapack_trtri_eti_spec_avail.hpp.in index 3f669efa06..89443c2c9b 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosBlas_trtri_eti_spec_avail.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_trtri_eti_spec_avail.hpp.in @@ -14,13 +14,13 @@ // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_ETI_SPEC_AVAIL_HPP_ -#define KOKKOSBLAS_TRTRI_ETI_SPEC_AVAIL_HPP_ -namespace KokkosBlas { +#ifndef KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { namespace Impl { -@BLAS_TRTRI_ETI_AVAIL_BLOCK@ +@LAPACK_TRTRI_ETI_AVAIL_BLOCK@ } // Impl -} // KokkosBlas -#endif // KOKKOSBLAS_TRTRI_ETI_SPEC_AVAIL_HPP_ +} // KokkosLapack +#endif // KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL_HPP_ diff --git a/lapack/impl/KokkosBlas_gesv_impl.hpp b/lapack/impl/KokkosLapack_gesv_impl.hpp similarity index 73% rename from lapack/impl/KokkosBlas_gesv_impl.hpp rename to lapack/impl/KokkosLapack_gesv_impl.hpp index e51e48309f..3a60f42171 100644 --- a/lapack/impl/KokkosBlas_gesv_impl.hpp +++ b/lapack/impl/KokkosLapack_gesv_impl.hpp @@ -14,21 +14,21 @@ // //@HEADER -#ifndef KOKKOSBLAS_IMPL_GESV_HPP_ -#define KOKKOSBLAS_IMPL_GESV_HPP_ +#ifndef KOKKOSLAPACK_IMPL_GESV_HPP_ +#define KOKKOSLAPACK_IMPL_GESV_HPP_ -/// \file KokkosBlas_gesv_impl.hpp +/// \file KokkosLapack_gesv_impl.hpp /// \brief Implementation(s) of dense linear solve. #include #include -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -// NOTE: Might add the implementation of KokkosBlas::gesv later +// NOTE: Might add the implementation of KokkosLapack::gesv later } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack -#endif // KOKKOSBLAS_IMPL_GESV_HPP +#endif // KOKKOSLAPACK_IMPL_GESV_HPP diff --git a/lapack/impl/KokkosBlas_gesv_spec.hpp b/lapack/impl/KokkosLapack_gesv_spec.hpp similarity index 83% rename from lapack/impl/KokkosBlas_gesv_spec.hpp rename to lapack/impl/KokkosLapack_gesv_spec.hpp index f1dff467c8..8ea1df03bf 100644 --- a/lapack/impl/KokkosBlas_gesv_spec.hpp +++ b/lapack/impl/KokkosLapack_gesv_spec.hpp @@ -13,8 +13,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#ifndef KOKKOSBLAS_IMPL_GESV_SPEC_HPP_ -#define KOKKOSBLAS_IMPL_GESV_SPEC_HPP_ +#ifndef KOKKOSLAPACK_IMPL_GESV_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_GESV_SPEC_HPP_ #include #include @@ -22,10 +22,10 @@ // Include the actual functors #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -#include +#include #endif -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists template @@ -33,16 +33,16 @@ struct gesv_eti_spec_avail { enum : bool { value = false }; }; } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack // // Macro for declaration of full specialization availability -// KokkosBlas::Impl::GESV. This is NOT for users!!! All +// KokkosLapack::Impl::GESV. This is NOT for users!!! All // the declarations of full specializations go in this header file. // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS_GESV_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ template <> \ struct gesv_eti_spec_avail< \ @@ -56,14 +56,14 @@ struct gesv_eti_spec_avail { }; // Include the actual specialization declarations -#include -#include +#include +#include -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // Unification layer -/// \brief Implementation of KokkosBlas::gesv. +/// \brief Implementation of KokkosLapack::gesv. template ::value, @@ -79,25 +79,25 @@ template struct GESV { static void gesv(const AMatrix & /* A */, const BXMV & /* B */, const IPIVV & /* IPIV */) { - // NOTE: Might add the implementation of KokkosBlas::gesv later + // NOTE: Might add the implementation of KokkosLapack::gesv later throw std::runtime_error( "No fallback implementation of GESV (general LU factorization & solve) " - "exists. Enable BLAS and/or MAGMA TPL."); + "exists. Enable LAPACK and/or MAGMA TPL."); } }; #endif } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack // // Macro for declaration of full specialization of -// KokkosBlas::Impl::GESV. This is NOT for users!!! All +// KokkosLapack::Impl::GESV. This is NOT for users!!! All // the declarations of full specializations go in this header file. // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. // -#define KOKKOSBLAS_GESV_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ extern template struct GESV< \ Kokkos::View { Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS_GESV_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ +#define KOKKOSLAPACK_GESV_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ template struct GESV< \ Kokkos::View { Kokkos::MemoryTraits >, \ false, true>; -#include +#include -#endif // KOKKOSBLAS_IMPL_GESV_SPEC_HPP_ +#endif // KOKKOSLAPACK_IMPL_GESV_SPEC_HPP_ diff --git a/lapack/impl/KokkosBlas_trtri_impl.hpp b/lapack/impl/KokkosLapack_trtri_impl.hpp similarity index 91% rename from lapack/impl/KokkosBlas_trtri_impl.hpp rename to lapack/impl/KokkosLapack_trtri_impl.hpp index 4501763ea8..9f52c2d412 100644 --- a/lapack/impl/KokkosBlas_trtri_impl.hpp +++ b/lapack/impl/KokkosLapack_trtri_impl.hpp @@ -14,11 +14,11 @@ // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_IMPL_HPP_ -#define KOKKOSBLAS_TRTRI_IMPL_HPP_ +#ifndef KOKKOSLAPACK_TRTRI_IMPL_HPP_ +#define KOKKOSLAPACK_TRTRI_IMPL_HPP_ /** - * \file KokkosBlas_trtri_impl.hpp + * \file KokkosLapack_trtri_impl.hpp * \brief Implementation of triangular matrix inverse */ @@ -27,7 +27,7 @@ #include "KokkosBatched_Trtri_Decl.hpp" #include "KokkosBatched_Trtri_Serial_Impl.hpp" -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { template @@ -65,5 +65,5 @@ void SerialTrtri_Invoke(const RViewType &R, const char uplo[], } } } // namespace Impl -} // namespace KokkosBlas -#endif // KOKKOSBLAS_TRTRI_IMPL_HPP_ +} // namespace KokkosLapack +#endif // KOKKOSLAPACK_TRTRI_IMPL_HPP_ diff --git a/lapack/impl/KokkosBlas_trtri_spec.hpp b/lapack/impl/KokkosLapack_trtri_spec.hpp similarity index 83% rename from lapack/impl/KokkosBlas_trtri_spec.hpp rename to lapack/impl/KokkosLapack_trtri_spec.hpp index 2a4d2db576..e48b37f7c2 100644 --- a/lapack/impl/KokkosBlas_trtri_spec.hpp +++ b/lapack/impl/KokkosLapack_trtri_spec.hpp @@ -13,17 +13,17 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_SPEC_HPP_ -#define KOKKOSBLAS_TRTRI_SPEC_HPP_ +#ifndef KOKKOSLAPACK_TRTRI_SPEC_HPP_ +#define KOKKOSLAPACK_TRTRI_SPEC_HPP_ #include "KokkosKernels_config.h" #include "Kokkos_Core.hpp" #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -#include +#include #endif -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists template @@ -31,13 +31,13 @@ struct trtri_eti_spec_avail { enum : bool { value = false }; }; } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack // // This Macros provides the ETI specialization of trtri, currently not // available. // -#define KOKKOSBLAS_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, \ MEM_SPACE) \ template <> \ struct trtri_eti_spec_avail< \ @@ -49,10 +49,10 @@ struct trtri_eti_spec_avail { }; // Include the actual specialization declarations -#include -#include +#include +#include -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // @@ -77,8 +77,8 @@ struct TRTRI { static_assert(static_cast(AVIT::rank) == 2, "AVIT must have rank 2."); Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY - ? "KokkosBlas::trtri[ETI]" - : "KokkosBlas::trtri[noETI]"); + ? "KokkosLapack::trtri[ETI]" + : "KokkosLapack::trtri[noETI]"); typename AVIT::HostMirror host_A = Kokkos::create_mirror_view(A); typename RVIT::HostMirror host_R = Kokkos::create_mirror_view(R); @@ -97,7 +97,7 @@ struct TRTRI { //! KOKKOSKERNELS_IMPL_COMPILE_LIBRARY } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack // // These Macros are only included when we are not compiling libkokkoskernels but @@ -106,7 +106,7 @@ struct TRTRI { // "extern template" skips the implicit instatiation step ensuring that the // callers code uses this explicit instantiation definition of TRTRI. // -#define KOKKOSBLAS_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ extern template struct TRTRI< \ Kokkos::View >, \ @@ -114,7 +114,7 @@ struct TRTRI { Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSBLAS_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ template struct TRTRI< \ Kokkos::View >, \ @@ -122,6 +122,6 @@ struct TRTRI { Kokkos::MemoryTraits >, \ false, true>; -#include +#include -#endif // KOKKOSBLAS_TRTRI_SPEC_HPP_ +#endif // KOKKOSLAPACK_TRTRI_SPEC_HPP_ diff --git a/lapack/src/KokkosBlas_gesv.hpp b/lapack/src/KokkosLapack_gesv.hpp similarity index 76% rename from lapack/src/KokkosBlas_gesv.hpp rename to lapack/src/KokkosLapack_gesv.hpp index 89b9d36c96..b08f523f6e 100644 --- a/lapack/src/KokkosBlas_gesv.hpp +++ b/lapack/src/KokkosLapack_gesv.hpp @@ -14,23 +14,23 @@ // //@HEADER -/// \file KokkosBlas_gesv.hpp +/// \file KokkosLapack_gesv.hpp /// \brief Local dense linear solve /// -/// This file provides KokkosBlas::gesv. This function performs a +/// This file provides KokkosLapack::gesv. This function performs a /// local (no MPI) dense linear solve on a system of linear equations /// A * X = B where A is a general N-by-N matrix and X and B are N-by-NRHS /// matrices. -#ifndef KOKKOSBLAS_GESV_HPP_ -#define KOKKOSBLAS_GESV_HPP_ +#ifndef KOKKOSLAPACK_GESV_HPP_ +#define KOKKOSLAPACK_GESV_HPP_ #include -#include "KokkosBlas_gesv_spec.hpp" +#include "KokkosLapack_gesv_spec.hpp" #include "KokkosKernels_Error.hpp" -namespace KokkosBlas { +namespace KokkosLapack { /// \brief Solve the dense linear equation system A*X = B. /// @@ -50,24 +50,24 @@ namespace KokkosBlas { /// template void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { - // NOTE: Currently, KokkosBlas::gesv only supports for MAGMA TPL and BLAS TPL. + // NOTE: Currently, KokkosLapack::gesv only supports for MAGMA TPL and LAPACK TPL. // MAGMA TPL should be enabled to call the MAGMA GPU interface for - // device views BLAS TPL should be enabled to call the BLAS interface + // device views LAPACK TPL should be enabled to call the LAPACK interface // for host views static_assert(Kokkos::is_view::value, - "KokkosBlas::gesv: A must be a Kokkos::View."); + "KokkosLapack::gesv: A must be a Kokkos::View."); static_assert(Kokkos::is_view::value, - "KokkosBlas::gesv: B must be a Kokkos::View."); + "KokkosLapack::gesv: B must be a Kokkos::View."); static_assert(Kokkos::is_view::value, - "KokkosBlas::gesv: IPIV must be a Kokkos::View."); + "KokkosLapack::gesv: IPIV must be a Kokkos::View."); static_assert(static_cast(AMatrix::rank) == 2, - "KokkosBlas::gesv: A must have rank 2."); + "KokkosLapack::gesv: A must have rank 2."); static_assert( static_cast(BXMV::rank) == 1 || static_cast(BXMV::rank) == 2, - "KokkosBlas::gesv: B must have either rank 1 or rank 2."); + "KokkosLapack::gesv: B must have either rank 1 or rank 2."); static_assert(static_cast(IPIVV::rank) == 1, - "KokkosBlas::gesv: IPIV must have rank 1."); + "KokkosLapack::gesv: IPIV must have rank 1."); int64_t IPIV0 = IPIV.extent(0); int64_t A0 = A.extent(0); @@ -79,7 +79,7 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { (IPIV0 == A1) || ((IPIV0 == 0) && (IPIV.data() == nullptr)); if (!(valid_pivot)) { std::ostringstream os; - os << "KokkosBlas::gesv: IPIV: " << IPIV0 << ". " + os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". " << "Valid options include zero-extent 1-D view (no pivoting), or 1-D " "View with size of " << A0 << " (partial pivoting)."; @@ -88,22 +88,22 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { // Check for no pivoting case. Only MAGMA supports no pivoting interface #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // and have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL if ((!std::is_same::value) && (IPIV0 == 0) && (IPIV.data() == nullptr)) { std::ostringstream os; - os << "KokkosBlas::gesv: IPIV: " << IPIV0 << ". " - << "BLAS TPL does not support no pivoting."; + os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". " + << "LAPACK TPL does not support no pivoting."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } #endif #else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // but have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL if ((IPIV0 == 0) && (IPIV.data() == nullptr)) { std::ostringstream os; - os << "KokkosBlas::gesv: IPIV: " << IPIV0 << ". " - << "BLAS TPL does not support no pivoting."; + os << "KokkosLapack::gesv: IPIV: " << IPIV0 << ". " + << "LAPACK TPL does not support no pivoting."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } #endif @@ -112,7 +112,7 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { // Check compatibility of dimensions at run time. if ((A0 < A1) || (A0 != B0)) { std::ostringstream os; - os << "KokkosBlas::gesv: Dimensions of A, and B do not match: " + os << "KokkosLapack::gesv: Dimensions of A, and B do not match: " << " A: " << A.extent(0) << " x " << A.extent(1) << " B: " << B.extent(0) << " x " << B.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); @@ -136,15 +136,15 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { if (BXMV::rank == 1) { auto B_i = BXMV_Internal(B.data(), B.extent(0), 1); - KokkosBlas::Impl::GESV::gesv(A_i, B_i, IPIV_i); } else { // BXMV::rank == 2 auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1)); - KokkosBlas::Impl::GESV::gesv(A_i, B_i, IPIV_i); } } -} // namespace KokkosBlas +} // namespace KokkosLapack -#endif // KOKKOSBLAS_GESV_HPP_ +#endif // KOKKOSLAPACK_GESV_HPP_ diff --git a/lapack/src/KokkosBlas_trtri.hpp b/lapack/src/KokkosLapack_trtri.hpp similarity index 88% rename from lapack/src/KokkosBlas_trtri.hpp rename to lapack/src/KokkosLapack_trtri.hpp index b1a34f0483..44e8fc9f65 100644 --- a/lapack/src/KokkosBlas_trtri.hpp +++ b/lapack/src/KokkosLapack_trtri.hpp @@ -13,19 +13,19 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_HPP_ -#define KOKKOSBLAS_TRTRI_HPP_ +#ifndef KOKKOSLAPACK_TRTRI_HPP_ +#define KOKKOSLAPACK_TRTRI_HPP_ -/// \file KokkosBlas_trtri.hpp +/// \file KokkosLapack_trtri.hpp #include "KokkosKernels_Macros.hpp" -#include "KokkosBlas_trtri_spec.hpp" +#include "KokkosLapack_trtri_spec.hpp" #include "KokkosKernels_helpers.hpp" #include #include #include "KokkosKernels_Error.hpp" -namespace KokkosBlas { +namespace KokkosLapack { /// \brief Find the inverse of the triangular matrix, A /// @@ -62,14 +62,14 @@ int trtri(const char uplo[], const char diag[], const AViewType& A) { if (!valid_uplo) { std::ostringstream os; - os << "KokkosBlas::trtri: uplo = '" << uplo[0] << "'. " + os << "KokkosLapack::trtri: uplo = '" << uplo[0] << "'. " << "Valid values include 'U' or 'u' (A is upper triangular), " "'L' or 'l' (A is lower triangular)."; KokkosKernels::Impl::throw_runtime_exception(os.str()); } if (!valid_diag) { std::ostringstream os; - os << "KokkosBlas::trtri: diag = '" << diag[0] << "'. " + os << "KokkosLapack::trtri: diag = '" << diag[0] << "'. " << "Valid values include 'U' or 'u' (the diagonal of A is assumed to be " "unit), " "'N' or 'n' (the diagonal of A is assumed to be non-unit)."; @@ -88,7 +88,7 @@ int trtri(const char uplo[], const char diag[], const AViewType& A) { // or B*A if (A_m != A_n) { std::ostringstream os; - os << "KokkosBlas::trtri: Dimensions of A do not match," + os << "KokkosLapack::trtri: Dimensions of A do not match," << " A: " << A.extent(0) << " x " << A.extent(1); KokkosKernels::Impl::throw_runtime_exception(os.str()); } @@ -108,12 +108,12 @@ int trtri(const char uplo[], const char diag[], const AViewType& A) { int result; RViewInternalType R = RViewInternalType(&result); - KokkosBlas::Impl::TRTRI::trtri(R, uplo, + KokkosLapack::Impl::TRTRI::trtri(R, uplo, diag, A); return result; } -} // namespace KokkosBlas +} // namespace KokkosLapack -#endif // KOKKOS_BLASLAPACK_TRTRI_HPP_ +#endif // KOKKOSLAPACK_TRTRI_HPP_ diff --git a/lapack/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp similarity index 70% rename from lapack/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp rename to lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp index f909b4a295..74a65d4cf9 100644 --- a/lapack/tpls/KokkosBlas_gesv_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp @@ -14,10 +14,10 @@ // //@HEADER -#ifndef KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_HPP_ -#define KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_HPP_ +#ifndef KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_HPP_ -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists template @@ -25,10 +25,10 @@ struct gesv_tpl_spec_avail { enum : bool { value = false }; }; -// Generic Host side BLAS (could be MKL or whatever) -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK -#define KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, MEMSPACE) \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ template \ struct gesv_tpl_spec_avail< \ Kokkos::View, \ @@ -38,30 +38,30 @@ struct gesv_tpl_spec_avail { enum : bool { value = true }; \ }; -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) /* #if defined (KOKKOSKERNELS_INST_DOUBLE) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutRight, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK( double, Kokkos::LayoutRight, Kokkos::HostSpace) #endif #if defined (KOKKOSKERNELS_INST_FLOAT) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutRight, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK( float, Kokkos::LayoutRight, Kokkos::HostSpace) #endif #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK( Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) #endif #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK( Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) #endif */ #endif @@ -69,7 +69,7 @@ Kokkos::LayoutRight, Kokkos::HostSpace) #endif // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#define KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ template \ struct gesv_tpl_spec_avail< \ Kokkos::View, \ @@ -79,36 +79,36 @@ Kokkos::LayoutRight, Kokkos::HostSpace) #endif enum : bool { value = true }; \ }; -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) /* #if defined (KOKKOSKERNELS_INST_DOUBLE) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA( double, Kokkos::LayoutRight, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA( double, Kokkos::LayoutRight, Kokkos::CudaSpace) #endif #if defined (KOKKOSKERNELS_INST_FLOAT) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA( float, Kokkos::LayoutRight, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA( float, Kokkos::LayoutRight, Kokkos::CudaSpace) #endif #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA( + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA( Kokkos::complex,Kokkos::LayoutRight, Kokkos::CudaSpace) #endif #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_GESV_TPL_SPEC_AVAIL_MAGMA( Kokkos::complex, + KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA( Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) #endif */ #endif } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack #endif diff --git a/lapack/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp similarity index 89% rename from lapack/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp rename to lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp index 7d8f0a8a2b..dcab48f07b 100644 --- a/lapack/tpls/KokkosBlas_gesv_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp @@ -14,21 +14,21 @@ // //@HEADER -#ifndef KOKKOSBLAS_GESV_TPL_SPEC_DECL_HPP_ -#define KOKKOSBLAS_GESV_TPL_SPEC_DECL_HPP_ +#ifndef KOKKOSLAPACK_GESV_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GESV_TPL_SPEC_DECL_HPP_ -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { template inline void gesv_print_specialization() { #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA - printf("KokkosBlas::gesv<> TPL MAGMA specialization for < %s , %s, %s >\n", + printf("KokkosLapack::gesv<> TPL MAGMA specialization for < %s , %s, %s >\n", typeid(AViewType).name(), typeid(BViewType).name(), typeid(PViewType).name()); #else -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS - printf("KokkosBlas::gesv<> TPL Blas specialization for < %s , %s, %s >\n", +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + printf("KokkosLapack::gesv<> TPL Lapack specialization for < %s , %s, %s >\n", typeid(AViewType).name(), typeid(BViewType).name(), typeid(PViewType).name()); #endif @@ -36,16 +36,16 @@ inline void gesv_print_specialization() { #endif } } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack -// Generic Host side BLAS (could be MKL or whatever) -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#include +// Generic Host side LAPACK (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -#define KOKKOSBLAS_DGESV_BLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_DGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -74,7 +74,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gesv[TPL_BLAS,double]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,double]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -89,14 +89,14 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostBlas::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ + HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ LDB, info); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSBLAS_SGESV_BLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_SGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -125,7 +125,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gesv[TPL_BLAS,float]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,float]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -140,14 +140,14 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostBlas::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ + HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ LDB, info); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSBLAS_ZGESV_BLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_ZGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -178,7 +178,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gesv[TPL_BLAS,complex]"); \ + "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -193,7 +193,7 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostBlas >::gesv( \ + HostLapack >::gesv( \ N, NRHS, reinterpret_cast*>(A.data()), LDA, \ IPIV.data(), reinterpret_cast*>(B.data()), \ LDB, info); \ @@ -202,7 +202,7 @@ namespace Impl { } \ }; -#define KOKKOSBLAS_CGESV_BLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_CGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -233,7 +233,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gesv[TPL_BLAS,complex]"); \ + "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -248,7 +248,7 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostBlas >::gesv( \ + HostLapack >::gesv( \ N, NRHS, reinterpret_cast*>(A.data()), LDA, \ IPIV.data(), reinterpret_cast*>(B.data()), \ LDB, info); \ @@ -257,30 +257,30 @@ namespace Impl { } \ }; -KOKKOSBLAS_DGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS_DGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSLAPACK_DGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSLAPACK_DGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS_SGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS_SGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSLAPACK_SGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSLAPACK_SGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS_ZGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS_ZGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSLAPACK_ZGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSLAPACK_ZGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) -KOKKOSBLAS_CGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, true) -KOKKOSBLAS_CGESV_BLAS(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSLAPACK_CGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSLAPACK_CGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) } // namespace Impl -} // namespace KokkosBlas -#endif // KOKKOSKERNELS_ENABLE_TPL_BLAS +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#include +#include -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -#define KOKKOSBLAS_DGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_DGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -309,7 +309,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gesv[TPL_MAGMA,double]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,double]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -321,8 +321,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosBlas::Impl::MagmaSingleton& s = \ - KokkosBlas::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -339,7 +339,7 @@ namespace Impl { } \ }; -#define KOKKOSBLAS_SGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_SGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -368,7 +368,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gesv[TPL_MAGMA,float]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,float]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -380,8 +380,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosBlas::Impl::MagmaSingleton& s = \ - KokkosBlas::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -398,7 +398,7 @@ namespace Impl { } \ }; -#define KOKKOSBLAS_ZGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_ZGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -429,7 +429,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gesv[TPL_MAGMA,complex]"); \ + "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -441,8 +441,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosBlas::Impl::MagmaSingleton& s = \ - KokkosBlas::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -459,7 +459,7 @@ namespace Impl { } \ }; -#define KOKKOSBLAS_CGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_CGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -490,7 +490,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gesv[TPL_MAGMA,complex]"); \ + "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -502,8 +502,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosBlas::Impl::MagmaSingleton& s = \ - KokkosBlas::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -520,20 +520,20 @@ namespace Impl { } \ }; -KOKKOSBLAS_DGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS_DGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSLAPACK_DGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSLAPACK_DGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS_SGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS_SGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSLAPACK_SGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSLAPACK_SGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS_ZGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS_ZGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSLAPACK_ZGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSLAPACK_ZGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) -KOKKOSBLAS_CGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS_CGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSLAPACK_CGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSLAPACK_CGESV_MAGMA(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA #endif diff --git a/lapack/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp similarity index 56% rename from lapack/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp rename to lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp index de9fc08c99..d723cef260 100644 --- a/lapack/tpls/KokkosBlas_trtri_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp @@ -14,10 +14,10 @@ // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_HPP_ -#define KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_HPP_ +#ifndef KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_HPP_ -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { // Specialization struct which defines whether a specialization exists @@ -27,7 +27,7 @@ struct trtri_tpl_spec_avail { }; // Generic Host side LAPACK (could be MKL or whatever) -#define KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ +#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ template \ struct trtri_tpl_spec_avail< \ Kokkos::View, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) -KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) } // namespace Impl -} // namespace KokkosBlas +} // namespace KokkosLapack -#endif // KOKKOSBLAS_TRTRI_TPL_SPEC_AVAIL_HPP_ +#endif // KOKKOSLAPACKy_TRTRI_TPL_SPEC_AVAIL_HPP_ diff --git a/lapack/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp similarity index 73% rename from lapack/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp rename to lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp index 46ec894547..9f79ad2eb5 100644 --- a/lapack/tpls/KokkosBlas_trtri_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp @@ -14,17 +14,17 @@ // //@HEADER -#ifndef KOKKOSBLAS_TRTRI_TPL_SPEC_DECL_HPP_ -#define KOKKOSBLAS_TRTRI_TPL_SPEC_DECL_HPP_ +#ifndef KOKKOSLAPACK_TRTRI_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_TRTRI_TPL_SPEC_DECL_HPP_ -#include "KokkosBlas_Host_tpl.hpp" // trtri prototype -#include "KokkosBlas_tpl_spec.hpp" +#include "KokkosLapack_Host_tpl.hpp" // trtri prototype +#include "KokkosLapack_tpl_spec.hpp" -namespace KokkosBlas { +namespace KokkosLapack { namespace Impl { -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS_TRTRI_BLAS_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct TRTRI(A.extent(0)); \ \ @@ -61,19 +61,19 @@ namespace Impl { else \ uplo_ = A_is_layout_left ? 'U' : 'L'; \ \ - R() = HostBlas::trtri( \ + R() = HostLapack::trtri( \ uplo_, diag[0], M, \ reinterpret_cast(A.data()), LDA); \ Kokkos::Profiling::popRegion(); \ } \ }; #else -#define KOKKOSBLAS_TRTRI_BLAS_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ +#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ MEM_SPACE, ETI_SPEC_AVAIL) -#endif // KOKKOSKERNELS_ENABLE_TPL_BLAS +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#define KOKKOSBLAS_TRTRI_BLAS_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct TRTRI(A.extent(0)); \ \ @@ -116,8 +116,8 @@ namespace Impl { else \ diag_ = MagmaNonUnit; \ \ - KokkosBlas::Impl::MagmaSingleton& s = \ - KokkosBlas::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ R() = MAGMA_FN(uplo_, diag_, M, \ reinterpret_cast( \ const_cast(A.data())), \ @@ -126,71 +126,71 @@ namespace Impl { } \ }; #else -#define KOKKOSBLAS_TRTRI_BLAS_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA // Explicitly define the TRTRI class for all permutations listed below // Handle type and space permutations -#define KOKKOSBLAS_DTRTRI_BLAS(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) -#define KOKKOSBLAS_STRTRI_BLAS(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ +#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) -#define KOKKOSBLAS_ZTRTRI_BLAS(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_HOST(Kokkos::complex, std::complex, \ +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ magma_ztrtri_gpu, LAYOUTA, Kokkos::CudaSpace, \ ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ magma_ztrtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ ETI_SPEC_AVAIL) -#define KOKKOSBLAS_CTRTRI_BLAS(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_HOST(Kokkos::complex, std::complex, \ +#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ magma_ctrtri_gpu, LAYOUTA, Kokkos::CudaSpace, \ ETI_SPEC_AVAIL) \ - KOKKOSBLAS_TRTRI_BLAS_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ magma_ctrtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ ETI_SPEC_AVAIL) // Handle layout permutations -KOKKOSBLAS_DTRTRI_BLAS(Kokkos::LayoutLeft, true) -KOKKOSBLAS_DTRTRI_BLAS(Kokkos::LayoutLeft, false) -KOKKOSBLAS_DTRTRI_BLAS(Kokkos::LayoutRight, true) -KOKKOSBLAS_DTRTRI_BLAS(Kokkos::LayoutRight, false) - -KOKKOSBLAS_STRTRI_BLAS(Kokkos::LayoutLeft, true) -KOKKOSBLAS_STRTRI_BLAS(Kokkos::LayoutLeft, false) -KOKKOSBLAS_STRTRI_BLAS(Kokkos::LayoutRight, true) -KOKKOSBLAS_STRTRI_BLAS(Kokkos::LayoutRight, false) - -KOKKOSBLAS_ZTRTRI_BLAS(Kokkos::LayoutLeft, true) -KOKKOSBLAS_ZTRTRI_BLAS(Kokkos::LayoutLeft, false) -KOKKOSBLAS_ZTRTRI_BLAS(Kokkos::LayoutRight, true) -KOKKOSBLAS_ZTRTRI_BLAS(Kokkos::LayoutRight, false) - -KOKKOSBLAS_CTRTRI_BLAS(Kokkos::LayoutLeft, true) -KOKKOSBLAS_CTRTRI_BLAS(Kokkos::LayoutLeft, false) -KOKKOSBLAS_CTRTRI_BLAS(Kokkos::LayoutRight, true) -KOKKOSBLAS_CTRTRI_BLAS(Kokkos::LayoutRight, false) +KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutLeft, true) +KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutLeft, false) +KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutRight, true) +KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutRight, false) + +KOKKOSLAPACK_STRTRI_LAPACK(Kokkos::LayoutLeft, true) +KOKKOSLAPACK_STRTRI_LAPACK(Kokkos::LayoutLeft, false) +KOKKOSLAPACK_STRTRI_LAPACK(Kokkos::LayoutRight, true) +KOKKOSLAPACK_STRTRI_LAPACK(Kokkos::LayoutRight, false) + +KOKKOSLAPACK_ZTRTRI_LAPACK(Kokkos::LayoutLeft, true) +KOKKOSLAPACK_ZTRTRI_LAPACK(Kokkos::LayoutLeft, false) +KOKKOSLAPACK_ZTRTRI_LAPACK(Kokkos::LayoutRight, true) +KOKKOSLAPACK_ZTRTRI_LAPACK(Kokkos::LayoutRight, false) + +KOKKOSLAPACK_CTRTRI_LAPACK(Kokkos::LayoutLeft, true) +KOKKOSLAPACK_CTRTRI_LAPACK(Kokkos::LayoutLeft, false) +KOKKOSLAPACK_CTRTRI_LAPACK(Kokkos::LayoutRight, true) +KOKKOSLAPACK_CTRTRI_LAPACK(Kokkos::LayoutRight, false) } // namespace Impl -} // nameSpace KokkosBlas +} // nameSpace KokkosLapack -#endif // KOKKOSBLAS_TRTRI_TPL_SPEC_DECL_HPP_ +#endif // KOKKOSLAPACK_TRTRI_TPL_SPEC_DECL_HPP_ diff --git a/lapack/unit_test/Test_Blas_gesv.hpp b/lapack/unit_test/Test_Lapack_gesv.hpp similarity index 83% rename from lapack/unit_test/Test_Blas_gesv.hpp rename to lapack/unit_test/Test_Lapack_gesv.hpp index 57ee6373bf..25d5089a58 100644 --- a/lapack/unit_test/Test_Blas_gesv.hpp +++ b/lapack/unit_test/Test_Lapack_gesv.hpp @@ -14,19 +14,19 @@ // //@HEADER -// only enable this test where KokkosBlas supports gesv: -// CUDA+MAGMA and HOST+BLAS -#if (defined(TEST_CUDA_BLAS_CPP) && \ +// only enable this test where KokkosLapack supports gesv: +// CUDA+MAGMA and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && \ defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ - (defined(TEST_OPENMP_BLAS_CPP) || defined(TEST_OPENMPTARGET_BLAS_CPP) || \ - defined(TEST_SERIAL_BLAS_CPP) || defined(TEST_THREADS_BLAS_CPP))) + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_OPENMPTARGET_LAPACK_CPP) || \ + defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) #include #include #include -#include +#include #include #include #include @@ -89,7 +89,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { // Solve. try { - KokkosBlas::gesv(A, B, ipiv); + KokkosLapack::gesv(A, B, ipiv); } catch (const std::runtime_error& error) { // Check for expected runtime errors due to: // no-pivoting case (note: only MAGMA supports no-pivoting interface) @@ -97,7 +97,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { bool nopivot_runtime_err = false; bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // and have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); @@ -106,7 +106,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { notpl_runtime_err = true; #endif #else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // but have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; #else @@ -194,7 +194,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, // Solve. try { - KokkosBlas::gesv(A, B, ipiv); + KokkosLapack::gesv(A, B, ipiv); } catch (const std::runtime_error& error) { // Check for expected runtime errors due to: // no-pivoting case (note: only MAGMA supports no-pivoting interface) @@ -202,7 +202,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, bool nopivot_runtime_err = false; bool notpl_runtime_err = false; #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // and have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); @@ -211,7 +211,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, notpl_runtime_err = true; #endif #else // not have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS // but have BLAS TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; #else @@ -342,16 +342,16 @@ int test_gesv_mrhs(const char* mode) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_float"); - test_gesv("N"); // No pivoting - test_gesv("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_float"); + test_gesv("N"); // No pivoting + test_gesv("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_mrhs_float"); - test_gesv_mrhs("N"); // No pivoting - test_gesv_mrhs("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_float"); + test_gesv_mrhs("N"); // No pivoting + test_gesv_mrhs("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -360,16 +360,16 @@ TEST_F(TestCategory, gesv_mrhs_float) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_double"); - test_gesv("N"); // No pivoting - test_gesv("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_double"); + test_gesv("N"); // No pivoting + test_gesv("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_mrhs_double"); - test_gesv_mrhs("N"); // No pivoting - test_gesv_mrhs("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_double"); + test_gesv_mrhs("N"); // No pivoting + test_gesv_mrhs("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -378,16 +378,17 @@ TEST_F(TestCategory, gesv_mrhs_double) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_complex_double"); - test_gesv, TestDevice>("N"); // No pivoting - test_gesv, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_double"); + test_gesv, TestExecSpace>("N"); // No pivoting + test_gesv, TestExecSpace>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_complex_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_mrhs_complex_double"); - test_gesv_mrhs, TestDevice>("N"); // No pivoting - test_gesv_mrhs, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_double"); + test_gesv_mrhs, TestExecSpace>("N"); // No pivoting + test_gesv_mrhs, TestExecSpace>( + "Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -396,18 +397,19 @@ TEST_F(TestCategory, gesv_mrhs_complex_double) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_complex_float"); - test_gesv, TestDevice>("N"); // No pivoting - test_gesv, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_float"); + test_gesv, TestExecSpace>("N"); // No pivoting + test_gesv, TestExecSpace>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_complex_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::gesv_mrhs_complex_float"); - test_gesv_mrhs, TestDevice>("N"); // No pivoting - test_gesv_mrhs, TestDevice>("Y"); // Partial pivoting + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_float"); + test_gesv_mrhs, TestExecSpace>("N"); // No pivoting + test_gesv_mrhs, TestExecSpace>( + "Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif -#endif // CUDA+MAGMA or BLAS+HOST +#endif // CUDA+MAGMA or LAPACK+HOST diff --git a/lapack/unit_test/Test_Blas_trtri.hpp b/lapack/unit_test/Test_Lapack_trtri.hpp similarity index 88% rename from lapack/unit_test/Test_Blas_trtri.hpp rename to lapack/unit_test/Test_Lapack_trtri.hpp index aa12fa959b..498b1248f3 100644 --- a/lapack/unit_test/Test_Blas_trtri.hpp +++ b/lapack/unit_test/Test_Lapack_trtri.hpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -118,7 +118,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, // const int As0 = A.stride(0), As1 = A.stride(1); // const int Ae0 = A.extent(0), Ae1 = A.extent(1); - // printf("KokkosBlas::trtri test for %c %c, M %d, N %d, eps %g, ViewType: %s, + // printf("KokkosLapack::trtri test for %c %c, M %d, N %d, eps %g, ViewType: %s, // A.stride(0): %d, A.stride(1): %d, A.extent(0): %d, A.extent(1): %d // START\n", uplo[0],diag[0],M,N,eps,typeid(ViewTypeA).name(), As0, As1, Ae0, // Ae1); fflush(stdout); @@ -141,7 +141,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, host_A(bad_diag_idx - 1, bad_diag_idx - 1) = ScalarA(0); Kokkos::deep_copy(A, host_A); } - return KokkosBlas::trtri(uplo, diag, A); + return KokkosLapack::trtri(uplo, diag, A); } // If M is greater than 100 and A is an unit triangluar matrix, make A the @@ -158,13 +158,13 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, using functor_type = UnitDiagTRTRI; functor_type udtrtri(A); // Initialize As diag with 1s - Kokkos::parallel_for("KokkosBlas::Test::UnitDiagTRTRI", + Kokkos::parallel_for("KokkosLapack::Test::UnitDiagTRTRI", Kokkos::RangePolicy(0, M), udtrtri); } else { //(diag[0]=='N')||(diag[0]=='n') using functor_type = NonUnitDiagTRTRI; functor_type nudtrtri(A); // Initialize As diag with A(i,i)+10 - Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRTRI", + Kokkos::parallel_for("KokkosLapack::Test::NonUnitDiagTRTRI", Kokkos::RangePolicy(0, M), nudtrtri); } Kokkos::fence(); @@ -195,11 +195,11 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, #endif // A = A^-1 - ret = KokkosBlas::trtri(uplo, diag, A); + ret = KokkosLapack::trtri(uplo, diag, A); Kokkos::fence(); if (ret) { - printf("KokkosBlas::trtri(%c, %c, %s) returned %d\n", uplo[0], diag[0], + printf("KokkosLapack::trtri(%c, %c, %s) returned %d\n", uplo[0], diag[0], typeid(ViewTypeA).name(), ret); return ret; } @@ -229,7 +229,7 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, vgemm.alpha = ScalarA(1); vgemm.beta = beta; Kokkos::parallel_for( - "KokkosBlas::Test::VanillaGEMM", + "KokkosLapack::Test::VanillaGEMM", Kokkos::TeamPolicy( M, Kokkos::AUTO, KokkosKernels::Impl::kk_get_max_vector_size()), @@ -362,11 +362,11 @@ int test_trtri(const char* mode) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::trtri_float"); - test_trtri("UN"); - test_trtri("UU"); - test_trtri("LN"); - test_trtri("LU"); + Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_float"); + test_trtri("UN"); + test_trtri("UU"); + test_trtri("LN"); + test_trtri("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -375,11 +375,11 @@ TEST_F(TestCategory, trtri_float) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::trtri_double"); - test_trtri("UN"); - test_trtri("UU"); - test_trtri("LN"); - test_trtri("LU"); + Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_double"); + test_trtri("UN"); + test_trtri("UU"); + test_trtri("LN"); + test_trtri("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -388,11 +388,11 @@ TEST_F(TestCategory, trtri_double) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_double) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::trtri_complex_double"); - test_trtri, TestDevice>("UN"); - test_trtri, TestDevice>("UU"); - test_trtri, TestDevice>("LN"); - test_trtri, TestDevice>("LU"); + Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_double"); + test_trtri, TestExecSpace>("UN"); + test_trtri, TestExecSpace>("UU"); + test_trtri, TestExecSpace>("LN"); + test_trtri, TestExecSpace>("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -401,11 +401,11 @@ TEST_F(TestCategory, trtri_complex_double) { (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_float) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::trtri_complex_float"); - test_trtri, TestDevice>("UN"); - test_trtri, TestDevice>("UU"); - test_trtri, TestDevice>("LN"); - test_trtri, TestDevice>("LU"); + Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_float"); + test_trtri, TestExecSpace>("UN"); + test_trtri, TestExecSpace>("UU"); + test_trtri, TestExecSpace>("LN"); + test_trtri, TestExecSpace>("LU"); Kokkos::Profiling::popRegion(); } #endif From 05afd000f0b76eac0af6143b7d1150329482d484 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 6 Sep 2023 22:10:12 -0600 Subject: [PATCH 04/22] Backup --- blas/CMakeLists.txt | 14 --- lapack/CMakeLists.txt | 67 ++++++++++++ lapack/tpls/KokkosLapack_Host_tpl.cpp | 151 ++++++++++++++++++++++++++ 3 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 lapack/CMakeLists.txt create mode 100644 lapack/tpls/KokkosLapack_Host_tpl.cpp diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 816d68e443..5bc7217cfd 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -101,13 +101,6 @@ KOKKOSKERNELS_GENERATE_ETI(Blas1_dot_mv dot TYPE_LISTS FLOATS LAYOUTS DEVICES ) -KOKKOSKERNELS_GENERATE_ETI(Blas_gesv gesv - COMPONENTS blas - HEADER_LIST ETI_HEADERS - SOURCE_LIST SOURCES - TYPE_LISTS FLOATS LAYOUTS DEVICES -) - KOKKOSKERNELS_GENERATE_ETI(Blas1_axpby axpby COMPONENTS blas HEADER_LIST ETI_HEADERS @@ -331,10 +324,3 @@ KOKKOSKERNELS_GENERATE_ETI(Blas3_trmm trmm SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES ) - -KOKKOSKERNELS_GENERATE_ETI(Blas_trtri trtri - COMPONENTS blas - HEADER_LIST ETI_HEADERS - SOURCE_LIST SOURCES - TYPE_LISTS FLOATS LAYOUTS DEVICES -) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt new file mode 100644 index 0000000000..0f38d0aa50 --- /dev/null +++ b/lapack/CMakeLists.txt @@ -0,0 +1,67 @@ +LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lapack/src) +LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lapack/impl) +LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lapack/eti) +LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/lapack/eti) +LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lapack/tpls) + +# Adding unit-tests +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/lapack) +KOKKOSKERNELS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}/lapack) + +####################### +# # +# Logic for LAPACK TPLs # +# # +####################### + +#Include LAPACK, Lapack host wrapper +IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNELS_ENABLE_TPL_ARMPL) + #Do NOT add this to include path + APPEND_GLOB(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/tpls/KokkosLapack_Host_tpl.cpp) +ENDIF() + +# Include host lapack TPL source file +IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNELS_ENABLE_TPL_ARMPL) + LIST(APPEND SOURCES + lapack/tpls/KokkosLapack_Host_tpl.cpp + ) +ENDIF() + +# Include cuda lapack TPL source file +IF (KOKKOSKERNELS_ENABLE_TPL_CULAPACK) + LIST(APPEND SOURCES + lapack/tpls/KokkosLapack_Cuda_tpl.cpp + ) +ENDIF() + +# Include rocm lapack TPL source file +IF (KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK) + LIST(APPEND SOURCES + lapack/tpls/KokkosLapack_Rocm_tpl.cpp + ) +ENDIF() + +################## +# # +# ETI generation # +# # +################## + +#Build up a list of DECL, AVAIL, and INST macros +#that should be instantiated based on input options +#Generate @X@ variables in the template X.hpp.in and X.cpp.in +#files containing the list of all needed macros + +KOKKOSKERNELS_GENERATE_ETI(Lapack_gesv gesv + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_trtri trtri + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp new file mode 100644 index 0000000000..8e7158528e --- /dev/null +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -0,0 +1,151 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +/// \file KokkosLapack_Host_tpl.cpp +/// \brief LAPACK wrapper for host tpls +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "KokkosKernels_config.h" +#include "KokkosLapack_Host_tpl.hpp" + +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) + +/// Fortran headers +extern "C" { + +/// +/// Gesv +/// + +void F77_LAPACK_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, + int*); +void F77_LAPACK_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, + int*, int*); +void F77_LAPACK_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, + std::complex*, int*, int*); +void F77_LAPACK_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, + int*, std::complex*, int*, int*); + +/// +/// Trtri +/// +/* + HostLapack::trtri(const char uplo, const char diag, + int n, const float *a, int lda) { + int info = 0; + F77_FUNC_STRTRI(&uplo, + &diag, &n, + a, &lda, &info); +*/ +void F77_LAPACK_MANGLE(strtri, STRTRI)(const char*, const char*, int*, + const float*, int*, int*); +void F77_LAPACK_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, + const double*, int*, int*); +void F77_LAPACK_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, + const std::complex*, int*, int*); +void F77_LAPACK_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, + const std::complex*, int*, int*); +} + +#define F77_FUNC_SGESV F77_LAPACK_MANGLE(sgesv, SGESV) +#define F77_FUNC_DGESV F77_LAPACK_MANGLE(dgesv, DGESV) +#define F77_FUNC_CGESV F77_LAPACK_MANGLE(cgesv, CGESV) +#define F77_FUNC_ZGESV F77_LAPACK_MANGLE(zgesv, ZGESV) + +#define F77_FUNC_STRTRI F77_LAPACK_MANGLE(strtri, STRTRI) +#define F77_FUNC_DTRTRI F77_LAPACK_MANGLE(dtrtri, DTRTRI) +#define F77_FUNC_CTRTRI F77_LAPACK_MANGLE(ctrtri, CTRTRI) +#define F77_FUNC_ZTRTRI F77_LAPACK_MANGLE(ztrtri, ZTRTRI) + +namespace KokkosLapack { +namespace Impl { + +/// +/// float +/// + +template <> +void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, + float* b, int ldb, int info) { + F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); +} +template <> +int HostLapack::trtri(const char uplo, const char diag, int n, + const float* a, int lda) { + int info = 0; + F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); + return info; +} + +/// +/// double +/// + +template <> +void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, + double* b, int ldb, int info) { + F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); +} +template <> +int HostLapack::trtri(const char uplo, const char diag, int n, + const double* a, int lda) { + int info = 0; + F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); + return info; +} + +/// +/// std::complex +/// + +template <> +void HostLapack >::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { + F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); +} +template <> +int HostLapack >::trtri(const char uplo, const char diag, + int n, const std::complex* a, + int lda) { + int info = 0; + F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); + return info; +} + +/// +/// std::complex +/// + +template <> +void HostLapack >::gesv(int n, int rhs, + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { + F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); +} +template <> +int HostLapack >::trtri(const char uplo, const char diag, + int n, const std::complex* a, + int lda) { + int info = 0; + F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); + return info; +} + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK From e455f377431168d213b023e703d8111e6cbd765d Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 6 Sep 2023 22:25:25 -0600 Subject: [PATCH 05/22] Backup --- blas/tpls/KokkosBlas_Host_tpl.cpp | 96 ------------------------------- blas/unit_test/Test_Blas.hpp | 3 - lapack/unit_test/CMakeLists.txt | 94 ++++++++++++++++++++++++++++++ lapack/unit_test/Test_Lapack.hpp | 22 +++++++ 4 files changed, 116 insertions(+), 99 deletions(-) create mode 100644 lapack/unit_test/CMakeLists.txt create mode 100644 lapack/unit_test/Test_Lapack.hpp diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index a7be0d31ab..88c3ef7bbd 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -412,38 +412,6 @@ void F77_BLAS_MANGLE(ztrsm, ZTRSM)(const char*, const char*, const char*, const std::complex*, int*, /* */ std::complex*, int*); -/// -/// Gesv -/// - -void F77_BLAS_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, - int*); -void F77_BLAS_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, - int*, int*); -void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, - std::complex*, int*, int*); -void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, - int*, std::complex*, int*, int*); - -/// -/// Trtri -/// -/* - HostBlas::trtri(const char uplo, const char diag, - int n, const float *a, int lda) { - int info = 0; - F77_FUNC_STRTRI(&uplo, - &diag, &n, - a, &lda, &info); -*/ -void F77_BLAS_MANGLE(strtri, STRTRI)(const char*, const char*, int*, - const float*, int*, int*); -void F77_BLAS_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, - const double*, int*, int*); -void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, - const std::complex*, int*, int*); -void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, - const std::complex*, int*, int*); } void F77_BLAS_MANGLE(sscal, SSCAL)(const int* N, const float* alpha, @@ -559,16 +527,6 @@ void F77_BLAS_MANGLE(zscal, #define F77_FUNC_CTRSM F77_BLAS_MANGLE(ctrsm, CTRSM) #define F77_FUNC_ZTRSM F77_BLAS_MANGLE(ztrsm, ZTRSM) -#define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) -#define F77_FUNC_DGESV F77_BLAS_MANGLE(dgesv, DGESV) -#define F77_FUNC_CGESV F77_BLAS_MANGLE(cgesv, CGESV) -#define F77_FUNC_ZGESV F77_BLAS_MANGLE(zgesv, ZGESV) - -#define F77_FUNC_STRTRI F77_BLAS_MANGLE(strtri, STRTRI) -#define F77_FUNC_DTRTRI F77_BLAS_MANGLE(dtrtri, DTRTRI) -#define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) -#define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI) - namespace KokkosBlas { namespace Impl { @@ -688,18 +646,6 @@ void HostBlas::trsm(const char side, const char uplo, const char transa, F77_FUNC_STRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } -template <> -void HostBlas::gesv(int n, int rhs, float* a, int lda, int* ipiv, - float* b, int ldb, int info) { - F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); -} -template <> -int HostBlas::trtri(const char uplo, const char diag, int n, - const float* a, int lda) { - int info = 0; - F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); - return info; -} /// /// double @@ -818,18 +764,6 @@ void HostBlas::trsm(const char side, const char uplo, const char transa, F77_FUNC_DTRSM(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); } -template <> -void HostBlas::gesv(int n, int rhs, double* a, int lda, int* ipiv, - double* b, int ldb, int info) { - F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); -} -template <> -int HostBlas::trtri(const char uplo, const char diag, int n, - const double* a, int lda) { - int info = 0; - F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); - return info; -} /// /// std::complex @@ -1000,21 +934,6 @@ void HostBlas >::trsm(const char side, const char uplo, (const std::complex*)a, &lda, (std::complex*)b, &ldb); } -template <> -void HostBlas >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { - F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); -} -template <> -int HostBlas >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) { - int info = 0; - F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); - return info; -} /// /// std::complex @@ -1183,21 +1102,6 @@ void HostBlas >::trsm( (const std::complex*)a, &lda, (std::complex*)b, &ldb); } -template <> -void HostBlas >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { - F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); -} -template <> -int HostBlas >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) { - int info = 0; - F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); - return info; -} } // namespace Impl } // namespace KokkosBlas diff --git a/blas/unit_test/Test_Blas.hpp b/blas/unit_test/Test_Blas.hpp index b370436391..9bb37d8d95 100644 --- a/blas/unit_test/Test_Blas.hpp +++ b/blas/unit_test/Test_Blas.hpp @@ -16,9 +16,6 @@ #ifndef TEST_BLAS_HPP #define TEST_BLAS_HPP -#include "Test_Blas_gesv.hpp" -#include "Test_Blas_trtri.hpp" - // Blas 1 #include "Test_Blas1_abs.hpp" #include "Test_Blas1_asum.hpp" diff --git a/lapack/unit_test/CMakeLists.txt b/lapack/unit_test/CMakeLists.txt new file mode 100644 index 0000000000..b0ccaf8e7e --- /dev/null +++ b/lapack/unit_test/CMakeLists.txt @@ -0,0 +1,94 @@ +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/test_common) +KOKKOSKERNELS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${PACKAGE_SOURCE_DIR}/test_common) + +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOSKERNELS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) + +##################### +# # +# Define unit-tests # +# # +##################### + +##################### +# # +# Add GPU backends # +# # +##################### +IF (KOKKOS_ENABLE_CUDA) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_cuda + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_Cuda_Blas.cpp + COMPONENTS blas + ) +ENDIF () + +IF (KOKKOS_ENABLE_HIP) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_hip + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_HIP_Blas.cpp + COMPONENTS blas + ) +ENDIF () + +IF (KOKKOS_ENABLE_SYCL) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_sycl + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_SYCL_Blas.cpp + COMPONENTS blas + ) +ENDIF () + +IF (KOKKOS_ENABLE_OPENMPTARGET) + # KOKKOSKERNELS_ADD_UNIT_TEST( + # blas_openmptarget + # SOURCES + # ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + # backends/Test_OpenMPTarget_Blas.cpp + # COMPONENTS blas + # ) +ENDIF () + + + +##################### +# # +# Add CPU backends # +# # +##################### +IF (KOKKOS_ENABLE_SERIAL) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_serial + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_Serial_Blas.cpp + COMPONENTS blas + ) +ENDIF () + +IF (KOKKOS_ENABLE_OPENMP) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_openmp + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_OpenMP_Blas.cpp + COMPONENTS blas + ) +ENDIF () + +IF (KOKKOS_ENABLE_THREADS) + KOKKOSKERNELS_ADD_UNIT_TEST( + blas_threads + SOURCES + ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp + backends/Test_Threads_Blas.cpp + COMPONENTS blas + ) +ENDIF () + diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp new file mode 100644 index 0000000000..815c442884 --- /dev/null +++ b/lapack/unit_test/Test_Lapack.hpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_LAPACK_HPP +#define TEST_LAPACK_HPP + +#include "Test_Lapack_gesv.hpp" +#include "Test_Lapack_trtri.hpp" + +#endif // TEST_LAPACK_HPP From a63d094422b98c1dc7709d654f6f895448781786 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Thu, 7 Sep 2023 01:24:46 -0600 Subject: [PATCH 06/22] Backup --- cmake/kokkoskernels_components.cmake | 10 + cmake/kokkoskernels_tpls.cmake | 15 ++ lapack/tpls/KokkosLapack_Host_tpl.cpp | 33 +-- lapack/tpls/KokkosLapack_tpl_spec.hpp | 234 ++++++++++++++++++ lapack/unit_test/CMakeLists.txt | 42 ++-- .../unit_test/backends/Test_Serial_Lapack.cpp | 22 ++ sparse/src/KokkosSparse_sptrsv_supernode.hpp | 6 +- 7 files changed, 322 insertions(+), 40 deletions(-) create mode 100644 lapack/tpls/KokkosLapack_tpl_spec.hpp create mode 100644 lapack/unit_test/backends/Test_Serial_Lapack.cpp diff --git a/cmake/kokkoskernels_components.cmake b/cmake/kokkoskernels_components.cmake index 1feb5bb8b8..16a784bd1f 100644 --- a/cmake/kokkoskernels_components.cmake +++ b/cmake/kokkoskernels_components.cmake @@ -29,6 +29,13 @@ KOKKOSKERNELS_ADD_OPTION( "Whether to build the blas component. Default: OFF" ) +KOKKOSKERNELS_ADD_OPTION( + "ENABLE_COMPONENT_LAPACK" + OFF + BOOL + "Whether to build the lapack component. Default: OFF" +) + # SPARSE depends on everything else at the moment. KOKKOSKERNELS_ADD_OPTION( "ENABLE_COMPONENT_SPARSE" @@ -67,6 +74,7 @@ ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_SPARSE) SET(KokkosKernels_ENABLE_COMPONENT_BATCHED ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_BLAS ON CACHE BOOL "" FORCE) + SET(KokkosKernels_ENABLE_COMPONENT_LAPACK ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_GRAPH ON CACHE BOOL "" FORCE) ENDIF() @@ -74,6 +82,7 @@ ENDIF() IF (KokkosKernels_ENABLE_ALL_COMPONENTS) SET(KokkosKernels_ENABLE_COMPONENT_BATCHED ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_BLAS ON CACHE BOOL "" FORCE) + SET(KokkosKernels_ENABLE_COMPONENT_LAPACK ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_SPARSE ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_GRAPH ON CACHE BOOL "" FORCE) SET(KokkosKernels_ENABLE_COMPONENT_ODE ON CACHE BOOL "" FORCE) @@ -85,6 +94,7 @@ ENDIF() # but marking it as advanced should hide it from GUIs IF ( KokkosKernels_ENABLE_COMPONENT_BATCHED AND KokkosKernels_ENABLE_COMPONENT_BLAS + AND KokkosKernels_ENABLE_COMPONENT_LAPACK AND KokkosKernels_ENABLE_COMPONENT_GRAPH AND KokkosKernels_ENABLE_COMPONENT_SPARSE AND KokkosKernels_ENABLE_COMPONENT_ODE) diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index f650168757..6496487081 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -440,6 +440,20 @@ IF ("${F77_BLAS_MANGLE}" STREQUAL "") ENDIF() ENDIF() +# AquiEEP +IF ("${F77_LAPACK_MANGLE}" STREQUAL "") + IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNELS_ENABLE_TPL_MAGMA OR KOKKOSKERNELS_ENABLE_TPL_ARMPL) + ENABLE_LANGUAGE(C) + ENABLE_LANGUAGE(Fortran) + INCLUDE(FortranCInterface) + IF (FortranCInterface_GLOBAL_SUFFIX STREQUAL "") + SET(F77_LAPACK_MANGLE "(name,NAME) ${FortranCInterface_GLOBAL_PREFIX}name") + ELSE () + SET(F77_LAPACK_MANGLE "(name,NAME) ${FortranCInterface_GLOBAL_PREFIX}name ## ${FortranCInterface_GLOBAL_SUFFIX}") + ENDIF () + ENDIF() +ENDIF() + KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_CUDA_TPLS OFF BOOL "Whether CUDA TPLs should be enabled by default. Default: OFF") SET(CUBLAS_DEFAULT ${KOKKOS_ENABLE_CUDA}) SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) @@ -466,6 +480,7 @@ KOKKOSKERNELS_ADD_TPL_OPTION(ROCBLAS ${ROCBLAS_DEFAULT} "Whether to enable KOKKOSKERNELS_ADD_TPL_OPTION(ROCSPARSE ${ROCSPARSE_DEFAULT} "Whether to enable ROCSPARSE" DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") +#AquiEEP IF (KOKKOSKERNELS_ENABLE_TPL_MAGMA) IF (F77_BLAS_MANGLE STREQUAL "(name,NAME) name ## _") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DADD_ -fopenmp -lgfortran") diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 8e7158528e..6ece9fe914 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -29,13 +29,14 @@ extern "C" { /// Gesv /// -void F77_LAPACK_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, +// AquiEEP +void F77_BLAS_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, int*); -void F77_LAPACK_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, +void F77_BLAS_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, int*, int*); -void F77_LAPACK_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, +void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); -void F77_LAPACK_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, +void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); /// @@ -49,25 +50,25 @@ void F77_LAPACK_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, &diag, &n, a, &lda, &info); */ -void F77_LAPACK_MANGLE(strtri, STRTRI)(const char*, const char*, int*, +void F77_BLAS_MANGLE(strtri, STRTRI)(const char*, const char*, int*, const float*, int*, int*); -void F77_LAPACK_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, +void F77_BLAS_MANGLE(dtrtri, DTRTRI)(const char*, const char*, int*, const double*, int*, int*); -void F77_LAPACK_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, +void F77_BLAS_MANGLE(ctrtri, CTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); -void F77_LAPACK_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, +void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, const std::complex*, int*, int*); } -#define F77_FUNC_SGESV F77_LAPACK_MANGLE(sgesv, SGESV) -#define F77_FUNC_DGESV F77_LAPACK_MANGLE(dgesv, DGESV) -#define F77_FUNC_CGESV F77_LAPACK_MANGLE(cgesv, CGESV) -#define F77_FUNC_ZGESV F77_LAPACK_MANGLE(zgesv, ZGESV) +#define F77_FUNC_SGESV F77_BLAS_MANGLE(sgesv, SGESV) +#define F77_FUNC_DGESV F77_BLAS_MANGLE(dgesv, DGESV) +#define F77_FUNC_CGESV F77_BLAS_MANGLE(cgesv, CGESV) +#define F77_FUNC_ZGESV F77_BLAS_MANGLE(zgesv, ZGESV) -#define F77_FUNC_STRTRI F77_LAPACK_MANGLE(strtri, STRTRI) -#define F77_FUNC_DTRTRI F77_LAPACK_MANGLE(dtrtri, DTRTRI) -#define F77_FUNC_CTRTRI F77_LAPACK_MANGLE(ctrtri, CTRTRI) -#define F77_FUNC_ZTRTRI F77_LAPACK_MANGLE(ztrtri, ZTRTRI) +#define F77_FUNC_STRTRI F77_BLAS_MANGLE(strtri, STRTRI) +#define F77_FUNC_DTRTRI F77_BLAS_MANGLE(dtrtri, DTRTRI) +#define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) +#define F77_FUNC_ZTRTRI F77_BLAS_MANGLE(ztrtri, ZTRTRI) namespace KokkosLapack { namespace Impl { diff --git a/lapack/tpls/KokkosLapack_tpl_spec.hpp b/lapack/tpls/KokkosLapack_tpl_spec.hpp new file mode 100644 index 0000000000..a20c5d9a92 --- /dev/null +++ b/lapack/tpls/KokkosLapack_tpl_spec.hpp @@ -0,0 +1,234 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_TPL_SPEC_HPP_ +#define KOKKOSLAPACK_TPL_SPEC_HPP_ + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CULAPACK +#include "cuda_runtime.h" +#include "culapack_v2.h" + +namespace KokkosLapack { +namespace Impl { + +struct CudaLapackSingleton { + culapackHandle_t handle; + + CudaLapackSingleton(); + + static CudaLapackSingleton& singleton(); +}; + +inline void culapack_internal_error_throw(culapackStatus_t culapackState, + const char* name, const char* file, + const int line) { + std::ostringstream out; + // out << name << " error( " << culapackGetStatusName(culapackState) + // << "): " << culapackGetStatusString(culapackState); + out << name << " error( "; + switch (culapackState) { + case CULAPACK_STATUS_NOT_INITIALIZED: + out << "CULAPACK_STATUS_NOT_INITIALIZED): the library was not initialized."; + break; + case CULAPACK_STATUS_ALLOC_FAILED: + out << "CULAPACK_STATUS_ALLOC_FAILED): the resource allocation failed."; + break; + case CULAPACK_STATUS_INVALID_VALUE: + out << "CULAPACK_STATUS_INVALID_VALUE): an invalid numerical value was " + "used as an argument."; + break; + case CULAPACK_STATUS_ARCH_MISMATCH: + out << "CULAPACK_STATUS_ARCH_MISMATCH): an absent device architectural " + "feature is required."; + break; + case CULAPACK_STATUS_MAPPING_ERROR: + out << "CULAPACK_STATUS_MAPPING_ERROR): an access to GPU memory space " + "failed."; + break; + case CULAPACK_STATUS_EXECUTION_FAILED: + out << "CULAPACK_STATUS_EXECUTION_FAILED): the GPU program failed to " + "execute."; + break; + case CULAPACK_STATUS_INTERNAL_ERROR: + out << "CULAPACK_STATUS_INTERNAL_ERROR): an internal operation failed."; + break; + case CULAPACK_STATUS_NOT_SUPPORTED: + out << "CULAPACK_STATUS_NOT_SUPPORTED): the feature required is not " + "supported."; + break; + default: out << "unrecognized error code): this is bad!"; break; + } + if (file) { + out << " " << file << ":" << line; + } + throw std::runtime_error(out.str()); +} + +inline void culapack_internal_safe_call(culapackStatus_t culapackState, + const char* name, + const char* file = nullptr, + const int line = 0) { + if (CULAPACK_STATUS_SUCCESS != culapackState) { + culapack_internal_error_throw(culapackState, name, file, line); + } +} + +// The macro below defines the interface for the safe culapack calls. +// The functions themselves are protected by impl namespace and this +// is not meant to be used by external application or libraries. +#define KOKKOS_CULAPACK_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::culapack_internal_safe_call(call, #call, __FILE__, __LINE__) + +/// \brief This function converts KK transpose mode to cuLAPACK transpose mode +inline culapackOperation_t trans_mode_kk_to_culapack(const char kkMode[]) { + culapackOperation_t trans; + if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) + trans = CULAPACK_OP_N; + else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) + trans = CULAPACK_OP_T; + else + trans = CULAPACK_OP_C; + return trans; +} + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CULAPACK + +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK +#include + +namespace KokkosLapack { +namespace Impl { + +struct RocLapackSingleton { + roclapack_handle handle; + + RocLapackSingleton(); + + static RocLapackSingleton& singleton(); +}; + +inline void roclapack_internal_error_throw(roclapack_status roclapackState, + const char* name, const char* file, + const int line) { + std::ostringstream out; + out << name << " error( "; + switch (roclapackState) { + case roclapack_status_invalid_handle: + out << "roclapack_status_invalid_handle): handle not initialized, invalid " + "or null."; + break; + case roclapack_status_not_implemented: + out << "roclapack_status_not_implemented): function is not implemented."; + break; + case roclapack_status_invalid_pointer: + out << "roclapack_status_invalid_pointer): invalid pointer argument."; + break; + case roclapack_status_invalid_size: + out << "roclapack_status_invalid_size): invalid size argument."; + break; + case roclapack_status_memory_error: + out << "roclapack_status_memory_error): failed internal memory allocation, " + "copy or dealloc."; + break; + case roclapack_status_internal_error: + out << "roclapack_status_internal_error): other internal library failure."; + break; + case roclapack_status_perf_degraded: + out << "roclapack_status_perf_degraded): performance degraded due to low " + "device memory."; + break; + case roclapack_status_size_query_mismatch: + out << "unmatched start/stop size query): ."; + break; + case roclapack_status_size_increased: + out << "roclapack_status_size_increased): queried device memory size " + "increased."; + break; + case roclapack_status_size_unchanged: + out << "roclapack_status_size_unchanged): queried device memory size " + "unchanged."; + break; + case roclapack_status_invalid_value: + out << "roclapack_status_invalid_value): passed argument not valid."; + break; + case roclapack_status_continue: + out << "roclapack_status_continue): nothing preventing function to " + "proceed."; + break; + case roclapack_status_check_numerics_fail: + out << "roclapack_status_check_numerics_fail): will be set if the " + "vector/matrix has a NaN or an Infinity."; + break; + default: out << "unrecognized error code): this is bad!"; break; + } + if (file) { + out << " " << file << ":" << line; + } + throw std::runtime_error(out.str()); +} + +inline void roclapack_internal_safe_call(roclapack_status roclapackState, + const char* name, + const char* file = nullptr, + const int line = 0) { + if (roclapack_status_success != roclapackState) { + roclapack_internal_error_throw(roclapackState, name, file, line); + } +} + +// The macro below defines the interface for the safe roclapack calls. +// The functions themselves are protected by impl namespace and this +// is not meant to be used by external application or libraries. +#define KOKKOS_ROCLAPACK_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::roclapack_internal_safe_call(call, #call, __FILE__, __LINE__) + +/// \brief This function converts KK transpose mode to rocLAPACK transpose mode +inline roclapack_operation trans_mode_kk_to_roclapack(const char kkMode[]) { + roclapack_operation trans; + if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) + trans = roclapack_operation_none; + else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) + trans = roclapack_operation_transpose; + else + trans = roclapack_operation_conjugate_transpose; + return trans; +} + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK + +// If LAPACK TPL is enabled, it is preferred over magma's LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "magma_v2.h" + +namespace KokkosLapack { +namespace Impl { + +struct MagmaSingleton { + MagmaSingleton(); + + static MagmaSingleton& singleton(); +}; + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA + +#endif // KOKKOSLAPACK_TPL_SPEC_HPP_ diff --git a/lapack/unit_test/CMakeLists.txt b/lapack/unit_test/CMakeLists.txt index b0ccaf8e7e..a2c2305a12 100644 --- a/lapack/unit_test/CMakeLists.txt +++ b/lapack/unit_test/CMakeLists.txt @@ -17,41 +17,41 @@ KOKKOSKERNELS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_C ##################### IF (KOKKOS_ENABLE_CUDA) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_cuda + lapack_cuda SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_Cuda_Blas.cpp - COMPONENTS blas + backends/Test_Cuda_Lapack.cpp + COMPONENTS lapack ) ENDIF () IF (KOKKOS_ENABLE_HIP) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_hip + lapack_hip SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_HIP_Blas.cpp - COMPONENTS blas + backends/Test_HIP_Lapack.cpp + COMPONENTS lapack ) ENDIF () IF (KOKKOS_ENABLE_SYCL) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_sycl + lapack_sycl SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_SYCL_Blas.cpp - COMPONENTS blas + backends/Test_SYCL_Lapack.cpp + COMPONENTS lapack ) ENDIF () IF (KOKKOS_ENABLE_OPENMPTARGET) # KOKKOSKERNELS_ADD_UNIT_TEST( - # blas_openmptarget + # lapack_openmptarget # SOURCES # ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - # backends/Test_OpenMPTarget_Blas.cpp - # COMPONENTS blas + # backends/Test_OpenMPTarget_Lapack.cpp + # COMPONENTS lapack # ) ENDIF () @@ -64,31 +64,31 @@ ENDIF () ##################### IF (KOKKOS_ENABLE_SERIAL) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_serial + lapack_serial SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_Serial_Blas.cpp - COMPONENTS blas + backends/Test_Serial_Lapack.cpp + COMPONENTS lapack ) ENDIF () IF (KOKKOS_ENABLE_OPENMP) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_openmp + lapack_openmp SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_OpenMP_Blas.cpp - COMPONENTS blas + backends/Test_OpenMP_Lapack.cpp + COMPONENTS lapack ) ENDIF () IF (KOKKOS_ENABLE_THREADS) KOKKOSKERNELS_ADD_UNIT_TEST( - blas_threads + lapack_threads SOURCES ${PACKAGE_SOURCE_DIR}/test_common/Test_Main.cpp - backends/Test_Threads_Blas.cpp - COMPONENTS blas + backends/Test_Threads_Lapack.cpp + COMPONENTS lapack ) ENDIF () diff --git a/lapack/unit_test/backends/Test_Serial_Lapack.cpp b/lapack/unit_test/backends/Test_Serial_Lapack.cpp new file mode 100644 index 0000000000..d0324b9642 --- /dev/null +++ b/lapack/unit_test/backends/Test_Serial_Lapack.cpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_SERIAL_LAPACK_CPP +#define TEST_SERIAL_LAPACK_CPP + +#include +#include + +#endif // TEST_SERIAL_LAPACK_CPP diff --git a/sparse/src/KokkosSparse_sptrsv_supernode.hpp b/sparse/src/KokkosSparse_sptrsv_supernode.hpp index 0be3abac08..c6e5d406a7 100644 --- a/sparse/src/KokkosSparse_sptrsv_supernode.hpp +++ b/sparse/src/KokkosSparse_sptrsv_supernode.hpp @@ -27,7 +27,7 @@ #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) #include "KokkosBlas3_trmm.hpp" -#include "KokkosBlas_trtri.hpp" +#include "KokkosLapack_trtri.hpp" #include "KokkosBatched_Trtri_Decl.hpp" #include "KokkosBatched_Trtri_Serial_Impl.hpp" @@ -1472,12 +1472,12 @@ void invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, // call trtri on device auto dViewLjj = Kokkos::subview(dViewL, range_type(0, nscol), Kokkos::ALL()); - KokkosBlas::trtri(&uplo_char, &diag_char, dViewLjj); + KokkosLapack::trtri(&uplo_char, &diag_char, dViewLjj); } else #endif { // call trtri on host - KokkosBlas::trtri(&uplo_char, &diag_char, Ljj); + KokkosLapack::trtri(&uplo_char, &diag_char, Ljj); } #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time1 += timer.seconds(); From 8aecf38948f9c4e42804674a0afb9c44d5d677b8 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Thu, 7 Sep 2023 01:59:16 -0600 Subject: [PATCH 07/22] Backup --- .../unit_test/backends/Test_Cuda_Lapack.cpp | 22 +++++++++++++++++++ .../unit_test/backends/Test_OpenMP_Lapack.cpp | 22 +++++++++++++++++++ .../backends/Test_Threads_Lapack.cpp | 22 +++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 lapack/unit_test/backends/Test_Cuda_Lapack.cpp create mode 100644 lapack/unit_test/backends/Test_OpenMP_Lapack.cpp create mode 100644 lapack/unit_test/backends/Test_Threads_Lapack.cpp diff --git a/lapack/unit_test/backends/Test_Cuda_Lapack.cpp b/lapack/unit_test/backends/Test_Cuda_Lapack.cpp new file mode 100644 index 0000000000..d75988ef81 --- /dev/null +++ b/lapack/unit_test/backends/Test_Cuda_Lapack.cpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_CUDA_LAPACK_CPP +#define TEST_CUDA_LAPACK_CPP + +#include +#include + +#endif // TEST_CUDA_LAPACK_CPP diff --git a/lapack/unit_test/backends/Test_OpenMP_Lapack.cpp b/lapack/unit_test/backends/Test_OpenMP_Lapack.cpp new file mode 100644 index 0000000000..533580fd23 --- /dev/null +++ b/lapack/unit_test/backends/Test_OpenMP_Lapack.cpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_OPENMP_LAPACK_CPP +#define TEST_OPENMP_LAPACK_CPP + +#include +#include + +#endif // TEST_OPENMP_LAPACK_CPP diff --git a/lapack/unit_test/backends/Test_Threads_Lapack.cpp b/lapack/unit_test/backends/Test_Threads_Lapack.cpp new file mode 100644 index 0000000000..aa1acbcf6c --- /dev/null +++ b/lapack/unit_test/backends/Test_Threads_Lapack.cpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_THREADS_LAPACK_CPP +#define TEST_THREADS_LAPACK_CPP + +#include +#include + +#endif // TEST_THREADS_LAPACK_CPP From 872a553e01981cd213806406b452bf900caf0b9e Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 3 Oct 2023 11:43:16 -0600 Subject: [PATCH 08/22] Backup --- lapack/unit_test/Test_Lapack_gesv.hpp | 32 +++++++++++++------------- lapack/unit_test/Test_Lapack_trtri.hpp | 32 +++++++++++++------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/lapack/unit_test/Test_Lapack_gesv.hpp b/lapack/unit_test/Test_Lapack_gesv.hpp index 25d5089a58..f37770c812 100644 --- a/lapack/unit_test/Test_Lapack_gesv.hpp +++ b/lapack/unit_test/Test_Lapack_gesv.hpp @@ -343,15 +343,15 @@ int test_gesv_mrhs(const char* mode) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_float"); - test_gesv("N"); // No pivoting - test_gesv("Y"); // Partial pivoting + test_gesv("N"); // No pivoting + test_gesv("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_float"); - test_gesv_mrhs("N"); // No pivoting - test_gesv_mrhs("Y"); // Partial pivoting + test_gesv_mrhs("N"); // No pivoting + test_gesv_mrhs("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -361,15 +361,15 @@ TEST_F(TestCategory, gesv_mrhs_float) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_double"); - test_gesv("N"); // No pivoting - test_gesv("Y"); // Partial pivoting + test_gesv("N"); // No pivoting + test_gesv("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_double"); - test_gesv_mrhs("N"); // No pivoting - test_gesv_mrhs("Y"); // Partial pivoting + test_gesv_mrhs("N"); // No pivoting + test_gesv_mrhs("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -379,15 +379,15 @@ TEST_F(TestCategory, gesv_mrhs_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_double"); - test_gesv, TestExecSpace>("N"); // No pivoting - test_gesv, TestExecSpace>("Y"); // Partial pivoting + test_gesv, TestDevice>("N"); // No pivoting + test_gesv, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_double"); - test_gesv_mrhs, TestExecSpace>("N"); // No pivoting - test_gesv_mrhs, TestExecSpace>( + test_gesv_mrhs, TestDevice>("N"); // No pivoting + test_gesv_mrhs, TestDevice>( "Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } @@ -398,15 +398,15 @@ TEST_F(TestCategory, gesv_mrhs_complex_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, gesv_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_complex_float"); - test_gesv, TestExecSpace>("N"); // No pivoting - test_gesv, TestExecSpace>("Y"); // Partial pivoting + test_gesv, TestDevice>("N"); // No pivoting + test_gesv, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } TEST_F(TestCategory, gesv_mrhs_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_float"); - test_gesv_mrhs, TestExecSpace>("N"); // No pivoting - test_gesv_mrhs, TestExecSpace>( + test_gesv_mrhs, TestDevice>("N"); // No pivoting + test_gesv_mrhs, TestDevice>( "Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } diff --git a/lapack/unit_test/Test_Lapack_trtri.hpp b/lapack/unit_test/Test_Lapack_trtri.hpp index 498b1248f3..0105803567 100644 --- a/lapack/unit_test/Test_Lapack_trtri.hpp +++ b/lapack/unit_test/Test_Lapack_trtri.hpp @@ -363,10 +363,10 @@ int test_trtri(const char* mode) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_float"); - test_trtri("UN"); - test_trtri("UU"); - test_trtri("LN"); - test_trtri("LU"); + test_trtri("UN"); + test_trtri("UU"); + test_trtri("LN"); + test_trtri("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -376,10 +376,10 @@ TEST_F(TestCategory, trtri_float) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_double"); - test_trtri("UN"); - test_trtri("UU"); - test_trtri("LN"); - test_trtri("LU"); + test_trtri("UN"); + test_trtri("UU"); + test_trtri("LN"); + test_trtri("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -389,10 +389,10 @@ TEST_F(TestCategory, trtri_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_double"); - test_trtri, TestExecSpace>("UN"); - test_trtri, TestExecSpace>("UU"); - test_trtri, TestExecSpace>("LN"); - test_trtri, TestExecSpace>("LU"); + test_trtri, TestDevice>("UN"); + test_trtri, TestDevice>("UU"); + test_trtri, TestDevice>("LN"); + test_trtri, TestDevice>("LU"); Kokkos::Profiling::popRegion(); } #endif @@ -402,10 +402,10 @@ TEST_F(TestCategory, trtri_complex_double) { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F(TestCategory, trtri_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::trtri_complex_float"); - test_trtri, TestExecSpace>("UN"); - test_trtri, TestExecSpace>("UU"); - test_trtri, TestExecSpace>("LN"); - test_trtri, TestExecSpace>("LU"); + test_trtri, TestDevice>("UN"); + test_trtri, TestDevice>("UU"); + test_trtri, TestDevice>("LN"); + test_trtri, TestDevice>("LU"); Kokkos::Profiling::popRegion(); } #endif From 3223af39c87db07b89959bde8b4491939da89d48 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 10 Oct 2023 11:23:11 -0600 Subject: [PATCH 09/22] Backup --- .github/workflows/osx.yml | 1 + CMakeLists.txt | 18 +++++++++++++++++- batched/KokkosBatched_Util.hpp | 1 + ...okkosBatched_HostLevel_Gemm_Serial_Impl.hpp | 3 ++- cm_generate_makefile.bash | 16 +++++++++++++++- cmake/KokkosKernels_config.h.in | 4 ++++ cmake/Modules/FindTPLCUSOLVER.cmake | 18 ++++++++++++++++++ cmake/Modules/FindTPLROCSOLVER.cmake | 12 ++++++++++++ cmake/kokkoskernels_tpls.cmake | 10 ++++++++++ example/half/xpy.cpp | 2 +- lapack/CMakeLists.txt | 12 ++++++------ 11 files changed, 87 insertions(+), 10 deletions(-) create mode 100644 cmake/Modules/FindTPLCUSOLVER.cmake create mode 100644 cmake/Modules/FindTPLROCSOLVER.cmake diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 8d9f7123f8..10688ddb70 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -103,6 +103,7 @@ jobs: -DKokkosKernels_INST_OFFSET_SIZE_T=ON \ -DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF \ -DKokkosKernels_ENABLE_TPL_CUBLAS=OFF \ + -DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF \ .. - name: build_kokkos_kernels diff --git a/CMakeLists.txt b/CMakeLists.txt index 812640374b..85ce79d9ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,7 +193,7 @@ ELSE() "ALL" STRING "A list of components to enable in testing and building" - VALID_ENTRIES BATCHED BLAS LAPACK GRAPH SPARSE ALL + VALID_ENTRIES BATCHED BLAS LAPACK CUSOLVER ROCSOLVER GRAPH SPARSE ALL ) # ================================================================== @@ -245,6 +245,8 @@ ELSE() MESSAGE(" BATCHED: ${KokkosKernels_ENABLE_COMPONENT_BATCHED}") MESSAGE(" BLAS: ${KokkosKernels_ENABLE_COMPONENT_BLAS}") MESSAGE(" LAPACK: ${KokkosKernels_ENABLE_COMPONENT_LAPACK}") + MESSAGE(" CUSOLVER: ${KokkosKernels_ENABLE_COMPONENT_CUSOLVER}") + MESSAGE(" ROCSOLVER: ${KokkosKernels_ENABLE_COMPONENT_ROCSOLVER}") MESSAGE(" GRAPH: ${KokkosKernels_ENABLE_COMPONENT_GRAPH}") MESSAGE(" SPARSE: ${KokkosKernels_ENABLE_COMPONENT_SPARSE}") MESSAGE(" ODE: ${KokkosKernels_ENABLE_COMPONENT_ODE}") @@ -292,6 +294,12 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) INCLUDE(lapack/CMakeLists.txt) ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_CUSOLVER) + INCLUDE(lapack/CMakeLists.txt) + ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_ROCSOLVER) + INCLUDE(lapack/CMakeLists.txt) + ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) INCLUDE(graph/CMakeLists.txt) ENDIF() @@ -374,8 +382,10 @@ ELSE() KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CHOLMOD) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC MKL) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUBLAS) + KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCBLAS) + KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC METIS) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ARMPL) @@ -413,6 +423,12 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_CUSOLVER) + KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) + ENDIF() + IF (KokkosKernels_ENABLE_COMPONENT_ROCSOLVER) + KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) + ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(graph/unit_test) ENDIF() diff --git a/batched/KokkosBatched_Util.hpp b/batched/KokkosBatched_Util.hpp index 9078281e59..04e48f8c92 100644 --- a/batched/KokkosBatched_Util.hpp +++ b/batched/KokkosBatched_Util.hpp @@ -21,6 +21,7 @@ // no experimental name space guard for trilinos #define __KOKKOSBATCHED_PROMOTION__ 1 +#include #include #include #include diff --git a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp index 5ff581bb64..fd63180cf7 100644 --- a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp @@ -16,6 +16,7 @@ #ifndef __KOKKOSBATCHED_HOSTLEVEL_GEMM_SERIAL_IMPL_HPP__ #define __KOKKOSBATCHED_HOSTLEVEL_GEMM_SERIAL_IMPL_HPP__ #include "KokkosBatched_Gemm_Decl.hpp" +#include namespace KokkosBatched { namespace Impl { @@ -181,4 +182,4 @@ class BatchedSerialGemm { }; } // namespace Impl } // namespace KokkosBatched -#endif \ No newline at end of file +#endif diff --git a/cm_generate_makefile.bash b/cm_generate_makefile.bash index 3358ae2eb8..4347eb4b3b 100755 --- a/cm_generate_makefile.bash +++ b/cm_generate_makefile.bash @@ -177,8 +177,10 @@ get_kernels_tpls_list() { KOKKOSKERNELS_USER_TPL_PATH_CMD= KOKKOSKERNELS_USER_TPL_LIBNAME_CMD= CUBLAS_DEFAULT=OFF + CUSOLVER_DEFAULT=OFF CUSPARSE_DEFAULT=OFF ROCBLAS_DEFAULT=OFF + ROCSOLVER_DEFAULT=OFF ROCSPARSE_DEFAULT=OFF PARSE_TPLS_LIST=$(echo $KOKKOSKERNELS_TPLS | tr "," "\n") for TPLS_ in $PARSE_TPLS_LIST @@ -188,12 +190,18 @@ get_kernels_tpls_list() { if [ "$UC_TPLS" == "CUBLAS" ]; then CUBLAS_DEFAULT=ON fi + if [ "$UC_TPLS" == "CUSOLVER" ]; then + CUSOLVER_DEFAULT=ON + fi if [ "$UC_TPLS" == "CUSPARSE" ]; then CUSPARSE_DEFAULT=ON fi if [ "$UC_TPLS" == "ROCBLAS" ]; then ROCBLAS_DEFAULT=ON fi + if [ "$UC_TPLS" == "ROCSOLVER" ]; then + ROCSOLVER_DEFAULT=ON + fi if [ "$UC_TPLS" == "ROCSPARSE" ]; then ROCSPARSE_DEFAULT=ON fi @@ -221,12 +229,18 @@ get_kernels_tpls_list() { if [ "$CUBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi + if [ "$CUSOLVER_DEFAULT" == "OFF" ]; then + KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" + fi if [ "$CUSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi if [ "$ROCBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi + if [ "$ROCSOLVER_DEFAULT" == "OFF" ]; then + KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" + fi if [ "$ROCSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi @@ -345,7 +359,7 @@ display_help_text() { echo "--with-tpls=[TPLS]: Set tpls to be instantiated (Proper support requies that appropriate compiler and device must be enabled)." echo " This may require providing paths and the library name if using custom installs not on a default path" echo " that CMake searches" - echo " Options: blas, mkl, cublas, cusparse, magma, armpl, rocblas, rocsparse" + echo " Options: blas, mkl, cublas, cusolver, cusparse, magma, armpl, rocblas, rocsolver, rocsparse" echo "--user-blas-path=[PATH]: Set path to location of user-specified BLAS library." echo "--user-blas-lib=[LIB]: Library name of desired BLAS install." echo " Example: For the typical \"libblas.a\" provide \"blas\"" diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index 621c78bfcc..bf063e7b63 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -117,6 +117,8 @@ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSPARSE /* CUBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUBLAS +/* CUSOLVER */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSOLVER /* MAGMA */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MAGMA /* SuperLU */ @@ -135,6 +137,8 @@ #cmakedefine ARMPL_BUILD @ARMPL_BUILD@ /* ROCBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +/* ROCSOLVER */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER /* ROCSPARSE */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE diff --git a/cmake/Modules/FindTPLCUSOLVER.cmake b/cmake/Modules/FindTPLCUSOLVER.cmake new file mode 100644 index 0000000000..e10d46e58c --- /dev/null +++ b/cmake/Modules/FindTPLCUSOLVER.cmake @@ -0,0 +1,18 @@ +FIND_PACKAGE(CUDA) + +INCLUDE(FindPackageHandleStandardArgs) +IF (NOT CUDA_FOUND) + #Important note here: this find Module is named TPLCUSOLVER + #The eventual target is named CUSOLVER. To avoid naming conflicts + #the find module is called TPLCUSOLVER. This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_FOUND) +ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${CUDA_CUSOLVER_LIBRARIES}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_CUSOLVER_LIBRARIES) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(CUSOLVER INTERFACE + LINK_LIBRARIES "${CUDA_CUSOLVER_LIBRARIES}") + ENDIF() +ENDIF() diff --git a/cmake/Modules/FindTPLROCSOLVER.cmake b/cmake/Modules/FindTPLROCSOLVER.cmake new file mode 100644 index 0000000000..c4389f7bae --- /dev/null +++ b/cmake/Modules/FindTPLROCSOLVER.cmake @@ -0,0 +1,12 @@ +FIND_PACKAGE(ROCSOLVER) +if(TARGET roc::rocsolver) +## MPL: 12/29/2022: Variable TPL_ROCSOLVER_IMPORTED_NAME follows the requested convention +## of KokkosKernel (method kokkoskernels_import_tpl of kokkoskernels_tpls.cmake) + SET(TPL_ROCSOLVER_IMPORTED_NAME roc::rocsolver) + SET(TPL_IMPORTED_NAME roc::rocsolver) +## MPL: 12/29/2022: A target comming from a TPL must follows the requested convention +## of KokkosKernel (method kokkoskernels_link_tpl of kokkoskernels_tpls.cmake) + ADD_LIBRARY(KokkosKernels::ROCSOLVER ALIAS roc::rocsolver) +ELSE() + MESSAGE(FATAL_ERROR "Package ROCSOLVER requested but not found") +ENDIF() diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index 6496487081..9584e028cd 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -456,27 +456,35 @@ ENDIF() KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_CUDA_TPLS OFF BOOL "Whether CUDA TPLs should be enabled by default. Default: OFF") SET(CUBLAS_DEFAULT ${KOKKOS_ENABLE_CUDA}) +SET(CUSOLVER_DEFAULT ${KOKKOS_ENABLE_CUDA}) SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) IF(KOKKOSKERNELS_NO_DEFAULT_CUDA_TPLS) SET(CUBLAS_DEFAULT OFF) + SET(CUSOLVER_DEFAULT OFF) SET(CUSPARSE_DEFAULT OFF) ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(CUBLAS ${CUBLAS_DEFAULT} "Whether to enable CUBLAS" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") +KOKKOSKERNELS_ADD_TPL_OPTION(CUSOLVER ${CUSOLVER_DEFAULT} "Whether to enable CUSOLVER" + DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(CUSPARSE ${CUSPARSE_DEFAULT} "Whether to enable CUSPARSE" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_ROCM_TPLS OFF BOOL "Whether ROCM TPLs should be enabled by default. Default: OFF") # Unlike CUDA, ROCm does not automatically install these TPLs SET(ROCBLAS_DEFAULT OFF) +SET(ROCSOLVER_DEFAULT OFF) SET(ROCSPARSE_DEFAULT OFF) # Since the default is OFF we do not really need this piece of logic here. # IF(KOKKOSKERNELS_NO_DEFAULT_ROCM_TPLS) # SET(ROCBLAS_DEFAULT OFF) +# SET(ROCSOLVER_DEFAULT OFF) # SET(ROCSPARSE_DEFAULT OFF) # ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(ROCBLAS ${ROCBLAS_DEFAULT} "Whether to enable ROCBLAS" DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") +KOKKOSKERNELS_ADD_TPL_OPTION(ROCSOLVER ${ROCSOLVER_DEFAULT} "Whether to enable ROCSOLVER" + DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(ROCSPARSE ${ROCSPARSE_DEFAULT} "Whether to enable ROCSPARSE" DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") @@ -508,6 +516,7 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(LAPACK) KOKKOSKERNELS_IMPORT_TPL(MKL) KOKKOSKERNELS_IMPORT_TPL(CUBLAS) + KOKKOSKERNELS_IMPORT_TPL(CUSOLVER) KOKKOSKERNELS_IMPORT_TPL(CUSPARSE) KOKKOSKERNELS_IMPORT_TPL(CBLAS) KOKKOSKERNELS_IMPORT_TPL(LAPACKE) @@ -517,6 +526,7 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(ARMPL) KOKKOSKERNELS_IMPORT_TPL(MAGMA) KOKKOSKERNELS_IMPORT_TPL(ROCBLAS) + KOKKOSKERNELS_IMPORT_TPL(ROCSOLVER) KOKKOSKERNELS_IMPORT_TPL(ROCSPARSE) ELSE () IF (Trilinos_ENABLE_SuperLU5_API) diff --git a/example/half/xpy.cpp b/example/half/xpy.cpp index 238fdef187..92c422cfe8 100644 --- a/example/half/xpy.cpp +++ b/example/half/xpy.cpp @@ -109,4 +109,4 @@ int main(int argc, char **argv) { do_xpy(n, time_only); Kokkos::finalize(); return 0; -} \ No newline at end of file +} diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 0f38d0aa50..8ab784a325 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -8,11 +8,11 @@ LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lapack/tpls) KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/lapack) KOKKOSKERNELS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}/lapack) -####################### -# # +######################### +# # # Logic for LAPACK TPLs # -# # -####################### +# # +######################### #Include LAPACK, Lapack host wrapper IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNELS_ENABLE_TPL_ARMPL) @@ -28,14 +28,14 @@ IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKER ENDIF() # Include cuda lapack TPL source file -IF (KOKKOSKERNELS_ENABLE_TPL_CULAPACK) +IF (KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) LIST(APPEND SOURCES lapack/tpls/KokkosLapack_Cuda_tpl.cpp ) ENDIF() # Include rocm lapack TPL source file -IF (KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK) +IF (KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER) LIST(APPEND SOURCES lapack/tpls/KokkosLapack_Rocm_tpl.cpp ) From b06bf0f370ab88276807f245c33bb7cb87c7d3d8 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 10 Oct 2023 13:22:48 -0600 Subject: [PATCH 10/22] Backup --- lapack/tpls/KokkosLapack_Cuda_tpl.cpp | 18 ++ lapack/tpls/KokkosLapack_Cuda_tpl.hpp | 64 +++++++ lapack/tpls/KokkosLapack_tpl_spec.hpp | 165 +++++++++--------- .../KokkosSparse_spmv_mv_tpl_spec_decl.hpp | 1 + 4 files changed, 166 insertions(+), 82 deletions(-) create mode 100644 lapack/tpls/KokkosLapack_Cuda_tpl.cpp create mode 100644 lapack/tpls/KokkosLapack_Cuda_tpl.hpp diff --git a/lapack/tpls/KokkosLapack_Cuda_tpl.cpp b/lapack/tpls/KokkosLapack_Cuda_tpl.cpp new file mode 100644 index 0000000000..2ac28871a4 --- /dev/null +++ b/lapack/tpls/KokkosLapack_Cuda_tpl.cpp @@ -0,0 +1,18 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include +#include +#include diff --git a/lapack/tpls/KokkosLapack_Cuda_tpl.hpp b/lapack/tpls/KokkosLapack_Cuda_tpl.hpp new file mode 100644 index 0000000000..b59d6d99c8 --- /dev/null +++ b/lapack/tpls/KokkosLapack_Cuda_tpl.hpp @@ -0,0 +1,64 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_CUDA_TPL_HPP_ +#define KOKKOSLAPACK_CUDA_TPL_HPP_ + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) +#include + +namespace KokkosLapack { +namespace Impl { + +CudaLapackSingleton::CudaLapackSingleton() { + cusolverStatus_t stat = cusolverDnCreate(&handle); + if (stat != CUSOLVER_STATUS_SUCCESS) + Kokkos::abort("CUSOLVER initialization failed\n"); + + Kokkos::push_finalize_hook([&]() { cusolverDnDestroy(handle); }); +} + +CudaLapackSingleton& CudaLapackSingleton::singleton() { + static CudaLapackSingleton s; + return s; +} + +} // namespace Impl +} // namespace KokkosLapack +#endif // defined (KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + +#if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) +#include + +namespace KokkosLapack { +namespace Impl { + +MagmaSingleton::MagmaSingleton() { + magma_int_t stat = magma_init(); + if (stat != MAGMA_SUCCESS) Kokkos::abort("MAGMA initialization failed\n"); + + Kokkos::push_finalize_hook([&]() { magma_finalize(); }); +} + +MagmaSingleton& MagmaSingleton::singleton() { + static MagmaSingleton s; + return s; +} + +} // namespace Impl +} // namespace KokkosLapack +#endif // defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) + +#endif // KOKKOSLAPACK_CUDA_TPL_HPP_ diff --git a/lapack/tpls/KokkosLapack_tpl_spec.hpp b/lapack/tpls/KokkosLapack_tpl_spec.hpp index a20c5d9a92..7bfffc780f 100644 --- a/lapack/tpls/KokkosLapack_tpl_spec.hpp +++ b/lapack/tpls/KokkosLapack_tpl_spec.hpp @@ -17,56 +17,57 @@ #ifndef KOKKOSLAPACK_TPL_SPEC_HPP_ #define KOKKOSLAPACK_TPL_SPEC_HPP_ -#ifdef KOKKOSKERNELS_ENABLE_TPL_CULAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER #include "cuda_runtime.h" -#include "culapack_v2.h" +//#include "cublas_v2.h" +#include "cusolverDn.h" namespace KokkosLapack { namespace Impl { struct CudaLapackSingleton { - culapackHandle_t handle; + cusolverDnHandle_t handle; CudaLapackSingleton(); static CudaLapackSingleton& singleton(); }; -inline void culapack_internal_error_throw(culapackStatus_t culapackState, +inline void cusolver_internal_error_throw(cusolverStatus_t cusolverState, const char* name, const char* file, const int line) { std::ostringstream out; - // out << name << " error( " << culapackGetStatusName(culapackState) - // << "): " << culapackGetStatusString(culapackState); + // out << name << " error( " << cusolverGetStatusName(cusolverState) + // << "): " << cusolverGetStatusString(cusolverState); out << name << " error( "; - switch (culapackState) { - case CULAPACK_STATUS_NOT_INITIALIZED: - out << "CULAPACK_STATUS_NOT_INITIALIZED): the library was not initialized."; + switch (cusolverState) { + case CUSOLVER_STATUS_NOT_INITIALIZED: + out << "CUSOLVER_STATUS_NOT_INITIALIZED): the library was not initialized."; break; - case CULAPACK_STATUS_ALLOC_FAILED: - out << "CULAPACK_STATUS_ALLOC_FAILED): the resource allocation failed."; + case CUSOLVER_STATUS_ALLOC_FAILED: + out << "CUSOLVER_STATUS_ALLOC_FAILED): the resource allocation failed."; break; - case CULAPACK_STATUS_INVALID_VALUE: - out << "CULAPACK_STATUS_INVALID_VALUE): an invalid numerical value was " + case CUSOLVER_STATUS_INVALID_VALUE: + out << "CUSOLVER_STATUS_INVALID_VALUE): an invalid numerical value was " "used as an argument."; break; - case CULAPACK_STATUS_ARCH_MISMATCH: - out << "CULAPACK_STATUS_ARCH_MISMATCH): an absent device architectural " + case CUSOLVER_STATUS_ARCH_MISMATCH: + out << "CUSOLVER_STATUS_ARCH_MISMATCH): an absent device architectural " "feature is required."; break; - case CULAPACK_STATUS_MAPPING_ERROR: - out << "CULAPACK_STATUS_MAPPING_ERROR): an access to GPU memory space " + case CUSOLVER_STATUS_MAPPING_ERROR: + out << "CUSOLVER_STATUS_MAPPING_ERROR): an access to GPU memory space " "failed."; break; - case CULAPACK_STATUS_EXECUTION_FAILED: - out << "CULAPACK_STATUS_EXECUTION_FAILED): the GPU program failed to " + case CUSOLVER_STATUS_EXECUTION_FAILED: + out << "CUSOLVER_STATUS_EXECUTION_FAILED): the GPU program failed to " "execute."; break; - case CULAPACK_STATUS_INTERNAL_ERROR: - out << "CULAPACK_STATUS_INTERNAL_ERROR): an internal operation failed."; + case CUSOLVER_STATUS_INTERNAL_ERROR: + out << "CUSOLVER_STATUS_INTERNAL_ERROR): an internal operation failed."; break; - case CULAPACK_STATUS_NOT_SUPPORTED: - out << "CULAPACK_STATUS_NOT_SUPPORTED): the feature required is not " + case CUSOLVER_STATUS_NOT_SUPPORTED: + out << "CUSOLVER_STATUS_NOT_SUPPORTED): the feature required is not " "supported."; break; default: out << "unrecognized error code): this is bad!"; break; @@ -77,101 +78,101 @@ inline void culapack_internal_error_throw(culapackStatus_t culapackState, throw std::runtime_error(out.str()); } -inline void culapack_internal_safe_call(culapackStatus_t culapackState, +inline void cusolver_internal_safe_call(cusolverStatus_t cusolverState, const char* name, const char* file = nullptr, const int line = 0) { - if (CULAPACK_STATUS_SUCCESS != culapackState) { - culapack_internal_error_throw(culapackState, name, file, line); + if (CUSOLVER_STATUS_SUCCESS != cusolverState) { + cusolver_internal_error_throw(cusolverState, name, file, line); } } -// The macro below defines the interface for the safe culapack calls. +// The macro below defines the interface for the safe cusolver calls. // The functions themselves are protected by impl namespace and this // is not meant to be used by external application or libraries. -#define KOKKOS_CULAPACK_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::culapack_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, __LINE__) -/// \brief This function converts KK transpose mode to cuLAPACK transpose mode -inline culapackOperation_t trans_mode_kk_to_culapack(const char kkMode[]) { - culapackOperation_t trans; +/// \brief This function converts KK transpose mode to cusolver transpose mode +inline cublasOperation_t trans_mode_kk_to_cusolver(const char kkMode[]) { + cublasOperation_t trans; if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) - trans = CULAPACK_OP_N; + trans = CUBLAS_OP_N; else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) - trans = CULAPACK_OP_T; + trans = CUBLAS_OP_T; else - trans = CULAPACK_OP_C; + trans = CUBLAS_OP_C; return trans; } } // namespace Impl } // namespace KokkosLapack -#endif // KOKKOSKERNELS_ENABLE_TPL_CULAPACK +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER -#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK -#include +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include namespace KokkosLapack { namespace Impl { -struct RocLapackSingleton { - roclapack_handle handle; +struct RocsolverSingleton { + rocsolver_handle handle; - RocLapackSingleton(); + RocsolverSingleton(); - static RocLapackSingleton& singleton(); + static RocsolverSingleton& singleton(); }; -inline void roclapack_internal_error_throw(roclapack_status roclapackState, +inline void rocsolver_internal_error_throw(rocsolver_status rocsolverState, const char* name, const char* file, const int line) { std::ostringstream out; out << name << " error( "; - switch (roclapackState) { - case roclapack_status_invalid_handle: - out << "roclapack_status_invalid_handle): handle not initialized, invalid " + switch (rocsolverState) { + case rocsolver_status_invalid_handle: + out << "rocsolver_status_invalid_handle): handle not initialized, invalid " "or null."; break; - case roclapack_status_not_implemented: - out << "roclapack_status_not_implemented): function is not implemented."; + case rocsolver_status_not_implemented: + out << "rocsolver_status_not_implemented): function is not implemented."; break; - case roclapack_status_invalid_pointer: - out << "roclapack_status_invalid_pointer): invalid pointer argument."; + case rocsolver_status_invalid_pointer: + out << "rocsolver_status_invalid_pointer): invalid pointer argument."; break; - case roclapack_status_invalid_size: - out << "roclapack_status_invalid_size): invalid size argument."; + case rocsolver_status_invalid_size: + out << "rocsolver_status_invalid_size): invalid size argument."; break; - case roclapack_status_memory_error: - out << "roclapack_status_memory_error): failed internal memory allocation, " + case rocsolver_status_memory_error: + out << "rocsolver_status_memory_error): failed internal memory allocation, " "copy or dealloc."; break; - case roclapack_status_internal_error: - out << "roclapack_status_internal_error): other internal library failure."; + case rocsolver_status_internal_error: + out << "rocsolver_status_internal_error): other internal library failure."; break; - case roclapack_status_perf_degraded: - out << "roclapack_status_perf_degraded): performance degraded due to low " + case rocsolver_status_perf_degraded: + out << "rocsolver_status_perf_degraded): performance degraded due to low " "device memory."; break; - case roclapack_status_size_query_mismatch: + case rocsolver_status_size_query_mismatch: out << "unmatched start/stop size query): ."; break; - case roclapack_status_size_increased: - out << "roclapack_status_size_increased): queried device memory size " + case rocsolver_status_size_increased: + out << "rocsolver_status_size_increased): queried device memory size " "increased."; break; - case roclapack_status_size_unchanged: - out << "roclapack_status_size_unchanged): queried device memory size " + case rocsolver_status_size_unchanged: + out << "rocsolver_status_size_unchanged): queried device memory size " "unchanged."; break; - case roclapack_status_invalid_value: - out << "roclapack_status_invalid_value): passed argument not valid."; + case rocsolver_status_invalid_value: + out << "rocsolver_status_invalid_value): passed argument not valid."; break; - case roclapack_status_continue: - out << "roclapack_status_continue): nothing preventing function to " + case rocsolver_status_continue: + out << "rocsolver_status_continue): nothing preventing function to " "proceed."; break; - case roclapack_status_check_numerics_fail: - out << "roclapack_status_check_numerics_fail): will be set if the " + case rocsolver_status_check_numerics_fail: + out << "rocsolver_status_check_numerics_fail): will be set if the " "vector/matrix has a NaN or an Infinity."; break; default: out << "unrecognized error code): this is bad!"; break; @@ -182,37 +183,37 @@ inline void roclapack_internal_error_throw(roclapack_status roclapackState, throw std::runtime_error(out.str()); } -inline void roclapack_internal_safe_call(roclapack_status roclapackState, +inline void rocsolver_internal_safe_call(rocsolver_status rocsolverState, const char* name, const char* file = nullptr, const int line = 0) { - if (roclapack_status_success != roclapackState) { - roclapack_internal_error_throw(roclapackState, name, file, line); + if (rocsolver_status_success != rocsolverState) { + rocsolver_internal_error_throw(rocsolverState, name, file, line); } } -// The macro below defines the interface for the safe roclapack calls. +// The macro below defines the interface for the safe rocsolver calls. // The functions themselves are protected by impl namespace and this // is not meant to be used by external application or libraries. -#define KOKKOS_ROCLAPACK_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::roclapack_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOS_ROCSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::rocsolver_internal_safe_call(call, #call, __FILE__, __LINE__) -/// \brief This function converts KK transpose mode to rocLAPACK transpose mode -inline roclapack_operation trans_mode_kk_to_roclapack(const char kkMode[]) { - roclapack_operation trans; +/// \brief This function converts KK transpose mode to rocsolver transpose mode +inline rocsolver_operation trans_mode_kk_to_rocsolver(const char kkMode[]) { + rocsolver_operation trans; if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) - trans = roclapack_operation_none; + trans = rocsolver_operation_none; else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) - trans = roclapack_operation_transpose; + trans = rocsolver_operation_transpose; else - trans = roclapack_operation_conjugate_transpose; + trans = rocsolver_operation_conjugate_transpose; return trans; } } // namespace Impl } // namespace KokkosLapack -#endif // KOKKOSKERNELS_ENABLE_TPL_ROCLAPACK +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER // If LAPACK TPL is enabled, it is preferred over magma's LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA diff --git a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index f28e04e26b..157e21dca7 100644 --- a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -20,6 +20,7 @@ #include #include "KokkosKernels_Controls.hpp" +#include #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE From ca4a9431015b2df307419bddcb460a859e04a5e2 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 14 Oct 2023 14:04:02 -0600 Subject: [PATCH 11/22] Backup --- batched/KokkosBatched_Util.hpp | 1 - batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/batched/KokkosBatched_Util.hpp b/batched/KokkosBatched_Util.hpp index 04e48f8c92..9078281e59 100644 --- a/batched/KokkosBatched_Util.hpp +++ b/batched/KokkosBatched_Util.hpp @@ -21,7 +21,6 @@ // no experimental name space guard for trilinos #define __KOKKOSBATCHED_PROMOTION__ 1 -#include #include #include #include diff --git a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp index fd63180cf7..35821318b6 100644 --- a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp @@ -16,7 +16,6 @@ #ifndef __KOKKOSBATCHED_HOSTLEVEL_GEMM_SERIAL_IMPL_HPP__ #define __KOKKOSBATCHED_HOSTLEVEL_GEMM_SERIAL_IMPL_HPP__ #include "KokkosBatched_Gemm_Decl.hpp" -#include namespace KokkosBatched { namespace Impl { From db8d1c0351db9065304898632ae67cdab69f69ef Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Sat, 14 Oct 2023 14:17:13 -0600 Subject: [PATCH 12/22] Backup --- sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index 157e21dca7..f28e04e26b 100644 --- a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -20,7 +20,6 @@ #include #include "KokkosKernels_Controls.hpp" -#include #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE From 6d582b62d9b136cb91d0065ed44642617c79c3cc Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 24 Oct 2023 22:13:00 -0600 Subject: [PATCH 13/22] Some cleanup on current pull request, making it more related to 'just' the creation of the lapack subdirectory and the moving of some files to there --- .github/workflows/osx.yml | 1 - CMakeLists.txt | 18 +----------------- cm_generate_makefile.bash | 16 +--------------- cmake/KokkosKernels_config.h.in | 6 ------ cmake/Modules/FindTPLCUSOLVER.cmake | 18 ------------------ cmake/Modules/FindTPLROCSOLVER.cmake | 12 ------------ cmake/kokkoskernels_tpls.cmake | 11 ----------- 7 files changed, 2 insertions(+), 80 deletions(-) delete mode 100644 cmake/Modules/FindTPLCUSOLVER.cmake delete mode 100644 cmake/Modules/FindTPLROCSOLVER.cmake diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 10688ddb70..8d9f7123f8 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -103,7 +103,6 @@ jobs: -DKokkosKernels_INST_OFFSET_SIZE_T=ON \ -DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF \ -DKokkosKernels_ENABLE_TPL_CUBLAS=OFF \ - -DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF \ .. - name: build_kokkos_kernels diff --git a/CMakeLists.txt b/CMakeLists.txt index 85ce79d9ab..812640374b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,7 +193,7 @@ ELSE() "ALL" STRING "A list of components to enable in testing and building" - VALID_ENTRIES BATCHED BLAS LAPACK CUSOLVER ROCSOLVER GRAPH SPARSE ALL + VALID_ENTRIES BATCHED BLAS LAPACK GRAPH SPARSE ALL ) # ================================================================== @@ -245,8 +245,6 @@ ELSE() MESSAGE(" BATCHED: ${KokkosKernels_ENABLE_COMPONENT_BATCHED}") MESSAGE(" BLAS: ${KokkosKernels_ENABLE_COMPONENT_BLAS}") MESSAGE(" LAPACK: ${KokkosKernels_ENABLE_COMPONENT_LAPACK}") - MESSAGE(" CUSOLVER: ${KokkosKernels_ENABLE_COMPONENT_CUSOLVER}") - MESSAGE(" ROCSOLVER: ${KokkosKernels_ENABLE_COMPONENT_ROCSOLVER}") MESSAGE(" GRAPH: ${KokkosKernels_ENABLE_COMPONENT_GRAPH}") MESSAGE(" SPARSE: ${KokkosKernels_ENABLE_COMPONENT_SPARSE}") MESSAGE(" ODE: ${KokkosKernels_ENABLE_COMPONENT_ODE}") @@ -294,12 +292,6 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) INCLUDE(lapack/CMakeLists.txt) ENDIF() - IF (KokkosKernels_ENABLE_COMPONENT_CUSOLVER) - INCLUDE(lapack/CMakeLists.txt) - ENDIF() - IF (KokkosKernels_ENABLE_COMPONENT_ROCSOLVER) - INCLUDE(lapack/CMakeLists.txt) - ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) INCLUDE(graph/CMakeLists.txt) ENDIF() @@ -382,10 +374,8 @@ ELSE() KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CHOLMOD) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC MKL) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUBLAS) - KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCBLAS) - KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC METIS) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ARMPL) @@ -423,12 +413,6 @@ ELSE() IF (KokkosKernels_ENABLE_COMPONENT_LAPACK) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) ENDIF() - IF (KokkosKernels_ENABLE_COMPONENT_CUSOLVER) - KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) - ENDIF() - IF (KokkosKernels_ENABLE_COMPONENT_ROCSOLVER) - KOKKOSKERNELS_ADD_TEST_DIRECTORIES(lapack/unit_test) - ENDIF() IF (KokkosKernels_ENABLE_COMPONENT_GRAPH) KOKKOSKERNELS_ADD_TEST_DIRECTORIES(graph/unit_test) ENDIF() diff --git a/cm_generate_makefile.bash b/cm_generate_makefile.bash index 4347eb4b3b..3358ae2eb8 100755 --- a/cm_generate_makefile.bash +++ b/cm_generate_makefile.bash @@ -177,10 +177,8 @@ get_kernels_tpls_list() { KOKKOSKERNELS_USER_TPL_PATH_CMD= KOKKOSKERNELS_USER_TPL_LIBNAME_CMD= CUBLAS_DEFAULT=OFF - CUSOLVER_DEFAULT=OFF CUSPARSE_DEFAULT=OFF ROCBLAS_DEFAULT=OFF - ROCSOLVER_DEFAULT=OFF ROCSPARSE_DEFAULT=OFF PARSE_TPLS_LIST=$(echo $KOKKOSKERNELS_TPLS | tr "," "\n") for TPLS_ in $PARSE_TPLS_LIST @@ -190,18 +188,12 @@ get_kernels_tpls_list() { if [ "$UC_TPLS" == "CUBLAS" ]; then CUBLAS_DEFAULT=ON fi - if [ "$UC_TPLS" == "CUSOLVER" ]; then - CUSOLVER_DEFAULT=ON - fi if [ "$UC_TPLS" == "CUSPARSE" ]; then CUSPARSE_DEFAULT=ON fi if [ "$UC_TPLS" == "ROCBLAS" ]; then ROCBLAS_DEFAULT=ON fi - if [ "$UC_TPLS" == "ROCSOLVER" ]; then - ROCSOLVER_DEFAULT=ON - fi if [ "$UC_TPLS" == "ROCSPARSE" ]; then ROCSPARSE_DEFAULT=ON fi @@ -229,18 +221,12 @@ get_kernels_tpls_list() { if [ "$CUBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi - if [ "$CUSOLVER_DEFAULT" == "OFF" ]; then - KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" - fi if [ "$CUSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi if [ "$ROCBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi - if [ "$ROCSOLVER_DEFAULT" == "OFF" ]; then - KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" - fi if [ "$ROCSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_ROCSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi @@ -359,7 +345,7 @@ display_help_text() { echo "--with-tpls=[TPLS]: Set tpls to be instantiated (Proper support requies that appropriate compiler and device must be enabled)." echo " This may require providing paths and the library name if using custom installs not on a default path" echo " that CMake searches" - echo " Options: blas, mkl, cublas, cusolver, cusparse, magma, armpl, rocblas, rocsolver, rocsparse" + echo " Options: blas, mkl, cublas, cusparse, magma, armpl, rocblas, rocsparse" echo "--user-blas-path=[PATH]: Set path to location of user-specified BLAS library." echo "--user-blas-lib=[LIB]: Library name of desired BLAS install." echo " Example: For the typical \"libblas.a\" provide \"blas\"" diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index bf063e7b63..b8b66fffbb 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -109,16 +109,12 @@ /* BLAS library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_BLAS -/* LAPACK library */ -#cmakedefine KOKKOSKERNELS_ENABLE_TPL_LAPACK /* MKL library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL /* CUSPARSE */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSPARSE /* CUBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUBLAS -/* CUSOLVER */ -#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSOLVER /* MAGMA */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MAGMA /* SuperLU */ @@ -137,8 +133,6 @@ #cmakedefine ARMPL_BUILD @ARMPL_BUILD@ /* ROCBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCBLAS -/* ROCSOLVER */ -#cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER /* ROCSPARSE */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE diff --git a/cmake/Modules/FindTPLCUSOLVER.cmake b/cmake/Modules/FindTPLCUSOLVER.cmake deleted file mode 100644 index e10d46e58c..0000000000 --- a/cmake/Modules/FindTPLCUSOLVER.cmake +++ /dev/null @@ -1,18 +0,0 @@ -FIND_PACKAGE(CUDA) - -INCLUDE(FindPackageHandleStandardArgs) -IF (NOT CUDA_FOUND) - #Important note here: this find Module is named TPLCUSOLVER - #The eventual target is named CUSOLVER. To avoid naming conflicts - #the find module is called TPLCUSOLVER. This call will cause - #the find_package call to fail in a "standard" CMake way - FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_FOUND) -ELSE() - #The libraries might be empty - OR they might explicitly be not found - IF("${CUDA_CUSOLVER_LIBRARIES}" MATCHES "NOTFOUND") - FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_CUSOLVER_LIBRARIES) - ELSE() - KOKKOSKERNELS_CREATE_IMPORTED_TPL(CUSOLVER INTERFACE - LINK_LIBRARIES "${CUDA_CUSOLVER_LIBRARIES}") - ENDIF() -ENDIF() diff --git a/cmake/Modules/FindTPLROCSOLVER.cmake b/cmake/Modules/FindTPLROCSOLVER.cmake deleted file mode 100644 index c4389f7bae..0000000000 --- a/cmake/Modules/FindTPLROCSOLVER.cmake +++ /dev/null @@ -1,12 +0,0 @@ -FIND_PACKAGE(ROCSOLVER) -if(TARGET roc::rocsolver) -## MPL: 12/29/2022: Variable TPL_ROCSOLVER_IMPORTED_NAME follows the requested convention -## of KokkosKernel (method kokkoskernels_import_tpl of kokkoskernels_tpls.cmake) - SET(TPL_ROCSOLVER_IMPORTED_NAME roc::rocsolver) - SET(TPL_IMPORTED_NAME roc::rocsolver) -## MPL: 12/29/2022: A target comming from a TPL must follows the requested convention -## of KokkosKernel (method kokkoskernels_link_tpl of kokkoskernels_tpls.cmake) - ADD_LIBRARY(KokkosKernels::ROCSOLVER ALIAS roc::rocsolver) -ELSE() - MESSAGE(FATAL_ERROR "Package ROCSOLVER requested but not found") -ENDIF() diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index 9584e028cd..be1488e051 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -456,39 +456,30 @@ ENDIF() KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_CUDA_TPLS OFF BOOL "Whether CUDA TPLs should be enabled by default. Default: OFF") SET(CUBLAS_DEFAULT ${KOKKOS_ENABLE_CUDA}) -SET(CUSOLVER_DEFAULT ${KOKKOS_ENABLE_CUDA}) SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) IF(KOKKOSKERNELS_NO_DEFAULT_CUDA_TPLS) SET(CUBLAS_DEFAULT OFF) - SET(CUSOLVER_DEFAULT OFF) SET(CUSPARSE_DEFAULT OFF) ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(CUBLAS ${CUBLAS_DEFAULT} "Whether to enable CUBLAS" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") -KOKKOSKERNELS_ADD_TPL_OPTION(CUSOLVER ${CUSOLVER_DEFAULT} "Whether to enable CUSOLVER" - DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(CUSPARSE ${CUSPARSE_DEFAULT} "Whether to enable CUSPARSE" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_ROCM_TPLS OFF BOOL "Whether ROCM TPLs should be enabled by default. Default: OFF") # Unlike CUDA, ROCm does not automatically install these TPLs SET(ROCBLAS_DEFAULT OFF) -SET(ROCSOLVER_DEFAULT OFF) SET(ROCSPARSE_DEFAULT OFF) # Since the default is OFF we do not really need this piece of logic here. # IF(KOKKOSKERNELS_NO_DEFAULT_ROCM_TPLS) # SET(ROCBLAS_DEFAULT OFF) -# SET(ROCSOLVER_DEFAULT OFF) # SET(ROCSPARSE_DEFAULT OFF) # ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(ROCBLAS ${ROCBLAS_DEFAULT} "Whether to enable ROCBLAS" DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") -KOKKOSKERNELS_ADD_TPL_OPTION(ROCSOLVER ${ROCSOLVER_DEFAULT} "Whether to enable ROCSOLVER" - DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(ROCSPARSE ${ROCSPARSE_DEFAULT} "Whether to enable ROCSPARSE" DEFAULT_DOCSTRING "ON if HIP-enabled Kokkos, otherwise OFF") -#AquiEEP IF (KOKKOSKERNELS_ENABLE_TPL_MAGMA) IF (F77_BLAS_MANGLE STREQUAL "(name,NAME) name ## _") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DADD_ -fopenmp -lgfortran") @@ -516,7 +507,6 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(LAPACK) KOKKOSKERNELS_IMPORT_TPL(MKL) KOKKOSKERNELS_IMPORT_TPL(CUBLAS) - KOKKOSKERNELS_IMPORT_TPL(CUSOLVER) KOKKOSKERNELS_IMPORT_TPL(CUSPARSE) KOKKOSKERNELS_IMPORT_TPL(CBLAS) KOKKOSKERNELS_IMPORT_TPL(LAPACKE) @@ -526,7 +516,6 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(ARMPL) KOKKOSKERNELS_IMPORT_TPL(MAGMA) KOKKOSKERNELS_IMPORT_TPL(ROCBLAS) - KOKKOSKERNELS_IMPORT_TPL(ROCSOLVER) KOKKOSKERNELS_IMPORT_TPL(ROCSPARSE) ELSE () IF (Trilinos_ENABLE_SuperLU5_API) From e8557be2780b221dee7757b2d14753aa587d1f4f Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 24 Oct 2023 22:16:05 -0600 Subject: [PATCH 14/22] More cleanup --- batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp | 2 +- example/half/xpy.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp index 35821318b6..5ff581bb64 100644 --- a/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_HostLevel_Gemm_Serial_Impl.hpp @@ -181,4 +181,4 @@ class BatchedSerialGemm { }; } // namespace Impl } // namespace KokkosBatched -#endif +#endif \ No newline at end of file diff --git a/example/half/xpy.cpp b/example/half/xpy.cpp index 92c422cfe8..238fdef187 100644 --- a/example/half/xpy.cpp +++ b/example/half/xpy.cpp @@ -109,4 +109,4 @@ int main(int argc, char **argv) { do_xpy(n, time_only); Kokkos::finalize(); return 0; -} +} \ No newline at end of file From 7c9ed9e6d3034c0d3e1143b97686cd17f5255e67 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 24 Oct 2023 22:49:07 -0600 Subject: [PATCH 15/22] Re-enabling gesv unit tests under the lapack subdirectory --- cmake/KokkosKernels_config.h.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index b8b66fffbb..4c54a350b3 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -109,6 +109,8 @@ /* BLAS library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_BLAS +/* LAPACKE */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_LAPACK /* MKL library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL /* CUSPARSE */ From 6ac5ba3597b7a76c0343360848016f06efaf6fb3 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 24 Oct 2023 23:32:18 -0600 Subject: [PATCH 16/22] Adding BLAS routines back, for backwards compatibility --- blas/src/KokkosBlas_gesv.hpp | 55 +++++++++++++++++++++++++++++++++++ blas/src/KokkosBlas_trtri.hpp | 52 +++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 blas/src/KokkosBlas_gesv.hpp create mode 100644 blas/src/KokkosBlas_trtri.hpp diff --git a/blas/src/KokkosBlas_gesv.hpp b/blas/src/KokkosBlas_gesv.hpp new file mode 100644 index 0000000000..5e224c07e4 --- /dev/null +++ b/blas/src/KokkosBlas_gesv.hpp @@ -0,0 +1,55 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosBlas_gesv.hpp +/// \brief Local dense linear solve +/// +/// This file provides KokkosBlas::gesv. This function performs a +/// local (no MPI) dense linear solve on a system of linear equations +/// A * X = B where A is a general N-by-N matrix and X and B are N-by-NRHS +/// matrices. + +#ifndef KOKKOSBLAS_GESV_HPP_ +#define KOKKOSBLAS_GESV_HPP_ + +#include "KokkosLapack_gesv.hpp" + +namespace KokkosBlas { + +/// \brief Solve the dense linear equation system A*X = B. +/// +/// \tparam AMatrix Input matrix/Output LU, as a 2-D Kokkos::View. +/// \tparam BXMV Input (right-hand side)/Output (solution) (multi)vector, as a +/// 1-D or 2-D Kokkos::View. \tparam IPIVV Output pivot indices, as a 1-D +/// Kokkos::View +/// +/// \param A [in,out] On entry, the N-by-N matrix to be solved. On exit, the +/// factors L and U from +/// the factorization A = P*L*U; the unit diagonal elements of L are not +/// stored. +/// \param B [in,out] On entry, the right hand side (multi)vector B. On exit, +/// the solution (multi)vector X. \param IPIV [out] On exit, the pivot indices +/// (for partial pivoting). If the View extents are zero and +/// its data pointer is NULL, pivoting is not used. +/// +template +[[deprecated]] void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { + KokkosLapack::gesv(A,B,IPIV); +} + +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_GESV_HPP_ diff --git a/blas/src/KokkosBlas_trtri.hpp b/blas/src/KokkosBlas_trtri.hpp new file mode 100644 index 0000000000..34ca96b2d4 --- /dev/null +++ b/blas/src/KokkosBlas_trtri.hpp @@ -0,0 +1,52 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSBLAS_TRTRI_HPP_ +#define KOKKOSBLAS_TRTRI_HPP_ + +/// \file KokkosBlas_trtri.hpp + +#include "KokkosLapack_trtri.hpp" + +namespace KokkosBlas { + +/// \brief Find the inverse of the triangular matrix, A +/// +/// A = inv(A) +/// +/// \tparam AViewType Input matrix, as a 2-D Kokkos::View +/// +/// \param uplo [in] "U" or "u" indicates matrix A is an upper triangular +/// matrix +/// "L" or "l" indicates matrix A is a lower triangular matrix +/// \param diag [in] "U" or "u" indicates the diagonal of A is assumed to be +/// unit +// "N" or "n" indicates the diagonal of A is assumed to be +// non-unit +/// \param A [in,out] Input matrix, as a 2-D Kokkos::View +/// On entry, A +/// On successful exit, inv(A) +/// \return 0 upon success, +// i if the i-th diagonal elemet of A is zero, A is singular, +// and the inversion could not be completed. +// source: https://software.intel.com/en-us/mkl-developer-reference-c-trtri +template +[[deprecated]] int trtri(const char uplo[], const char diag[], const AViewType& A) { + return KokkosLapack::trtri(uplo, diag, A); +} + +} // namespace KokkosBlas + +#endif // KOKKOS_BLASLAPACK_TRTRI_HPP_ From a62d66640f00803f2765d02abc8efbdcaa03c76f Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Tue, 24 Oct 2023 23:51:21 -0600 Subject: [PATCH 17/22] Formatting --- blas/src/KokkosBlas_gesv.hpp | 2 +- blas/src/KokkosBlas_trtri.hpp | 3 +- blas/tpls/KokkosBlas_Host_tpl.cpp | 1 - lapack/impl/KokkosLapack_gesv_spec.hpp | 68 +++---- lapack/impl/KokkosLapack_trtri_spec.hpp | 30 +-- lapack/src/KokkosLapack_gesv.hpp | 17 +- lapack/src/KokkosLapack_trtri.hpp | 4 +- lapack/tpls/KokkosLapack_Host_tpl.cpp | 29 +-- .../tpls/KokkosLapack_gesv_tpl_spec_avail.hpp | 26 +-- .../tpls/KokkosLapack_gesv_tpl_spec_decl.hpp | 150 +++++++-------- lapack/tpls/KokkosLapack_tpl_spec.hpp | 42 +++-- .../KokkosLapack_trtri_tpl_spec_avail.hpp | 56 +++--- .../tpls/KokkosLapack_trtri_tpl_spec_decl.hpp | 175 +++++++++--------- lapack/unit_test/Test_Lapack_gesv.hpp | 27 ++- lapack/unit_test/Test_Lapack_trtri.hpp | 4 +- 15 files changed, 326 insertions(+), 308 deletions(-) diff --git a/blas/src/KokkosBlas_gesv.hpp b/blas/src/KokkosBlas_gesv.hpp index 5e224c07e4..1326c6fb8e 100644 --- a/blas/src/KokkosBlas_gesv.hpp +++ b/blas/src/KokkosBlas_gesv.hpp @@ -47,7 +47,7 @@ namespace KokkosBlas { /// template [[deprecated]] void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { - KokkosLapack::gesv(A,B,IPIV); + KokkosLapack::gesv(A, B, IPIV); } } // namespace KokkosBlas diff --git a/blas/src/KokkosBlas_trtri.hpp b/blas/src/KokkosBlas_trtri.hpp index 34ca96b2d4..d9771e3a16 100644 --- a/blas/src/KokkosBlas_trtri.hpp +++ b/blas/src/KokkosBlas_trtri.hpp @@ -43,7 +43,8 @@ namespace KokkosBlas { // and the inversion could not be completed. // source: https://software.intel.com/en-us/mkl-developer-reference-c-trtri template -[[deprecated]] int trtri(const char uplo[], const char diag[], const AViewType& A) { +[[deprecated]] int trtri(const char uplo[], const char diag[], + const AViewType& A) { return KokkosLapack::trtri(uplo, diag, A); } diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index 88c3ef7bbd..71e22a690c 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -411,7 +411,6 @@ void F77_BLAS_MANGLE(ztrsm, ZTRSM)(const char*, const char*, const char*, const std::complex*, const std::complex*, int*, /* */ std::complex*, int*); - } void F77_BLAS_MANGLE(sscal, SSCAL)(const int* N, const float* alpha, diff --git a/lapack/impl/KokkosLapack_gesv_spec.hpp b/lapack/impl/KokkosLapack_gesv_spec.hpp index 8ea1df03bf..b9f8549311 100644 --- a/lapack/impl/KokkosLapack_gesv_spec.hpp +++ b/lapack/impl/KokkosLapack_gesv_spec.hpp @@ -43,16 +43,16 @@ struct gesv_eti_spec_avail { // more .cpp files. // #define KOKKOSLAPACK_GESV_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template <> \ - struct gesv_eti_spec_avail< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct gesv_eti_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -98,33 +98,33 @@ struct GESV { // more .cpp files. // #define KOKKOSLAPACK_GESV_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - extern template struct GESV< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GESV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; #define KOKKOSLAPACK_GESV_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ - EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ - template struct GESV< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GESV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/lapack/impl/KokkosLapack_trtri_spec.hpp b/lapack/impl/KokkosLapack_trtri_spec.hpp index e48b37f7c2..a17184dc41 100644 --- a/lapack/impl/KokkosLapack_trtri_spec.hpp +++ b/lapack/impl/KokkosLapack_trtri_spec.hpp @@ -37,8 +37,8 @@ struct trtri_eti_spec_avail { // This Macros provides the ETI specialization of trtri, currently not // available. // -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, \ - MEM_SPACE) \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_AVAIL(SCALAR, LAYOUTA, EXEC_SPACE, \ + MEM_SPACE) \ template <> \ struct trtri_eti_spec_avail< \ Kokkos::View { // "extern template" skips the implicit instatiation step ensuring that the // callers code uses this explicit instantiation definition of TRTRI. // -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ - extern template struct TRTRI< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_DECL(SCALAR, LAYOUTA, EXEC_SPACE, \ + MEM_SPACE) \ + extern template struct TRTRI< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; -#define KOKKOSLAPACK_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, MEM_SPACE) \ - template struct TRTRI< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ +#define KOKKOSLAPACK_TRTRI_ETI_SPEC_INST(SCALAR, LAYOUTA, EXEC_SPACE, \ + MEM_SPACE) \ + template struct TRTRI< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/lapack/src/KokkosLapack_gesv.hpp b/lapack/src/KokkosLapack_gesv.hpp index b08f523f6e..4c9058f8ab 100644 --- a/lapack/src/KokkosLapack_gesv.hpp +++ b/lapack/src/KokkosLapack_gesv.hpp @@ -50,10 +50,11 @@ namespace KokkosLapack { /// template void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { - // NOTE: Currently, KokkosLapack::gesv only supports for MAGMA TPL and LAPACK TPL. + // NOTE: Currently, KokkosLapack::gesv only supports for MAGMA TPL and LAPACK + // TPL. // MAGMA TPL should be enabled to call the MAGMA GPU interface for - // device views LAPACK TPL should be enabled to call the LAPACK interface - // for host views + // device views LAPACK TPL should be enabled to call the LAPACK + // interface for host views static_assert(Kokkos::is_view::value, "KokkosLapack::gesv: A must be a Kokkos::View."); @@ -87,8 +88,8 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { } // Check for no pivoting case. Only MAGMA supports no pivoting interface -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL if ((!std::is_same::value) && (IPIV0 == 0) && (IPIV.data() == nullptr)) { @@ -98,7 +99,7 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { KokkosKernels::Impl::throw_runtime_exception(os.str()); } #endif -#else // not have MAGMA TPL +#else // not have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL if ((IPIV0 == 0) && (IPIV.data() == nullptr)) { std::ostringstream os; @@ -137,11 +138,11 @@ void gesv(const AMatrix& A, const BXMV& B, const IPIVV& IPIV) { if (BXMV::rank == 1) { auto B_i = BXMV_Internal(B.data(), B.extent(0), 1); KokkosLapack::Impl::GESV::gesv(A_i, B_i, IPIV_i); + IPIVV_Internal>::gesv(A_i, B_i, IPIV_i); } else { // BXMV::rank == 2 auto B_i = BXMV_Internal(B.data(), B.extent(0), B.extent(1)); KokkosLapack::Impl::GESV::gesv(A_i, B_i, IPIV_i); + IPIVV_Internal>::gesv(A_i, B_i, IPIV_i); } } diff --git a/lapack/src/KokkosLapack_trtri.hpp b/lapack/src/KokkosLapack_trtri.hpp index 44e8fc9f65..9a884f2303 100644 --- a/lapack/src/KokkosLapack_trtri.hpp +++ b/lapack/src/KokkosLapack_trtri.hpp @@ -108,8 +108,8 @@ int trtri(const char uplo[], const char diag[], const AViewType& A) { int result; RViewInternalType R = RViewInternalType(&result); - KokkosLapack::Impl::TRTRI::trtri(R, uplo, - diag, A); + KokkosLapack::Impl::TRTRI::trtri( + R, uplo, diag, A); return result; } diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 6ece9fe914..130eaba264 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -79,12 +79,12 @@ namespace Impl { template <> void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, - float* b, int ldb, int info) { + float* b, int ldb, int info) { F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> int HostLapack::trtri(const char uplo, const char diag, int n, - const float* a, int lda) { + const float* a, int lda) { int info = 0; F77_FUNC_STRTRI(&uplo, &diag, &n, a, &lda, &info); return info; @@ -96,12 +96,12 @@ int HostLapack::trtri(const char uplo, const char diag, int n, template <> void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, - double* b, int ldb, int info) { + double* b, int ldb, int info) { F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> int HostLapack::trtri(const char uplo, const char diag, int n, - const double* a, int lda) { + const double* a, int lda) { int info = 0; F77_FUNC_DTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; @@ -113,15 +113,15 @@ int HostLapack::trtri(const char uplo, const char diag, int n, template <> void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> int HostLapack >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) { + int n, const std::complex* a, + int lda) { int info = 0; F77_FUNC_CTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; @@ -133,15 +133,16 @@ int HostLapack >::trtri(const char uplo, const char diag, template <> void HostLapack >::gesv(int n, int rhs, - std::complex* a, int lda, - int* ipiv, std::complex* b, - int ldb, int info) { + std::complex* a, int lda, + int* ipiv, std::complex* b, + int ldb, int info) { F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> int HostLapack >::trtri(const char uplo, const char diag, - int n, const std::complex* a, - int lda) { + int n, + const std::complex* a, + int lda) { int info = 0; F77_FUNC_ZTRTRI(&uplo, &diag, &n, a, &lda, &info); return info; diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp index 74a65d4cf9..a3d8bb6ee9 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_avail.hpp @@ -28,7 +28,7 @@ struct gesv_tpl_spec_avail { // Generic Host side LAPACK (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ template \ struct gesv_tpl_spec_avail< \ Kokkos::View, \ @@ -39,13 +39,13 @@ struct gesv_tpl_spec_avail { }; KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) + Kokkos::HostSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) /* #if defined (KOKKOSKERNELS_INST_DOUBLE) \ && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) @@ -69,7 +69,7 @@ Kokkos::LayoutRight, Kokkos::HostSpace) #endif // MAGMA #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ +#define KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(SCALAR, LAYOUT, MEMSPACE) \ template \ struct gesv_tpl_spec_avail< \ Kokkos::View, \ @@ -80,13 +80,13 @@ Kokkos::LayoutRight, Kokkos::HostSpace) #endif }; KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GESV_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) /* #if defined (KOKKOSKERNELS_INST_DOUBLE) \ diff --git a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp index dcab48f07b..2baa76a132 100644 --- a/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_gesv_tpl_spec_decl.hpp @@ -45,7 +45,7 @@ inline void gesv_print_specialization() { namespace KokkosLapack { namespace Impl { -#define KOKKOSLAPACK_DGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_DGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -74,7 +74,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,double]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,double]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -89,65 +89,65 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ - LDB, info); \ + HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), \ + B.data(), LDB, info); \ } \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSLAPACK_SGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GESV< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - BViewType; \ - typedef Kokkos::View< \ - int*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - PViewType; \ - \ - static void gesv(const AViewType& A, const BViewType& B, \ - const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,float]"); \ - gesv_print_specialization(); \ - const bool with_pivot = \ - !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ - \ - const int N = static_cast(A.extent(1)); \ - const int AST = static_cast(A.stride(1)); \ - const int LDA = (AST == 0) ? 1 : AST; \ - const int BST = static_cast(B.stride(1)); \ - const int LDB = (BST == 0) ? 1 : BST; \ - const int NRHS = static_cast(B.extent(1)); \ - \ - int info = 0; \ - \ - if (with_pivot) { \ - HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ - LDB, info); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_SGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ + template \ + struct GESV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + BViewType; \ + typedef Kokkos::View< \ + int*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits > \ + PViewType; \ + \ + static void gesv(const AViewType& A, const BViewType& B, \ + const PViewType& IPIV) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_LAPACK,float]"); \ + gesv_print_specialization(); \ + const bool with_pivot = \ + !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ + \ + const int N = static_cast(A.extent(1)); \ + const int AST = static_cast(A.stride(1)); \ + const int LDA = (AST == 0) ? 1 : AST; \ + const int BST = static_cast(B.stride(1)); \ + const int LDB = (BST == 0) ? 1 : BST; \ + const int NRHS = static_cast(B.extent(1)); \ + \ + int info = 0; \ + \ + if (with_pivot) { \ + HostLapack::gesv(N, NRHS, A.data(), LDA, IPIV.data(), B.data(), \ + LDB, info); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSLAPACK_ZGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_ZGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -178,7 +178,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ + "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -193,7 +193,7 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostLapack >::gesv( \ + HostLapack >::gesv( \ N, NRHS, reinterpret_cast*>(A.data()), LDA, \ IPIV.data(), reinterpret_cast*>(B.data()), \ LDB, info); \ @@ -202,7 +202,7 @@ namespace Impl { } \ }; -#define KOKKOSLAPACK_CGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_CGESV_LAPACK(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -233,7 +233,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ + "KokkosLapack::gesv[TPL_LAPACK,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -248,7 +248,7 @@ namespace Impl { int info = 0; \ \ if (with_pivot) { \ - HostLapack >::gesv( \ + HostLapack >::gesv( \ N, NRHS, reinterpret_cast*>(A.data()), LDA, \ IPIV.data(), reinterpret_cast*>(B.data()), \ LDB, info); \ @@ -280,7 +280,7 @@ KOKKOSLAPACK_CGESV_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) namespace KokkosLapack { namespace Impl { -#define KOKKOSLAPACK_DGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_DGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -309,7 +309,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,double]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,double]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -321,8 +321,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -339,7 +339,7 @@ namespace Impl { } \ }; -#define KOKKOSLAPACK_SGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_SGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV< \ Kokkos::View, \ @@ -368,7 +368,7 @@ namespace Impl { \ static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,float]"); \ + Kokkos::Profiling::pushRegion("KokkosLapack::gesv[TPL_MAGMA,float]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -380,8 +380,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -398,7 +398,7 @@ namespace Impl { } \ }; -#define KOKKOSLAPACK_ZGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_ZGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -429,7 +429,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ + "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -441,8 +441,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ @@ -459,7 +459,7 @@ namespace Impl { } \ }; -#define KOKKOSLAPACK_CGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_CGESV_MAGMA(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct GESV**, LAYOUT, \ Kokkos::Device, \ @@ -490,7 +490,7 @@ namespace Impl { static void gesv(const AViewType& A, const BViewType& B, \ const PViewType& IPIV) { \ Kokkos::Profiling::pushRegion( \ - "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ + "KokkosLapack::gesv[TPL_MAGMA,complex]"); \ gesv_print_specialization(); \ const bool with_pivot = \ !((IPIV.extent(0) == 0) && (IPIV.data() == nullptr)); \ @@ -502,8 +502,8 @@ namespace Impl { magma_int_t LDB = (BST == 0) ? 1 : BST; \ magma_int_t NRHS = static_cast(B.extent(1)); \ \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ magma_int_t info = 0; \ \ if (with_pivot) { \ diff --git a/lapack/tpls/KokkosLapack_tpl_spec.hpp b/lapack/tpls/KokkosLapack_tpl_spec.hpp index 7bfffc780f..3aed9533bd 100644 --- a/lapack/tpls/KokkosLapack_tpl_spec.hpp +++ b/lapack/tpls/KokkosLapack_tpl_spec.hpp @@ -34,15 +34,16 @@ struct CudaLapackSingleton { }; inline void cusolver_internal_error_throw(cusolverStatus_t cusolverState, - const char* name, const char* file, - const int line) { + const char* name, const char* file, + const int line) { std::ostringstream out; // out << name << " error( " << cusolverGetStatusName(cusolverState) // << "): " << cusolverGetStatusString(cusolverState); out << name << " error( "; switch (cusolverState) { case CUSOLVER_STATUS_NOT_INITIALIZED: - out << "CUSOLVER_STATUS_NOT_INITIALIZED): the library was not initialized."; + out << "CUSOLVER_STATUS_NOT_INITIALIZED): the library was not " + "initialized."; break; case CUSOLVER_STATUS_ALLOC_FAILED: out << "CUSOLVER_STATUS_ALLOC_FAILED): the resource allocation failed."; @@ -79,9 +80,9 @@ inline void cusolver_internal_error_throw(cusolverStatus_t cusolverState, } inline void cusolver_internal_safe_call(cusolverStatus_t cusolverState, - const char* name, - const char* file = nullptr, - const int line = 0) { + const char* name, + const char* file = nullptr, + const int line = 0) { if (CUSOLVER_STATUS_SUCCESS != cusolverState) { cusolver_internal_error_throw(cusolverState, name, file, line); } @@ -90,8 +91,9 @@ inline void cusolver_internal_safe_call(cusolverStatus_t cusolverState, // The macro below defines the interface for the safe cusolver calls. // The functions themselves are protected by impl namespace and this // is not meant to be used by external application or libraries. -#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, \ + __LINE__) /// \brief This function converts KK transpose mode to cusolver transpose mode inline cublasOperation_t trans_mode_kk_to_cusolver(const char kkMode[]) { @@ -124,13 +126,14 @@ struct RocsolverSingleton { }; inline void rocsolver_internal_error_throw(rocsolver_status rocsolverState, - const char* name, const char* file, - const int line) { + const char* name, const char* file, + const int line) { std::ostringstream out; out << name << " error( "; switch (rocsolverState) { case rocsolver_status_invalid_handle: - out << "rocsolver_status_invalid_handle): handle not initialized, invalid " + out << "rocsolver_status_invalid_handle): handle not initialized, " + "invalid " "or null."; break; case rocsolver_status_not_implemented: @@ -143,11 +146,13 @@ inline void rocsolver_internal_error_throw(rocsolver_status rocsolverState, out << "rocsolver_status_invalid_size): invalid size argument."; break; case rocsolver_status_memory_error: - out << "rocsolver_status_memory_error): failed internal memory allocation, " + out << "rocsolver_status_memory_error): failed internal memory " + "allocation, " "copy or dealloc."; break; case rocsolver_status_internal_error: - out << "rocsolver_status_internal_error): other internal library failure."; + out << "rocsolver_status_internal_error): other internal library " + "failure."; break; case rocsolver_status_perf_degraded: out << "rocsolver_status_perf_degraded): performance degraded due to low " @@ -184,9 +189,9 @@ inline void rocsolver_internal_error_throw(rocsolver_status rocsolverState, } inline void rocsolver_internal_safe_call(rocsolver_status rocsolverState, - const char* name, - const char* file = nullptr, - const int line = 0) { + const char* name, + const char* file = nullptr, + const int line = 0) { if (rocsolver_status_success != rocsolverState) { rocsolver_internal_error_throw(rocsolverState, name, file, line); } @@ -195,8 +200,9 @@ inline void rocsolver_internal_safe_call(rocsolver_status rocsolverState, // The macro below defines the interface for the safe rocsolver calls. // The functions themselves are protected by impl namespace and this // is not meant to be used by external application or libraries. -#define KOKKOS_ROCSOLVER_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::rocsolver_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOS_ROCSOLVER_SAFE_CALL_IMPL(call) \ + KokkosLapack::Impl::rocsolver_internal_safe_call(call, #call, __FILE__, \ + __LINE__) /// \brief This function converts KK transpose mode to rocsolver transpose mode inline rocsolver_operation trans_mode_kk_to_rocsolver(const char kkMode[]) { diff --git a/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp index d723cef260..e9fe689fef 100644 --- a/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp @@ -27,7 +27,7 @@ struct trtri_tpl_spec_avail { }; // Generic Host side LAPACK (could be MKL or whatever) -#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ +#define KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL(SCALAR, LAYOUTA, MEMSPACE) \ template \ struct trtri_tpl_spec_avail< \ Kokkos::View, - Kokkos::LayoutLeft, Kokkos::HostSpace) + Kokkos::LayoutLeft, Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) -KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, - Kokkos::HostSpace) + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaSpace) + Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, - Kokkos::HostSpace) + Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, - Kokkos::CudaSpace) + Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, - Kokkos::CudaUVMSpace) + Kokkos::CudaUVMSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HostSpace) + Kokkos::LayoutRight, Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaSpace) + Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace) + Kokkos::LayoutRight, + Kokkos::CudaUVMSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::HostSpace) + Kokkos::LayoutRight, Kokkos::HostSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaSpace) + Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, - Kokkos::LayoutRight, Kokkos::CudaUVMSpace) + Kokkos::LayoutRight, + Kokkos::CudaUVMSpace) } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp index 9f79ad2eb5..32e2434a86 100644 --- a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp @@ -24,8 +24,8 @@ namespace KokkosLapack { namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK -#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ - MEM_SPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ + MEM_SPACE, ETI_SPEC_AVAIL) \ template \ struct TRTRI >, \ @@ -44,8 +44,8 @@ namespace Impl { \ static void trtri(const RViewType& R, const char uplo[], \ const char diag[], const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE \ - "]"); \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ const int M = static_cast(A.extent(0)); \ \ bool A_is_layout_left = \ @@ -61,7 +61,7 @@ namespace Impl { else \ uplo_ = A_is_layout_left ? 'U' : 'L'; \ \ - R() = HostLapack::trtri( \ + R() = HostLapack::trtri( \ uplo_, diag[0], M, \ reinterpret_cast(A.data()), LDA); \ Kokkos::Profiling::popRegion(); \ @@ -69,65 +69,67 @@ namespace Impl { }; #else #define KOKKOSLAPACK_TRTRI_LAPACK_HOST(SCALAR_TYPE, BASE_SCALAR_TYPE, LAYOUTA, \ - MEM_SPACE, ETI_SPEC_AVAIL) + MEM_SPACE, ETI_SPEC_AVAIL) #endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ - LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct TRTRI >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View > \ - RViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - \ - static void trtri(const RViewType& R, const char uplo[], \ - const char diag[], const AViewType& A) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE \ - "]"); \ - magma_int_t M = static_cast(A.extent(0)); \ - \ - bool A_is_layout_left = \ - std::is_same::value; \ - \ - magma_int_t AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ - LDA = (AST == 0) ? 1 : AST; \ - magma_int_t info = 0; \ - magma_uplo_t uplo_; \ - magma_diag_t diag_; \ - \ - if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ - uplo_ = A_is_layout_left ? MagmaLower : MagmaUpper; \ - else \ - uplo_ = A_is_layout_left ? MagmaUpper : MagmaLower; \ - \ - if (diag[0] == 'U' || diag[0] == 'u') \ - diag_ = MagmaUnit; \ - else \ - diag_ = MagmaNonUnit; \ - \ - KokkosLapack::Impl::MagmaSingleton& s = \ - KokkosLapack::Impl::MagmaSingleton::singleton(); \ - R() = MAGMA_FN(uplo_, diag_, M, \ - reinterpret_cast( \ - const_cast(A.data())), \ - LDA, &info); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, \ + MAGMA_FN, LAYOUTA, MEM_SPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct TRTRI >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef SCALAR_TYPE SCALAR; \ + typedef Kokkos::View > \ + RViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + \ + static void trtri(const RViewType& R, const char uplo[], \ + const char diag[], const AViewType& A) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::trtri[TPL_LAPACK," #SCALAR_TYPE "]"); \ + magma_int_t M = static_cast(A.extent(0)); \ + \ + bool A_is_layout_left = \ + std::is_same::value; \ + \ + magma_int_t AST = A_is_layout_left ? A.stride(1) : A.stride(0), \ + LDA = (AST == 0) ? 1 : AST; \ + magma_int_t info = 0; \ + magma_uplo_t uplo_; \ + magma_diag_t diag_; \ + \ + if ((uplo[0] == 'L') || (uplo[0] == 'l')) \ + uplo_ = A_is_layout_left ? MagmaLower : MagmaUpper; \ + else \ + uplo_ = A_is_layout_left ? MagmaUpper : MagmaLower; \ + \ + if (diag[0] == 'U' || diag[0] == 'u') \ + diag_ = MagmaUnit; \ + else \ + diag_ = MagmaNonUnit; \ + \ + KokkosLapack::Impl::MagmaSingleton& s = \ + KokkosLapack::Impl::MagmaSingleton::singleton(); \ + R() = MAGMA_FN(uplo_, diag_, M, \ + reinterpret_cast( \ + const_cast(A.data())), \ + LDA, &info); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #else -#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, MAGMA_FN, \ - LAYOUTA, MEM_SPACE, ETI_SPEC_AVAIL) +#define KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(SCALAR_TYPE, BASE_SCALAR_TYPE, \ + MAGMA_FN, LAYOUTA, MEM_SPACE, \ + ETI_SPEC_AVAIL) #endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA // Explicitly define the TRTRI class for all permutations listed below @@ -135,39 +137,42 @@ namespace Impl { // Handle type and space permutations #define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) \ + ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) + LAYOUTA, Kokkos::CudaUVMSpace, \ + ETI_SPEC_AVAIL) #define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ - ETI_SPEC_AVAIL) \ + ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ - LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(float, magmaFloat_ptr, magma_strtri_gpu, \ - LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) - -#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ - LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ - magma_ztrtri_gpu, LAYOUTA, Kokkos::CudaSpace, \ - ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaDoubleComplex_ptr, \ - magma_ztrtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ - ETI_SPEC_AVAIL) - -#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ - LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ - magma_ctrtri_gpu, LAYOUTA, Kokkos::CudaSpace, \ - ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, magmaFloatComplex_ptr, \ - magma_ctrtri_gpu, LAYOUTA, Kokkos::CudaUVMSpace, \ - ETI_SPEC_AVAIL) + LAYOUTA, Kokkos::CudaUVMSpace, \ + ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ + std::complex, LAYOUTA, \ + Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, \ + magmaDoubleComplex_ptr, magma_ztrtri_gpu, \ + LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA( \ + Kokkos::complex, magmaDoubleComplex_ptr, magma_ztrtri_gpu, \ + LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ + LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(Kokkos::complex, \ + magmaFloatComplex_ptr, magma_ctrtri_gpu, \ + LAYOUTA, Kokkos::CudaSpace, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_MAGMA( \ + Kokkos::complex, magmaFloatComplex_ptr, magma_ctrtri_gpu, \ + LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) // Handle layout permutations KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutLeft, true) diff --git a/lapack/unit_test/Test_Lapack_gesv.hpp b/lapack/unit_test/Test_Lapack_gesv.hpp index f37770c812..06f51b7eb0 100644 --- a/lapack/unit_test/Test_Lapack_gesv.hpp +++ b/lapack/unit_test/Test_Lapack_gesv.hpp @@ -16,10 +16,11 @@ // only enable this test where KokkosLapack supports gesv: // CUDA+MAGMA and HOST+LAPACK -#if (defined(TEST_CUDA_LAPACK_CPP) && \ - defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)) || \ - (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ - (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_OPENMPTARGET_LAPACK_CPP) || \ +#if (defined(TEST_CUDA_LAPACK_CPP) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA)) || \ + (defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || \ + defined(TEST_OPENMPTARGET_LAPACK_CPP) || \ defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) #include @@ -96,8 +97,8 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { // and no-tpl case bool nopivot_runtime_err = false; bool notpl_runtime_err = false; -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); @@ -105,7 +106,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { #else notpl_runtime_err = true; #endif -#else // not have MAGMA TPL +#else // not have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; @@ -201,8 +202,8 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, // and no-tpl case bool nopivot_runtime_err = false; bool notpl_runtime_err = false; -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL -#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA // have MAGMA TPL +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // and have LAPACK TPL nopivot_runtime_err = (!std::is_same::value) && (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); @@ -210,7 +211,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, #else notpl_runtime_err = true; #endif -#else // not have MAGMA TPL +#else // not have MAGMA TPL #ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK // but have LAPACK TPL nopivot_runtime_err = (ipiv.extent(0) == 0) && (ipiv.data() == nullptr); notpl_runtime_err = false; @@ -387,8 +388,7 @@ TEST_F(TestCategory, gesv_complex_double) { TEST_F(TestCategory, gesv_mrhs_complex_double) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_double"); test_gesv_mrhs, TestDevice>("N"); // No pivoting - test_gesv_mrhs, TestDevice>( - "Y"); // Partial pivoting + test_gesv_mrhs, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif @@ -406,8 +406,7 @@ TEST_F(TestCategory, gesv_complex_float) { TEST_F(TestCategory, gesv_mrhs_complex_float) { Kokkos::Profiling::pushRegion("KokkosLapack::Test::gesv_mrhs_complex_float"); test_gesv_mrhs, TestDevice>("N"); // No pivoting - test_gesv_mrhs, TestDevice>( - "Y"); // Partial pivoting + test_gesv_mrhs, TestDevice>("Y"); // Partial pivoting Kokkos::Profiling::popRegion(); } #endif diff --git a/lapack/unit_test/Test_Lapack_trtri.hpp b/lapack/unit_test/Test_Lapack_trtri.hpp index 0105803567..a19e575d89 100644 --- a/lapack/unit_test/Test_Lapack_trtri.hpp +++ b/lapack/unit_test/Test_Lapack_trtri.hpp @@ -118,8 +118,8 @@ int impl_test_trtri(int bad_diag_idx, const char* uplo, const char* diag, // const int As0 = A.stride(0), As1 = A.stride(1); // const int Ae0 = A.extent(0), Ae1 = A.extent(1); - // printf("KokkosLapack::trtri test for %c %c, M %d, N %d, eps %g, ViewType: %s, - // A.stride(0): %d, A.stride(1): %d, A.extent(0): %d, A.extent(1): %d + // printf("KokkosLapack::trtri test for %c %c, M %d, N %d, eps %g, ViewType: + // %s, A.stride(0): %d, A.stride(1): %d, A.extent(0): %d, A.extent(1): %d // START\n", uplo[0],diag[0],M,N,eps,typeid(ViewTypeA).name(), As0, As1, Ae0, // Ae1); fflush(stdout); From f8cd2cb8aa6c2f53a422e17bcdfb07d4f82f220f Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 25 Oct 2023 00:07:10 -0600 Subject: [PATCH 18/22] Small cleaning --- cmake/kokkoskernels_tpls.cmake | 14 -------------- lapack/tpls/KokkosLapack_Host_tpl.cpp | 1 - 2 files changed, 15 deletions(-) diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index be1488e051..f650168757 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -440,20 +440,6 @@ IF ("${F77_BLAS_MANGLE}" STREQUAL "") ENDIF() ENDIF() -# AquiEEP -IF ("${F77_LAPACK_MANGLE}" STREQUAL "") - IF (KOKKOSKERNELS_ENABLE_TPL_LAPACK OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNELS_ENABLE_TPL_MAGMA OR KOKKOSKERNELS_ENABLE_TPL_ARMPL) - ENABLE_LANGUAGE(C) - ENABLE_LANGUAGE(Fortran) - INCLUDE(FortranCInterface) - IF (FortranCInterface_GLOBAL_SUFFIX STREQUAL "") - SET(F77_LAPACK_MANGLE "(name,NAME) ${FortranCInterface_GLOBAL_PREFIX}name") - ELSE () - SET(F77_LAPACK_MANGLE "(name,NAME) ${FortranCInterface_GLOBAL_PREFIX}name ## ${FortranCInterface_GLOBAL_SUFFIX}") - ENDIF () - ENDIF() -ENDIF() - KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_CUDA_TPLS OFF BOOL "Whether CUDA TPLs should be enabled by default. Default: OFF") SET(CUBLAS_DEFAULT ${KOKKOS_ENABLE_CUDA}) SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index 130eaba264..d629a17f1d 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -29,7 +29,6 @@ extern "C" { /// Gesv /// -// AquiEEP void F77_BLAS_MANGLE(sgesv, SGESV)(int*, int*, float*, int*, int*, float*, int*, int*); void F77_BLAS_MANGLE(dgesv, DGESV)(int*, int*, double*, int*, int*, double*, From edf2dd011264da42017fb217857aa8052af76968 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 25 Oct 2023 02:16:37 -0600 Subject: [PATCH 19/22] Correcting error in Jenkins --- perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp b/perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp index cbadcef0b1..de2db8dbb0 100644 --- a/perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp +++ b/perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp @@ -21,7 +21,7 @@ #include -#include +#include #include "KokkosBatched_Trtri_Decl.hpp" #include "KokkosBatched_Trtri_Serial_Impl.hpp" @@ -185,7 +185,7 @@ void __do_trtri_serial_blas(options_t options, trtri_args_t trtri_args) { for (int i = 0; i < options.start.a.k; ++i) { auto A = Kokkos::subview(trtri_args.A, i, Kokkos::ALL(), Kokkos::ALL()); - KokkosBlas::trtri(&trtri_args.uplo, &trtri_args.diag, A); + KokkosLapack::trtri(&trtri_args.uplo, &trtri_args.diag, A); } // Fence after each batch operation Kokkos::fence(); @@ -196,7 +196,7 @@ void __do_trtri_serial_blas(options_t options, trtri_args_t trtri_args) { for (int i = 0; i < options.start.a.k; ++i) { auto A = Kokkos::subview(trtri_args.A, i, Kokkos::ALL(), Kokkos::ALL()); - KokkosBlas::trtri(&trtri_args.uplo, &trtri_args.diag, A); + KokkosLapack::trtri(&trtri_args.uplo, &trtri_args.diag, A); } // Fence after each batch operation Kokkos::fence(); @@ -300,7 +300,7 @@ struct parallel_blas_trtri { void operator()(const int& i) const { auto svA = Kokkos::subview(trtri_args_.A, i, Kokkos::ALL(), Kokkos::ALL()); - KokkosBlas::trtri(&trtri_args_.uplo, &trtri_args_.diag, svA); + KokkosLapack::trtri(&trtri_args_.uplo, &trtri_args_.diag, svA); } }; #endif // !KOKKOS_ENABLE_CUDA && !KOKKOS_ENABLE_HIP && From 915766579c33a179208158a15b89aa632ecba1f8 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 25 Oct 2023 04:13:46 -0600 Subject: [PATCH 20/22] Fixing compilation error on Jenkins when dealing with HIP --- lapack/unit_test/backends/Test_HIP_Lapack.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 lapack/unit_test/backends/Test_HIP_Lapack.cpp diff --git a/lapack/unit_test/backends/Test_HIP_Lapack.cpp b/lapack/unit_test/backends/Test_HIP_Lapack.cpp new file mode 100644 index 0000000000..c0ec152233 --- /dev/null +++ b/lapack/unit_test/backends/Test_HIP_Lapack.cpp @@ -0,0 +1,22 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef TEST_HIP_LAPACK_CPP +#define TEST_HIP_LAPACK_CPP + +#include "Test_HIP.hpp" +#include "Test_Lapack.hpp" + +#endif // TEST_HIP_LAPACK_CPP From 699f3b3f54ee57bafd200709b36a42c461708eaa Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 25 Oct 2023 12:42:58 -0600 Subject: [PATCH 21/22] Addressing latest feedbacks from Luc. --- cmake/KokkosKernels_config.h.in | 2 +- lapack/tpls/KokkosLapack_tpl_spec.hpp | 241 ------------------ .../KokkosLapack_trtri_tpl_spec_avail.hpp | 22 ++ .../tpls/KokkosLapack_trtri_tpl_spec_decl.hpp | 27 +- 4 files changed, 48 insertions(+), 244 deletions(-) delete mode 100644 lapack/tpls/KokkosLapack_tpl_spec.hpp diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index 4c54a350b3..7a61771231 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -109,7 +109,7 @@ /* BLAS library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_BLAS -/* LAPACKE */ +/* LAPACK */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_LAPACK /* MKL library */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MKL diff --git a/lapack/tpls/KokkosLapack_tpl_spec.hpp b/lapack/tpls/KokkosLapack_tpl_spec.hpp deleted file mode 100644 index 3aed9533bd..0000000000 --- a/lapack/tpls/KokkosLapack_tpl_spec.hpp +++ /dev/null @@ -1,241 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOSLAPACK_TPL_SPEC_HPP_ -#define KOKKOSLAPACK_TPL_SPEC_HPP_ - -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER -#include "cuda_runtime.h" -//#include "cublas_v2.h" -#include "cusolverDn.h" - -namespace KokkosLapack { -namespace Impl { - -struct CudaLapackSingleton { - cusolverDnHandle_t handle; - - CudaLapackSingleton(); - - static CudaLapackSingleton& singleton(); -}; - -inline void cusolver_internal_error_throw(cusolverStatus_t cusolverState, - const char* name, const char* file, - const int line) { - std::ostringstream out; - // out << name << " error( " << cusolverGetStatusName(cusolverState) - // << "): " << cusolverGetStatusString(cusolverState); - out << name << " error( "; - switch (cusolverState) { - case CUSOLVER_STATUS_NOT_INITIALIZED: - out << "CUSOLVER_STATUS_NOT_INITIALIZED): the library was not " - "initialized."; - break; - case CUSOLVER_STATUS_ALLOC_FAILED: - out << "CUSOLVER_STATUS_ALLOC_FAILED): the resource allocation failed."; - break; - case CUSOLVER_STATUS_INVALID_VALUE: - out << "CUSOLVER_STATUS_INVALID_VALUE): an invalid numerical value was " - "used as an argument."; - break; - case CUSOLVER_STATUS_ARCH_MISMATCH: - out << "CUSOLVER_STATUS_ARCH_MISMATCH): an absent device architectural " - "feature is required."; - break; - case CUSOLVER_STATUS_MAPPING_ERROR: - out << "CUSOLVER_STATUS_MAPPING_ERROR): an access to GPU memory space " - "failed."; - break; - case CUSOLVER_STATUS_EXECUTION_FAILED: - out << "CUSOLVER_STATUS_EXECUTION_FAILED): the GPU program failed to " - "execute."; - break; - case CUSOLVER_STATUS_INTERNAL_ERROR: - out << "CUSOLVER_STATUS_INTERNAL_ERROR): an internal operation failed."; - break; - case CUSOLVER_STATUS_NOT_SUPPORTED: - out << "CUSOLVER_STATUS_NOT_SUPPORTED): the feature required is not " - "supported."; - break; - default: out << "unrecognized error code): this is bad!"; break; - } - if (file) { - out << " " << file << ":" << line; - } - throw std::runtime_error(out.str()); -} - -inline void cusolver_internal_safe_call(cusolverStatus_t cusolverState, - const char* name, - const char* file = nullptr, - const int line = 0) { - if (CUSOLVER_STATUS_SUCCESS != cusolverState) { - cusolver_internal_error_throw(cusolverState, name, file, line); - } -} - -// The macro below defines the interface for the safe cusolver calls. -// The functions themselves are protected by impl namespace and this -// is not meant to be used by external application or libraries. -#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::cusolver_internal_safe_call(call, #call, __FILE__, \ - __LINE__) - -/// \brief This function converts KK transpose mode to cusolver transpose mode -inline cublasOperation_t trans_mode_kk_to_cusolver(const char kkMode[]) { - cublasOperation_t trans; - if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) - trans = CUBLAS_OP_N; - else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) - trans = CUBLAS_OP_T; - else - trans = CUBLAS_OP_C; - return trans; -} - -} // namespace Impl -} // namespace KokkosLapack -#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER - -#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER -#include - -namespace KokkosLapack { -namespace Impl { - -struct RocsolverSingleton { - rocsolver_handle handle; - - RocsolverSingleton(); - - static RocsolverSingleton& singleton(); -}; - -inline void rocsolver_internal_error_throw(rocsolver_status rocsolverState, - const char* name, const char* file, - const int line) { - std::ostringstream out; - out << name << " error( "; - switch (rocsolverState) { - case rocsolver_status_invalid_handle: - out << "rocsolver_status_invalid_handle): handle not initialized, " - "invalid " - "or null."; - break; - case rocsolver_status_not_implemented: - out << "rocsolver_status_not_implemented): function is not implemented."; - break; - case rocsolver_status_invalid_pointer: - out << "rocsolver_status_invalid_pointer): invalid pointer argument."; - break; - case rocsolver_status_invalid_size: - out << "rocsolver_status_invalid_size): invalid size argument."; - break; - case rocsolver_status_memory_error: - out << "rocsolver_status_memory_error): failed internal memory " - "allocation, " - "copy or dealloc."; - break; - case rocsolver_status_internal_error: - out << "rocsolver_status_internal_error): other internal library " - "failure."; - break; - case rocsolver_status_perf_degraded: - out << "rocsolver_status_perf_degraded): performance degraded due to low " - "device memory."; - break; - case rocsolver_status_size_query_mismatch: - out << "unmatched start/stop size query): ."; - break; - case rocsolver_status_size_increased: - out << "rocsolver_status_size_increased): queried device memory size " - "increased."; - break; - case rocsolver_status_size_unchanged: - out << "rocsolver_status_size_unchanged): queried device memory size " - "unchanged."; - break; - case rocsolver_status_invalid_value: - out << "rocsolver_status_invalid_value): passed argument not valid."; - break; - case rocsolver_status_continue: - out << "rocsolver_status_continue): nothing preventing function to " - "proceed."; - break; - case rocsolver_status_check_numerics_fail: - out << "rocsolver_status_check_numerics_fail): will be set if the " - "vector/matrix has a NaN or an Infinity."; - break; - default: out << "unrecognized error code): this is bad!"; break; - } - if (file) { - out << " " << file << ":" << line; - } - throw std::runtime_error(out.str()); -} - -inline void rocsolver_internal_safe_call(rocsolver_status rocsolverState, - const char* name, - const char* file = nullptr, - const int line = 0) { - if (rocsolver_status_success != rocsolverState) { - rocsolver_internal_error_throw(rocsolverState, name, file, line); - } -} - -// The macro below defines the interface for the safe rocsolver calls. -// The functions themselves are protected by impl namespace and this -// is not meant to be used by external application or libraries. -#define KOKKOS_ROCSOLVER_SAFE_CALL_IMPL(call) \ - KokkosLapack::Impl::rocsolver_internal_safe_call(call, #call, __FILE__, \ - __LINE__) - -/// \brief This function converts KK transpose mode to rocsolver transpose mode -inline rocsolver_operation trans_mode_kk_to_rocsolver(const char kkMode[]) { - rocsolver_operation trans; - if ((kkMode[0] == 'N') || (kkMode[0] == 'n')) - trans = rocsolver_operation_none; - else if ((kkMode[0] == 'T') || (kkMode[0] == 't')) - trans = rocsolver_operation_transpose; - else - trans = rocsolver_operation_conjugate_transpose; - return trans; -} - -} // namespace Impl -} // namespace KokkosLapack - -#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER - -// If LAPACK TPL is enabled, it is preferred over magma's LAPACK -#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#include "magma_v2.h" - -namespace KokkosLapack { -namespace Impl { - -struct MagmaSingleton { - MagmaSingleton(); - - static MagmaSingleton& singleton(); -}; - -} // namespace Impl -} // namespace KokkosLapack -#endif // KOKKOSKERNELS_ENABLE_TPL_MAGMA - -#endif // KOKKOSLAPACK_TPL_SPEC_HPP_ diff --git a/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp index e9fe689fef..7251d97086 100644 --- a/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_avail.hpp @@ -53,57 +53,79 @@ struct trtri_tpl_spec_avail { KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(double, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(float, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +#endif + KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::HostSpace) +#ifdef KOKKOS_ENABLE_CUDA KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaSpace) KOKKOSLAPACK_TRTRI_TPL_SPEC_AVAIL_MAGMA(Kokkos::complex, Kokkos::LayoutRight, Kokkos::CudaUVMSpace) +#endif } // namespace Impl } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp index 32e2434a86..6aadf1ad72 100644 --- a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp @@ -18,7 +18,7 @@ #define KOKKOSLAPACK_TRTRI_TPL_SPEC_DECL_HPP_ #include "KokkosLapack_Host_tpl.hpp" // trtri prototype -#include "KokkosLapack_tpl_spec.hpp" +//#include "KokkosLapack_tpl_spec.hpp" namespace KokkosLapack { namespace Impl { @@ -135,7 +135,9 @@ namespace Impl { // Explicitly define the TRTRI class for all permutations listed below // Handle type and space permutations -#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ +#ifdef KOKKOS_ENABLE_CUDA + +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ @@ -174,6 +176,27 @@ namespace Impl { Kokkos::complex, magmaFloatComplex_ptr, magma_ctrtri_gpu, \ LAYOUTA, Kokkos::CudaUVMSpace, ETI_SPEC_AVAIL) +#else + +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ + ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ + std::complex, LAYOUTA, \ + Kokkos::HostSpace, ETI_SPEC_AVAIL) + +#define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, std::complex, \ + LAYOUTA, Kokkos::HostSpace, ETI_SPEC_AVAIL) + +#endif + // Handle layout permutations KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutLeft, true) KOKKOSLAPACK_DTRTRI_LAPACK(Kokkos::LayoutLeft, false) From d674964889aec3b47c7252be0bdc41ae31fc7423 Mon Sep 17 00:00:00 2001 From: Ernesto Prudencio Date: Wed, 25 Oct 2023 12:53:05 -0600 Subject: [PATCH 22/22] Formatting --- lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp index 6aadf1ad72..3ed0623018 100644 --- a/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_trtri_tpl_spec_decl.hpp @@ -137,7 +137,7 @@ namespace Impl { // Handle type and space permutations #ifdef KOKKOS_ENABLE_CUDA -#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) \ KOKKOSLAPACK_TRTRI_LAPACK_MAGMA(double, magmaDouble_ptr, magma_dtrtri_gpu, \ @@ -178,17 +178,17 @@ namespace Impl { #else -#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ +#define KOKKOSLAPACK_DTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(double, double, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ +#define KOKKOSLAPACK_STRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(float, float, LAYOUTA, Kokkos::HostSpace, \ ETI_SPEC_AVAIL) -#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ - KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ - std::complex, LAYOUTA, \ +#define KOKKOSLAPACK_ZTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \ + KOKKOSLAPACK_TRTRI_LAPACK_HOST(Kokkos::complex, \ + std::complex, LAYOUTA, \ Kokkos::HostSpace, ETI_SPEC_AVAIL) #define KOKKOSLAPACK_CTRTRI_LAPACK(LAYOUTA, ETI_SPEC_AVAIL) \