diff --git a/CHANGELOG.md b/CHANGELOG.md index 187d99d376..4326f3ee5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Change Log +## [3.4.01](https://github.com/kokkos/kokkos-kernels/tree/3.4.01) (2021-05-19) +[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.4.00...3.4.01) + +**Fixed Bugs:** +- Windows: Fixes for Windows [\#981](https://github.com/kokkos/kokkos-kernels/pull/981) +- Sycl: ArithTraits fixes for Sycl [\#959](https://github.com/kokkos/kokkos-kernels/pull/959) +- Sparse: Added code to allow KokkosKernels coloring to accept partial colorings [\#938](https://github.com/kokkos/kokkos-kernels/pull/938) +- Sparse: Include sorting within spiluk [\#972](https://github.com/kokkos/kokkos-kernels/pull/972) +- Sparse: Fix CrsMatrix raw pointer constructor [\#971](https://github.com/kokkos/kokkos-kernels/pull/971) +- Sparse: Fix spmv Serial beta==-1 code path [\#947](https://github.com/kokkos/kokkos-kernels/pull/947) + ## [3.4.00](https://github.com/kokkos/kokkos-kernels/tree/3.4.00) (2021-04-25) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.3.01...3.4.00) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f698db668..88292bdd0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ IF(NOT KOKKOSKERNELS_HAS_TRILINOS) ENDIF() SET(KokkosKernels_VERSION_MAJOR 3) SET(KokkosKernels_VERSION_MINOR 4) - SET(KokkosKernels_VERSION_PATCH 0) + SET(KokkosKernels_VERSION_PATCH 01) ENDIF() IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") diff --git a/master_history.txt b/master_history.txt index 022e459733..5c63ba453d 100644 --- a/master_history.txt +++ b/master_history.txt @@ -13,3 +13,4 @@ tag: 3.2.00 date: 08/19/2020 master: 07a60bcc release: ea3f2b77 tag: 3.3.00 date: 12/16/2020 master: 42defc56 release: e5279e55 tag: 3.3.01 date: 01/18/2021 master: f64b1c57 release: 4e1cc00b tag: 3.4.00 date: 04/26/2021 master: fe439b21 release: d3c33910 +tag: 3.4.01 date: 05/20/2021 master: 564dccb3 release: 4c62eb86 diff --git a/src/Kokkos_ArithTraits.hpp b/src/Kokkos_ArithTraits.hpp index f96ffc49c3..17d3f568fe 100644 --- a/src/Kokkos_ArithTraits.hpp +++ b/src/Kokkos_ArithTraits.hpp @@ -729,7 +729,13 @@ class ArithTraits { return Kokkos::Experimental::cast_to_half(::sqrt (Kokkos::Experimental::cast_from_half(x))); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return Kokkos::Experimental::cast_to_half(::cbrt (Kokkos::Experimental::cast_from_half(x))); + return Kokkos::Experimental::cast_to_half( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(Kokkos::Experimental::cast_from_half(x)) +#else + ::cbrt(Kokkos::Experimental::cast_from_half(x)) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return Kokkos::Experimental::cast_to_half(::exp (Kokkos::Experimental::cast_from_half(x))); @@ -762,10 +768,22 @@ class ArithTraits { return Kokkos::Experimental::cast_to_half(::asin (Kokkos::Experimental::cast_from_half(x))); } static KOKKOS_FORCEINLINE_FUNCTION val_type acos (const val_type x) { - return Kokkos::Experimental::cast_to_half(::acos (Kokkos::Experimental::cast_from_half(x))); + return Kokkos::Experimental::cast_to_half( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::acos(Kokkos::Experimental::cast_from_half(x)) +#else + ::acos(Kokkos::Experimental::cast_from_half(x)) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type atan (const val_type x) { - return Kokkos::Experimental::cast_to_half(::atan (Kokkos::Experimental::cast_from_half(x))); + return Kokkos::Experimental::cast_to_half( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::atan(Kokkos::Experimental::cast_from_half(x)) +#else + ::atan(Kokkos::Experimental::cast_from_half(x)) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION mag_type epsilon () { //return ::pow(2, -KOKKOSKERNELS_IMPL_FP16_SIGNIFICAND_BITS); @@ -858,16 +876,16 @@ class ArithTraits { static KOKKOS_FORCEINLINE_FUNCTION bool isInf (const float x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isinf; -#elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using sycl::isinf +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + using sycl::isinf; #endif return isinf (x); } static KOKKOS_FORCEINLINE_FUNCTION bool isNan (const float x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isnan; -#elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using sycl::isnan +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + using sycl::isnan; #endif return isnan (x); } @@ -899,10 +917,18 @@ class ArithTraits { return ::pow (x, y); } static KOKKOS_FORCEINLINE_FUNCTION float sqrt (const float x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION float cbrt (const float x) { - return ::cbrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::cbrt(x); +#else + return ::cbrt(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION float exp (const float x) { return ::exp (x); @@ -938,7 +964,11 @@ class ArithTraits { return ::acos (x); } static KOKKOS_FORCEINLINE_FUNCTION float atan (const float x) { - return ::atan (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::atan(x); +#else + return ::atan(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION mag_type epsilon () { return FLT_EPSILON; @@ -1039,8 +1069,8 @@ class ArithTraits > { static bool isInf(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isinf; -#elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using sycl::isinf +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + using sycl::isinf; #endif return isinf (real (x)) || isinf (imag (x)); } @@ -1062,8 +1092,8 @@ class ArithTraits > { static bool isNan(const std::complex& x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isnan; -#elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using sycl::isnan +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + using sycl::isnan; #endif return isnan (real (x)) || isnan (imag (x)); } @@ -1130,7 +1160,11 @@ class ArithTraits > { return std::sqrt (x); } static std::complex cbrt (const std::complex& x) { - return std::cbrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::cbrt(x); +#else + return ::cbrt(x); +#endif } static std::complex exp (const std::complex& x) { return std::exp (x); @@ -1166,7 +1200,12 @@ class ArithTraits > { return std::acos (x); } static std::complex atan (const std::complex& x) { - return std::atan (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + using sycl::atan; +#else + using std::atan; +#endif + return atan(x); } static std::complex nan () { const mag_type mag_nan = ArithTraits::nan (); @@ -1251,17 +1290,17 @@ class ArithTraits { static KOKKOS_FORCEINLINE_FUNCTION bool isInf (const val_type x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isinf; - #elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) using sycl::isinf; - #endif +#endif return isinf (x); } static KOKKOS_FORCEINLINE_FUNCTION bool isNan (const val_type x) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::isnan; - #elif KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) using sycl::isnan; - #endif +#endif return isnan (x); } static KOKKOS_FORCEINLINE_FUNCTION mag_type abs (const val_type x) { @@ -1292,10 +1331,18 @@ class ArithTraits { return ::pow (x, y); } static KOKKOS_FORCEINLINE_FUNCTION val_type sqrt (const val_type x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return ::cbrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::cbrt(x); +#else + return ::cbrt(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return ::exp (x); @@ -1331,7 +1378,11 @@ class ArithTraits { return ::acos (x); } static KOKKOS_FORCEINLINE_FUNCTION val_type atan (const val_type x) { - return ::atan (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::atan(x); +#else + return ::atan(x); +#endif } static KOKKOS_FORCEINLINE_FUNCTION val_type nan () { #if defined(__CUDA_ARCH__) @@ -2224,10 +2275,22 @@ class ArithTraits { // some reasonable value (like 0), though this might be more // expensive than the absolute value interpreted using the ternary // operator. - return static_cast ( ::sqrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (abs (x)))); @@ -2346,10 +2409,22 @@ class ArithTraits { return intPowSigned (x, y); } static KOKKOS_FORCEINLINE_FUNCTION val_type sqrt (const val_type x) { - return static_cast ( ::sqrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (abs (x)))); @@ -2471,10 +2546,22 @@ class ArithTraits { // This will result in no loss of accuracy, though it might be // more expensive than it should, if we were clever about using // bit operations. - return static_cast ( ::sqrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (x))); @@ -2604,10 +2691,22 @@ class ArithTraits { // This will result in no loss of accuracy, though it might be // more expensive than it should, if we were clever about using // bit operations. - return static_cast ( ::sqrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (abs (x)))); @@ -2735,10 +2834,22 @@ class ArithTraits { // This will result in no loss of accuracy, though it might be // more expensive than it should, if we were clever about using // bit operations. - return static_cast ( ::sqrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (x))); @@ -2874,10 +2985,22 @@ class ArithTraits { // This will result in no loss of accuracy, though it might be // more expensive than it should, if we were clever about using // bit operations. - return static_cast ( ::sqrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (abs (x)))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (abs (x)))); @@ -3005,10 +3128,22 @@ class ArithTraits { // This will result in no loss of accuracy, though it might be // more expensive than it should, if we were clever about using // bit operations. - return static_cast ( ::sqrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::sqrt(static_cast(abs(x))) +#else + ::sqrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type cbrt (const val_type x) { - return static_cast ( ::cbrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { return static_cast ( ::exp (static_cast (x))); @@ -3272,7 +3407,13 @@ class ArithTraits { using std::cbrtl; return static_cast ( ::cbrtl (static_cast (x))); #else - return static_cast ( ::cbrt (static_cast (x))); + return static_cast( +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + sycl::cbrt(static_cast(abs(x))) +#else + ::cbrt(static_cast(abs(x))) +#endif + ); #endif } static KOKKOS_FORCEINLINE_FUNCTION val_type exp (const val_type x) { @@ -3406,7 +3547,7 @@ class ArithTraits { // 64-bit integer type exactly. However, CUDA does not implement // long double for device functions. return static_cast ( sqrt (static_cast (abs (x)))); -#else +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) // Casting from a 64-bit integer type to double does result in a // loss of accuracy. However, it gives us a good first // approximation. For very large numbers, we may lose some @@ -3417,6 +3558,8 @@ class ArithTraits { // which it has to be, so we don't have to check) to ensure // correctness. It actually should suffice to check numbers // within 1 of the result. + return static_cast(sycl::sqrt(static_cast(abs(x)))); +#else return static_cast ( ::sqrt (static_cast (abs (x)))); #endif } @@ -3425,6 +3568,8 @@ class ArithTraits { using std::cbrtl; using std::abs; return static_cast ( cbrtl (static_cast (abs (x)))); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + return static_cast(sycl::cbrt(static_cast(abs(x)))); #else return static_cast ( ::cbrt (static_cast (abs (x)))); #endif @@ -3555,6 +3700,8 @@ class ArithTraits { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::sqrt; return static_cast ( sqrt (static_cast (x))); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + return static_cast(sycl::sqrt(static_cast(x))); #else return static_cast ( ::sqrt (static_cast (x))); #endif @@ -3563,6 +3710,8 @@ class ArithTraits { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST using std::cbrtl; return static_cast ( cbrtl (static_cast (x))); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) + return static_cast(sycl::cbrt(static_cast(x))); #else return static_cast ( ::cbrt (static_cast (x))); #endif @@ -3700,10 +3849,18 @@ struct ArithTraits return ::pow(x,y); } static inline val_type sqrt (const val_type& x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } static inline val_type cbrt (const val_type& x) { - return ::cbrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::cbrt(x); +#else + return ::cbrt(x); +#endif } static inline val_type exp (const val_type& x) { return ::exp (x); @@ -3740,7 +3897,11 @@ struct ArithTraits return ::acos (x); } static KOKKOS_FORCEINLINE_FUNCTION val_type atan (const val_type x) { - return ::atan (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::atan(x); +#else + return ::atan(x); +#endif } static inline val_type nan () { return val_type::_nan; @@ -3801,7 +3962,11 @@ struct ArithTraits } static std::string name () { return "dd_real"; } static val_type squareroot (const val_type& x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } }; @@ -3852,10 +4017,18 @@ struct ArithTraits return ::pow (x, y); } static inline val_type sqrt (const val_type& x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } static inline val_type cbrt (const val_type& x) { - return ::cbrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::cbrt(x); +#else + return ::cbrt(x); +#endif } static inline val_type exp (const val_type& x) { return ::exp (x); @@ -3892,7 +4065,11 @@ struct ArithTraits return ::acos (x); } static KOKKOS_FORCEINLINE_FUNCTION val_type atan (const val_type x) { - return ::atan (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::atan(x); +#else + return ::atan(x); +#endif } static inline val_type nan () { return val_type::_nan; @@ -3957,7 +4134,11 @@ struct ArithTraits } static std::string name () { return "qd_real"; } static val_type squareroot (const val_type& x) { - return ::sqrt (x); +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL + return sycl::sqrt(x); +#else + return ::sqrt(x); +#endif } }; #endif // HAVE_KOKKOS_QD diff --git a/src/common/KokkosKernels_BitUtils.hpp b/src/common/KokkosKernels_BitUtils.hpp index c845e37c53..7c343ff5a4 100644 --- a/src/common/KokkosKernels_BitUtils.hpp +++ b/src/common/KokkosKernels_BitUtils.hpp @@ -46,6 +46,10 @@ #define _KOKKOSKERNELS_BITUTILS_HPP #include "Kokkos_Core.hpp" +#if defined (KOKKOS_COMPILER_MSVC) +#include +#endif + namespace KokkosKernels{ namespace Impl{ @@ -203,6 +207,36 @@ int pop_count( long long i ){ return __popcnt8(i); } +#elif defined (KOKKOS_COMPILER_MSVC) +KOKKOS_FORCEINLINE_FUNCTION +int pop_count( unsigned i ){ + return __popcnt(i); +} +KOKKOS_FORCEINLINE_FUNCTION +int pop_count( unsigned long i ){ + return __popcnt(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int pop_count( unsigned long long i ){ + return __popcnt64(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int pop_count(int i ){ + return __popcnt(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int pop_count( long i ){ + return __popcnt(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int pop_count( long long i ){ + return __popcnt64(i); +} + #else #error "Popcount function is not defined for this compiler. Please report this with the compiler you are using to KokkosKernels." #endif @@ -328,6 +362,35 @@ int least_set_bit( long long i ){ return __builtin_ffsll(i); } +#elif defined (KOKKOS_COMPILER_MSVC) +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( unsigned i ){ + return __lzcnt(i); +} +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( unsigned long i ){ + return __lzcnt(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( unsigned long long i ){ + return __lzcnt64(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( int i ){ + return __lzcnt(i); +} +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( long i ){ + return __lzcnt(i); +} + +KOKKOS_FORCEINLINE_FUNCTION +int least_set_bit( long long i ){ + return __lzcnt64(i); +} + #else #error "least_set_bit function is not defined for this compiler. Please report this with the compiler you are using to KokkosKernels." #endif diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index b74834db5f..351480c3a3 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ b/src/common/KokkosKernels_IOUtils.hpp @@ -422,11 +422,13 @@ struct Edge{ //////////////////////////////////////////////////////////////////////////////// inline size_t kk_get_file_size(const char* file) { - struct stat stat_buf; + // struct stat stat_buf; #ifdef _WIN32 + struct _stat stat_buf; int retval = _stat(file, &stat_buf); #else + struct stat stat_buf; int retval = stat(file, &stat_buf); #endif diff --git a/src/graph/KokkosGraph_Distance1Color.hpp b/src/graph/KokkosGraph_Distance1Color.hpp index 2e9a4bc03d..f33d6b757f 100644 --- a/src/graph/KokkosGraph_Distance1Color.hpp +++ b/src/graph/KokkosGraph_Distance1Color.hpp @@ -73,8 +73,12 @@ void graph_color_symbolic( gch->set_tictoc(handle->get_verbose()); - color_view_type colors_out = color_view_type("Graph Colors", num_rows); - + color_view_type colors_out; + if(gch->get_vertex_colors().use_count() > 0){ + colors_out = gch->get_vertex_colors(); + } else { + colors_out = color_view_type("Graph Colors", num_rows); + } typedef typename Impl::GraphColor BaseGraphColoring; diff --git a/src/graph/KokkosGraph_Distance1ColorHandle.hpp b/src/graph/KokkosGraph_Distance1ColorHandle.hpp index 54a9b6db5b..826d0da962 100644 --- a/src/graph/KokkosGraph_Distance1ColorHandle.hpp +++ b/src/graph/KokkosGraph_Distance1ColorHandle.hpp @@ -158,6 +158,10 @@ class GraphColoringHandle nnz_lno_persistent_work_view_t lower_triangle_src; nnz_lno_persistent_work_view_t lower_triangle_dst; + bool use_vtx_list; + nnz_lno_temp_work_view_t vertex_list; + size_type vertex_list_size; + color_view_t vertex_colors; bool is_coloring_called_before; nnz_lno_t num_colors; @@ -189,7 +193,7 @@ class GraphColoringHandle overall_coloring_time_phase5(0), coloring_time(0), num_phases(0), size_of_edge_list(0), lower_triangle_src(), lower_triangle_dst(), - vertex_colors(), is_coloring_called_before(false), num_colors(0) + use_vtx_list(false), vertex_colors(), is_coloring_called_before(false), num_colors(0) { this->choose_default_algorithm(); this->set_defaults(this->coloring_algorithm_type); @@ -647,7 +651,15 @@ class GraphColoringHandle int get_num_phases() const { return this->num_phases;} color_view_t get_vertex_colors() const {return this->vertex_colors;} bool is_coloring_called() const {return this->is_coloring_called_before;} + bool get_use_vtx_list() const {return this->use_vtx_list;} + nnz_lno_temp_work_view_t get_vertex_list() const {return this->vertex_list;} + size_type get_vertex_list_size() const {return this->vertex_list_size;} //setters + void set_vertex_list(nnz_lno_temp_work_view_t vertex_list_, size_type vertex_list_size_){ + this->vertex_list = vertex_list_; + this->vertex_list_size = vertex_list_size_; + this->use_vtx_list = true; + } void set_coloring_algo_type(const ColoringAlgorithm &col_algo){this->coloring_algorithm_type = col_algo;} void set_conflict_list_type(const ConflictList &cl){this->conflict_list_type = cl;} void set_min_reduction_for_conflictlist(const double &min_reduction){this->min_reduction_for_conflictlist = min_reduction;} diff --git a/src/graph/KokkosGraph_Distance2ColorHandle.hpp b/src/graph/KokkosGraph_Distance2ColorHandle.hpp index 4dc7dd7fe7..35402a72ff 100644 --- a/src/graph/KokkosGraph_Distance2ColorHandle.hpp +++ b/src/graph/KokkosGraph_Distance2ColorHandle.hpp @@ -120,6 +120,10 @@ class GraphColorDistance2Handle double overall_coloring_time_phase5; // double coloring_time; // the time that it took to color the graph + bool use_vtx_list; + nnz_lno_temp_work_view_type vertex_list; + size_type vertex_list_size; + int num_phases; // Number of phases used by the coloring algorithm color_view_type vertex_colors; @@ -144,6 +148,7 @@ class GraphColorDistance2Handle , overall_coloring_time_phase4(0) , overall_coloring_time_phase5(0) , coloring_time(0) + , use_vtx_list(false) , num_phases(0) , vertex_colors() , is_coloring_called_before(false) @@ -282,7 +287,16 @@ class GraphColorDistance2Handle bool is_coloring_called() const { return this->is_coloring_called_before; } + bool get_use_vtx_list() const { return this->use_vtx_list; } + nnz_lno_temp_work_view_type get_vertex_list() const { return this->vertex_list; } + size_type get_vertex_list_size() const { return this->vertex_list_size; } + // setters + void set_vertex_list(nnz_lno_temp_work_view_type vertex_list_, size_type vertex_list_size_){ + this->vertex_list = vertex_list_; + this->vertex_list_size = vertex_list_size_; + this->use_vtx_list = true; + } void set_coloring_called() { this->is_coloring_called_before = true; } void set_coloring_algo_type(const GraphColoringAlgorithmDistance2& col_algo) { this->coloring_algorithm_type = col_algo; } diff --git a/src/graph/impl/KokkosGraph_Distance1Color_impl.hpp b/src/graph/impl/KokkosGraph_Distance1Color_impl.hpp index 3adda031df..22ca44cc11 100644 --- a/src/graph/impl/KokkosGraph_Distance1Color_impl.hpp +++ b/src/graph/impl/KokkosGraph_Distance1Color_impl.hpp @@ -367,11 +367,17 @@ class GraphColor_VB:public GraphColor nv); - - //init vertexList sequentially. - Kokkos::parallel_for("KokkosGraph::GraphColoring::InitList", - my_exec_space(0, this->nv), functorInitList (current_vertexList)); - + nnz_lno_t current_vertexListLength = this->nv; + + if(this->cp->get_use_vtx_list()){ + //get the vertexList from the color handle, if it exists. + current_vertexList = this->cp->get_vertex_list(); + current_vertexListLength = this->cp->get_vertex_list_size(); + } else { + //init vertexList sequentially. + Kokkos::parallel_for("KokkosGraph::GraphColoring::InitList", + my_exec_space(0, this->nv), functorInitList (current_vertexList)); + } // the next iteration's conflict list nnz_lno_temp_work_view_t next_iteration_recolorList; @@ -388,7 +394,6 @@ class GraphColor_VB:public GraphColor nv; - nnz_lno_t current_vertexListLength = this->nv; double t, total=0.0; @@ -2310,7 +2315,7 @@ class GraphColor_EB:public GraphColor nv); //initialized with zero. //initialize colors, color bans Kokkos::parallel_for ("KokkosGraph::GraphColoring::initColors", - my_exec_space (0, this->nv) , init_colors (kok_colors, color_ban, numInitialColors)); + my_exec_space (0, this->nv) , init_colors (kok_colors, color_ban, numInitialColors, color_set)); //std::cout << "nv:" << this->nv << " init_colors" << std::endl; //worklist @@ -2521,23 +2526,27 @@ class GraphColor_EB:public GraphColor 0){ + color_t colorsize = sizeof(color_t) * 8 - 1; + color_set(ii) = (kokcolors(ii) - 1) / colorsize; + kokcolors(ii) = 1 << ((kokcolors(ii) - 1) % colorsize); + } color_ban(ii) = color_ban_init_val; } }; diff --git a/src/graph/impl/KokkosGraph_Distance2Color_impl.hpp b/src/graph/impl/KokkosGraph_Distance2Color_impl.hpp index 60030dcaac..72a617dc4b 100644 --- a/src/graph/impl/KokkosGraph_Distance2Color_impl.hpp +++ b/src/graph/impl/KokkosGraph_Distance2Color_impl.hpp @@ -191,7 +191,12 @@ class GraphColorDistance2 { //Delegate to different coloring functions, depending on algorithm using_edge_filtering = false; - color_view_type colors_out("Graph Colors", this->nr); + color_view_type colors_out; + if(gc_handle->get_vertex_colors().use_count() > 0){ + colors_out = gc_handle->get_vertex_colors(); + } else { + colors_out = color_view_type("Graph Colors", this->nr); + } switch(this->gc_handle->get_coloring_algo_type()) { case COLORING_D2_VB_BIT_EF: @@ -244,9 +249,16 @@ class GraphColorDistance2 lno_view_t current_vertexList( Kokkos::ViewAllocateWithoutInitializing("vertexList"), this->nr); - // init conflictlist sequentially. - Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), functorInitList(current_vertexList)); - + lno_t current_vertexListLength = this->nr; + + if(this->gc_handle->get_use_vtx_list()){ + //init conflict list from coloring handle + current_vertexList = this->gc_handle->get_vertex_list(); + current_vertexListLength = this->gc_handle->get_vertex_list_size(); + } else { + // init conflictlist sequentially. + Kokkos::parallel_for("InitList", range_policy_type(0, this->nr), functorInitList(current_vertexList)); + } // Next iteratons's conflictList lno_view_t next_iteration_recolorList(Kokkos::ViewAllocateWithoutInitializing("recolorList"), this->nr); @@ -255,7 +267,6 @@ class GraphColorDistance2 lno_t numUncolored = this->nr; lno_t numUncoloredPreviousIter = this->nr + 1; - lno_t current_vertexListLength = this->nr; double time; double total_time = 0.0; @@ -445,7 +456,7 @@ class GraphColorDistance2 break; } } - if(color) + if(color && (colors(v) == 0 || colors(v) == CONFLICTED || colors(v) == UNCOLORABLE)) { //Color v colors(v) = color; @@ -466,7 +477,7 @@ class GraphColorDistance2 } } } - else + else if (colors(v) == 0 || colors(v) == CONFLICTED || colors(v) == UNCOLORABLE) { colors(v) = UNCOLORABLE; } @@ -737,6 +748,31 @@ class GraphColorDistance2 lno_t vertsPerThread = 1; lno_t workBatches = (currentWork + vertsPerThread - 1) / vertsPerThread; timer.reset(); + //if still using this color set, refresh forbidden. + //This avoids using too many colors, by relying on forbidden from before previous conflict resolution (which is now stale). + //Refreshing forbidden before conflict resolution ensures that previously-colored vertices do not get recolored. + switch(batch) + { + case 1: + Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), + NB_RefreshForbidden<1>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); + break; + case 2: + Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), + NB_RefreshForbidden<2>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); + break; + case 4: + Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), + NB_RefreshForbidden<4>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); + break; + case 8: + Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), + NB_RefreshForbidden<8>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); + break; + default:; + } + forbiddenTime += timer.seconds(); + timer.reset(); switch(batch) { case 1: @@ -788,33 +824,6 @@ class GraphColorDistance2 NB_Worklist(colors_out, worklist, worklen, numVerts), currentWork); worklistTime += timer.seconds(); timer.reset(); - //if still using this color set, refresh forbidden. - //This avoids using too many colors, by relying on forbidden from before conflict resolution (which is now stale). - if(currentWork) - { - switch(batch) - { - case 1: - Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<1>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); - break; - case 2: - Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<2>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); - break; - case 4: - Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<4>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); - break; - case 8: - Kokkos::parallel_for("NB D2 Forbidden", range_policy_type(0, numCols), - NB_RefreshForbidden<8>(colorBase, forbidden, colors_out, this->t_xadj, this->t_adj, numVerts)); - break; - default:; - } - forbiddenTime += timer.seconds(); - timer.reset(); - } iter++; } //Will need to run with a different color base, so rebuild the work list diff --git a/src/sparse/KokkosSparse_CrsMatrix.hpp b/src/sparse/KokkosSparse_CrsMatrix.hpp index d734d9ac3a..3ce574602c 100644 --- a/src/sparse/KokkosSparse_CrsMatrix.hpp +++ b/src/sparse/KokkosSparse_CrsMatrix.hpp @@ -554,7 +554,37 @@ class CrsMatrix { OrdinalType* rowmap, OrdinalType* cols) { - ctor_impl (label, nrows, ncols, annz, val, rowmap, cols); + using Kokkos::Unmanaged; + using HostRowmap = Kokkos::View; + using UnmanagedRowmap = Kokkos::View>; + using UnmanagedEntries = Kokkos::View>; + using UnmanagedValues = Kokkos::View>; + //Allocate device rowmap, entries, values views + typename row_map_type::non_const_type rowmapDevice(Kokkos::ViewAllocateWithoutInitializing("rowmap"), nrows + 1); + index_type entriesDevice(Kokkos::ViewAllocateWithoutInitializing("entries"), annz); + //given rowmap in ordinal_type, so may need to convert to size_type explicitly + HostRowmap rowmapConverted; + UnmanagedRowmap rowmapRaw; + if(!std::is_same::value) + { + rowmapConverted = HostRowmap(Kokkos::ViewAllocateWithoutInitializing("rowmap raw"), nrows + 1); + for(OrdinalType i = 0; i <= nrows; i++) + rowmapConverted(i) = rowmap[i]; + rowmapRaw = rowmapConverted; + } + else + { + rowmapRaw = UnmanagedRowmap((const SizeType*) rowmap, nrows + 1); + } + Kokkos::deep_copy(rowmapDevice, rowmapRaw); + UnmanagedEntries entriesRaw(cols, annz); + Kokkos::deep_copy(entriesDevice, entriesRaw); + //Construct graph and populate all members + this->numCols_ = ncols; + this->graph = StaticCrsGraphType(entriesDevice, rowmapDevice); + this->values = values_type(Kokkos::ViewAllocateWithoutInitializing("values"), annz); + UnmanagedValues valuesRaw(val, annz); + Kokkos::deep_copy(this->values, valuesRaw); // FIXME (mfh 09 Aug 2013) Specialize this on the Device type. // Only use cuSPARSE for the Cuda Device. @@ -646,15 +676,6 @@ class CrsMatrix { #endif // KOKKOS_USE_CUSPARSE } - void - ctor_impl (const std::string &label, - const OrdinalType nrows, - const OrdinalType ncols, - const size_type annz, - ScalarType* val, - OrdinalType* rows, - OrdinalType* cols); - KOKKOS_INLINE_FUNCTION OrdinalType sumIntoValues (const OrdinalType rowi, @@ -883,50 +904,5 @@ class CrsMatrix { ordinal_type numCols_; }; -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template< typename ScalarType , typename OrdinalType, class Device, class MemoryTraits, typename SizeType > -void -CrsMatrix:: -ctor_impl (const std::string &label, - const OrdinalType nrows, - const OrdinalType ncols, - const size_type annz, - ScalarType* val, - OrdinalType* rows, - OrdinalType* cols) -{ - std::string str = label; - values = values_type (str.append (".values"), annz); - - numCols_ = ncols; - - // FIXME (09 Aug 2013) CrsArray only takes std::vector for now. - // We'll need to fix that. - std::vector row_lengths (nrows, 0); - - // FIXME (mfh 21 Jun 2013) This calls for a parallel_for kernel. - for (OrdinalType i = 0; i < nrows; ++i) { - row_lengths[i] = rows[i + 1] - rows[i]; - } - - graph = Kokkos::create_staticcrsgraph (str.append (".graph"), row_lengths); - typename values_type::HostMirror h_values = Kokkos::create_mirror_view (values); - typename index_type::HostMirror h_entries = Kokkos::create_mirror_view (graph.entries); - - // FIXME (mfh 21 Jun 2013) This needs to be a parallel copy. - // Furthermore, why are the arrays copied twice? -- once here, to a - // host view, and once below, in the deep copy? - for (size_type i = 0; i < annz; ++i) { - if (val) { - h_values(i) = val[i]; - } - h_entries(i) = cols[i]; - } - - Kokkos::deep_copy (values, h_values); - Kokkos::deep_copy (graph.entries, h_entries); -} } #endif diff --git a/src/sparse/impl/KokkosSparse_spiluk_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_spiluk_symbolic_impl.hpp index 89c6f81a2c..630a617f38 100644 --- a/src/sparse/impl/KokkosSparse_spiluk_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spiluk_symbolic_impl.hpp @@ -51,6 +51,7 @@ #include #include #include +#include //#define SYMBOLIC_OUTPUT_INFO @@ -171,49 +172,32 @@ void iluk_symbolic ( IlukHandle& thandle, { // Scheduling and symbolic phase currently compute on host - need host copy of all views - typedef typename ARowMapType::HostMirror AHostRowMapType; - typedef typename AEntriesType::HostMirror AHostEntriesType; - typedef typename LRowMapType::HostMirror LHostRowMapType; - typedef typename LEntriesType::HostMirror LHostEntriesType; - typedef typename URowMapType::HostMirror UHostRowMapType; - typedef typename UEntriesType::HostMirror UHostEntriesType; - typedef typename IlukHandle::size_type size_type; typedef typename IlukHandle::nnz_lno_t nnz_lno_t; typedef typename IlukHandle::nnz_lno_view_t HandleDeviceEntriesType; - typedef typename IlukHandle::nnz_lno_view_t::HostMirror HandleHostEntriesType; - typedef typename IlukHandle::nnz_row_view_t HandleDeviceRowMapType; - typedef typename IlukHandle::nnz_row_view_t::HostMirror HandleHostRowMapType; //typedef typename IlukHandle::signed_integral_t signed_integral_t; size_type nrows = thandle.get_nrows(); - AHostRowMapType A_row_map = Kokkos::create_mirror_view(A_row_map_d); - Kokkos::deep_copy(A_row_map, A_row_map_d); - - AHostEntriesType A_entries = Kokkos::create_mirror_view(A_entries_d); - Kokkos::deep_copy(A_entries, A_entries_d); + auto A_row_map = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), A_row_map_d ); + auto A_entries = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), A_entries_d ); + auto L_row_map = Kokkos::create_mirror_view(Kokkos::HostSpace(), L_row_map_d); + auto L_entries = Kokkos::create_mirror_view(Kokkos::HostSpace(), L_entries_d); + auto U_row_map = Kokkos::create_mirror_view(Kokkos::HostSpace(), U_row_map_d); + auto U_entries = Kokkos::create_mirror_view(Kokkos::HostSpace(), U_entries_d); - LHostRowMapType L_row_map = Kokkos::create_mirror_view(L_row_map_d); - LHostEntriesType L_entries = Kokkos::create_mirror_view(L_entries_d); - UHostRowMapType U_row_map = Kokkos::create_mirror_view(U_row_map_d); - UHostEntriesType U_entries = Kokkos::create_mirror_view(U_entries_d); - HandleDeviceRowMapType dlevel_list = thandle.get_level_list(); - HandleHostRowMapType level_list = Kokkos::create_mirror_view(dlevel_list); - Kokkos::deep_copy(level_list, dlevel_list); - + auto level_list = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), dlevel_list ); + HandleDeviceEntriesType dlevel_ptr = thandle.get_level_ptr(); - HandleHostEntriesType level_ptr = Kokkos::create_mirror_view(dlevel_ptr); - Kokkos::deep_copy(level_ptr, dlevel_ptr); + auto level_ptr = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), dlevel_ptr ); HandleDeviceEntriesType dlevel_idx = thandle.get_level_idx(); - HandleHostEntriesType level_idx = Kokkos::create_mirror_view(dlevel_idx); - Kokkos::deep_copy(level_idx, dlevel_idx); - + auto level_idx = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), dlevel_idx ); + size_type nlev = 0; //Level scheduling on A??? @@ -358,6 +342,18 @@ void iluk_symbolic ( IlukHandle& thandle, thandle.set_nnzL(cntL); thandle.set_nnzU(cntU); + // Sort + for (size_type row_id = 0; row_id < static_cast(L_row_map.extent(0))-1; row_id++) { + size_type row_start = L_row_map(row_id); + size_type row_end = L_row_map(row_id + 1); + Kokkos::sort(subview(L_entries, Kokkos::make_pair(row_start, row_end))); + } + for (size_type row_id = 0; row_id < static_cast(U_row_map.extent(0))-1; row_id++) { + size_type row_start = U_row_map(row_id); + size_type row_end = U_row_map(row_id + 1); + Kokkos::sort(subview(U_entries, Kokkos::make_pair(row_start, row_end))); + } + //Level scheduling on L level_sched (thandle, L_row_map, L_entries, nrows, level_list, level_ptr, level_idx, nlev); diff --git a/src/sparse/impl/KokkosSparse_spmv_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_impl.hpp index f06e2fb9d9..1061d60d9a 100644 --- a/src/sparse/impl/KokkosSparse_spmv_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_impl.hpp @@ -386,8 +386,6 @@ spmv_beta_no_transpose (const KokkosKernels::Experimental::Controls& controls, } if (dobeta == 0) { y_ptr[i] = alpha*(tmp1 + tmp2 + tmp3 + tmp4); - } else if (dobeta == -1) { - y_ptr[i] -= alpha*(tmp1 + tmp2 + tmp3 + tmp4); } else if (dobeta == 1) { y_ptr[i] += alpha*(tmp1 + tmp2 + tmp3 + tmp4); } else { diff --git a/unit_test/sparse/Test_Sparse_CrsMatrix.hpp b/unit_test/sparse/Test_Sparse_CrsMatrix.hpp index 85b427d445..6caa9d96a1 100644 --- a/unit_test/sparse/Test_Sparse_CrsMatrix.hpp +++ b/unit_test/sparse/Test_Sparse_CrsMatrix.hpp @@ -47,11 +47,15 @@ #include #include #include "KokkosSparse_CrsMatrix.hpp" +#include "Kokkos_ArithTraits.hpp" -#ifndef kokkos_complex_double -#define kokkos_complex_double Kokkos::complex -#define kokkos_complex_float Kokkos::complex -#endif +// #ifndef kokkos_complex_double +// #define kokkos_complex_double Kokkos::complex +// #define kokkos_complex_float Kokkos::complex +// #endif + +typedef Kokkos::complex kokkos_complex_double; +typedef Kokkos::complex kokkos_complex_float; namespace Test{ // anonymous @@ -189,6 +193,40 @@ testCrsMatrix () //printf ("A is %d by %d\n", A.numRows (), A.numCols ()); } +template +void +testCrsMatrixRawConstructor() +{ + int nrows = 5; + //note: last 2 columns will be empty. + //This makes sure the ncols provided to constructor is preserved. + int ncols = 7; + int nnz = 9; + //NOTE: this is not a mistake, the raw ptr constructor takes rowmap as ordinal. + std::vector rowmap = {0, 0, 2, 5, 6, 9}; + std::vector entries = {3, 4, 0, 1, 2, 2, 0, 3, 4}; + std::vector values; + for(int i = 0; i < nnz; i++) + values.push_back(Kokkos::ArithTraits::one() * (1.0 * rand() / RAND_MAX)); + KokkosSparse::CrsMatrix A( + "A", nrows, ncols, nnz, values.data(), rowmap.data(), entries.data()); + EXPECT_EQ(A.numRows(), nrows); + EXPECT_EQ(A.numCols(), ncols); + EXPECT_EQ(A.nnz(), nnz); + //verify rowmap, entries, values: should all be identical to original raw arrays + //(except the rowmap elements are now size_type) + auto checkRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto checkEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto checkValues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + for(int i = 0; i < nrows + 1; i++) + EXPECT_EQ(checkRowmap(i), (size_type) rowmap[i]); + for(int i = 0; i < nnz; i++) + { + EXPECT_EQ(checkEntries(i), entries[i]); + EXPECT_EQ(checkValues(i), values[i]); + } +} + template void testCrsMatrixHostMirror () @@ -226,6 +264,7 @@ testCrsMatrixHostMirror () #define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ TEST_F( TestCategory, sparse ## _ ## crsmatrix ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \ testCrsMatrix (); \ + testCrsMatrixRawConstructor (); \ } \ TEST_F( TestCategory, sparse ## _ ## crsmatrix_host_mirror ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \ testCrsMatrixHostMirror (); \ @@ -329,4 +368,4 @@ TEST_F( TestCategory, sparse ## _ ## crsmatrix_host_mirror ## _ ## SCALAR ## _ # EXECUTE_TEST(kokkos_complex_float, int64_t, size_t, TestExecSpace) #endif - +#undef EXECUTE_TEST diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index 5a033fdf34..5d1b630561 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -359,7 +359,7 @@ Kokkos::complex randomUpperBound>(int mag) } template -void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance){ +void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance, bool heavy){ typedef typename KokkosSparse::CrsMatrix crsMat_t; typedef typename crsMat_t::values_type::non_const_type scalar_view_t; @@ -390,24 +390,40 @@ void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_vari Kokkos::fill_random(input_xt,rand_pool,randomUpperBound(10)); Kokkos::fill_random(output_yt,rand_pool,randomUpperBound(10)); - std::vector nonTransModes = {'N', 'C'}; - std::vector transModes = {'T', 'H'}; + std::vector nonTransModes = {'N'}; + std::vector transModes = {'T'}; + std::vector testAlphaBeta = {0.0, 1.0}; + if(heavy) + { + nonTransModes.push_back('C'); + transModes.push_back('H'); + testAlphaBeta.push_back(-1.0); + testAlphaBeta.push_back(2.5); + } for(auto mode : nonTransModes) { - Test::check_spmv(input_mat, input_x, output_y, 1.0, 0.0, mode); - Test::check_spmv(input_mat, input_x, output_y, 0.0, 1.0, mode); - Test::check_spmv(input_mat, input_x, output_y, 1.0, 1.0, mode); + for(double alpha : testAlphaBeta) + { + for(double beta : testAlphaBeta) + { + Test::check_spmv(input_mat, input_x, output_y, alpha, beta, mode); + } + } } for(auto mode : transModes) { - Test::check_spmv(input_mat, input_xt, output_yt, 1.0, 0.0, mode); - Test::check_spmv(input_mat, input_xt, output_yt, 0.0, 1.0, mode); - Test::check_spmv(input_mat, input_xt, output_yt, 1.0, 1.0, mode); + for(double alpha : testAlphaBeta) + { + for(double beta : testAlphaBeta) + { + Test::check_spmv(input_mat, input_xt, output_yt, alpha, beta, mode); + } + } } } template -void test_spmv_mv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance, int numMV){ +void test_spmv_mv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance, bool heavy, int numMV){ lno_t numCols = numRows; typedef typename KokkosSparse::CrsMatrix crsMat_t; @@ -435,19 +451,35 @@ void test_spmv_mv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_v Kokkos::deep_copy(b_y_copy, b_y); Kokkos::deep_copy(b_yt_copy, b_yt); - std::vector nonTransModes = {'N', 'C'}; - std::vector transModes = {'T', 'H'}; + std::vector nonTransModes = {'N'}; + std::vector transModes = {'T'}; + std::vector testAlphaBeta = {0.0, 1.0}; + if(heavy) + { + nonTransModes.push_back('C'); + transModes.push_back('H'); + testAlphaBeta.push_back(-1.0); + testAlphaBeta.push_back(2.5); + } for(auto mode : nonTransModes) { - Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 1.0, 0.0, numMV, mode); - Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 0.0, 1.0, numMV, mode); - Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, numMV, mode); + for(double alpha : testAlphaBeta) + { + for(double beta : testAlphaBeta) + { + Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, alpha, beta, numMV, mode); + } + } } for(auto mode : transModes) { - Test::check_spmv_mv(input_mat, b_xt, b_yt, b_yt_copy, 1.0, 0.0, numMV, mode); - Test::check_spmv_mv(input_mat, b_xt, b_yt, b_yt_copy, 0.0, 1.0, numMV, mode); - Test::check_spmv_mv(input_mat, b_xt, b_yt, b_yt_copy, 1.0, 1.0, numMV, mode); + for(double alpha : testAlphaBeta) + { + for(double beta : testAlphaBeta) + { + Test::check_spmv_mv(input_mat, b_xt, b_yt, b_yt_copy, alpha, beta, numMV, mode); + } + } } } @@ -477,7 +509,19 @@ void test_spmv_mv_heavy(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_ Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, nv, 'N'); Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 1.0, 0.0, nv, 'T'); Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 0.0, 1.0, nv, 'T'); - Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, 1.0, 1.0, nv, 'T'); + //Testing all modes together, since matrix is square + std::vector modes = {'N', 'C', 'T', 'H'}; + std::vector testAlphaBeta = {0.0, 1.0, -1.0, 2.5}; + for(auto mode : modes) + { + for(double alpha : testAlphaBeta) + { + for(double beta : testAlphaBeta) + { + Test::check_spmv_mv(input_mat, b_x, b_y, b_y_copy, alpha, beta, nv, mode); + } + } + } } } @@ -836,17 +880,23 @@ TEST_F( TestCategory,sparse ## _ ## spmv_issue_101 ## _ ## OFFSET ## _ ## DEVICE #define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ TEST_F( TestCategory,sparse ## _ ## spmv ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \ - test_spmv (50000, 50000 * 30, 200, 10); \ - test_spmv (50000, 50000 * 30, 100, 10); \ - test_spmv (10000, 10000 * 20, 100, 5); \ + test_spmv (1000, 1000 * 30, 200, 10, true); \ + test_spmv (1000, 1000 * 30, 100, 10, true); \ + test_spmv (1000, 1000 * 20, 100, 5, true); \ + test_spmv (50000, 50000 * 30, 200, 10, false); \ + test_spmv (50000, 50000 * 30, 100, 10, false); \ + test_spmv (10000, 10000 * 20, 100, 5, false); \ test_spmv_controls (10000, 10000 * 20, 100, 5); \ } #define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ TEST_F( TestCategory,sparse ## _ ## spmv_mv ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## LAYOUT ## _ ## DEVICE ) { \ - test_spmv_mv (50000, 50000 * 30, 100, 10, 5); \ - test_spmv_mv (50000, 50000 * 30, 200, 10, 1); \ - test_spmv_mv (10000, 10000 * 20, 100, 5, 10); \ + test_spmv_mv (1000, 1000 * 30, 200, 10, true, 1); \ + test_spmv_mv (1000, 1000 * 30, 100, 10, true, 5); \ + test_spmv_mv (1000, 1000 * 20, 100, 5, true, 10); \ + test_spmv_mv (50000, 50000 * 30, 200, 10, false, 1); \ + test_spmv_mv (50000, 50000 * 30, 100, 10, false, 5); \ + test_spmv_mv (10000, 10000 * 20, 100, 5, false, 10); \ test_spmv_mv_heavy (200, 200 * 10, 60, 4, 30); \ }