Skip to content

Commit

Permalink
Merge pull request #1177 from ndellingwood/fix-match-1171
Browse files (browse the repository at this point in the history)
using namespace should be scoped to prevent name clashes
  • Loading branch information
ndellingwood authored Nov 8, 2021
2 parents 92d1e94 + 9fb2fa8 commit f171533
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 82 deletions.
93 changes: 47 additions & 46 deletions perf_test/blas/blas3/KokkosBlas3_gemm_perf_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ void __do_gemm_serial_batched_template(options_t options,
C = Kokkos::subview(_gemm_args.C, Kokkos::ALL(), Kokkos::ALL(), j);
}

SerialGemm<TransAType, TransBType, AlgoType>::invoke(
KokkosBatched::SerialGemm<TransAType, TransBType, AlgoType>::invoke(
_gemm_args.alpha, A, B, _gemm_args.beta, C);
}
}
Expand Down Expand Up @@ -446,8 +446,8 @@ template <class scalar_type, class vta, class vtb, class vtc, class device_type,
void __do_gemm_serial_batched(options_t options, gemm_args_t gemm_args) {
char a = toupper(gemm_args.transA);
char b = toupper(gemm_args.transB);
using N = Trans::NoTranspose;
using T = Trans::Transpose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = Trans::ConjTranspose;

STATUS;
Expand Down Expand Up @@ -480,12 +480,13 @@ template <class algo_tag, class blocking_type, class device_type,
class algo_mode = void>
void __do_gemm_parallel_batched_heuristic_template(options_t options,
gemm_args_t gemm_args) {
BatchedGemmHandle batchedGemmHandle(BaseHeuristicAlgos::SQUARE);
KokkosBatched::BatchedGemmHandle batchedGemmHandle(KokkosBatched::BaseHeuristicAlgos::SQUARE);
char a = toupper(gemm_args.transA);
char b = toupper(gemm_args.transB);
using N = Trans::NoTranspose;
using T = Trans::Transpose;
// using C = Trans::ConjTranspose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = KokkosBatched::Trans::ConjTranspose;
using KokkosBatched::BatchLayout;

STATUS;

Expand Down Expand Up @@ -915,9 +916,9 @@ template <class algo_tag, class blocking_type, class device_type,
void __do_gemm_parallel_batched(options_t options, gemm_args_t gemm_args) {
char a = gemm_args.transA;
char b = gemm_args.transB;
using N = Trans::NoTranspose;
using T = Trans::Transpose;
// using C = Trans::ConjTranspose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = KokkosBatched::Trans::ConjTranspose;

STATUS;

Expand Down Expand Up @@ -1480,8 +1481,8 @@ void __do_gemm_armpl(options_t options, gemm_args_t gemm_args) {
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
char transa = std::is_same<TransAType, Trans::NoTranspose>::value ? 'N' : 'T';
char transb = std::is_same<TransBType, Trans::NoTranspose>::value ? 'N' : 'T';
char transa = std::is_same<TransAType, KokkosBatched::Trans::NoTranspose>::value ? 'N' : 'T';
char transb = std::is_same<TransBType, KokkosBatched::Trans::NoTranspose>::value ? 'N' : 'T';

if (!std::is_same<default_scalar, double>::value)
FATAL_ERROR("only double scalars are supported!");
Expand Down Expand Up @@ -2200,7 +2201,7 @@ void do_gemm_serial_batched(options_t options) {
__do_loop_and_invoke(
options, __do_gemm_serial_batched<default_scalar, view_type_3d,
view_type_3d, view_type_3d,
default_device, Algo::Gemm::Unblocked>);
default_device, KokkosBatched::Algo::Gemm::Unblocked>);
return;
}

Expand All @@ -2209,7 +2210,7 @@ void do_gemm_serial_batched_blocked(options_t options) {
__do_loop_and_invoke(
options, __do_gemm_serial_batched<default_scalar, view_type_3d,
view_type_3d, view_type_3d,
default_device, Algo::Gemm::Blocked>);
default_device, KokkosBatched::Algo::Gemm::Blocked>);
return;
}

Expand All @@ -2232,11 +2233,11 @@ void do_gemm_serial_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialBatchDim3Tag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<SerialBatchDim3Tag, KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<SerialTag, Algo::Gemm::Unblocked,
options, __do_gemm_parallel_batched<SerialTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
return;
}
Expand All @@ -2246,11 +2247,11 @@ void do_gemm_serial_batched_blocked_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialBatchDim3Tag, Algo::Gemm::Blocked,
__do_gemm_parallel_batched<SerialBatchDim3Tag, KokkosBatched::Algo::Gemm::Blocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<SerialTag, Algo::Gemm::Blocked,
options, __do_gemm_parallel_batched<SerialTag, KokkosBatched::Algo::Gemm::Blocked,
default_device>);
return;
}
Expand All @@ -2262,12 +2263,12 @@ void do_gemm_serial_simd_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Unblocked,
default_device, Mode::Serial>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Serial>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Unblocked,
default_device, Mode::Serial>);
options, __do_gemm_parallel_batched<TeamSimdTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Serial>);
return;
}

Expand All @@ -2278,12 +2279,12 @@ void do_gemm_serial_simd_batched_blocked_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Blocked,
default_device, Mode::Serial>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Serial>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Blocked,
default_device, Mode::Serial>);
options, __do_gemm_parallel_batched<TeamSimdTag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Serial>);
return;
}

Expand All @@ -2296,11 +2297,11 @@ void do_gemm_serial_batched_compact_mkl_parallel(options_t options) {
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialSimdBatchDim3Tag,
Algo::Gemm::CompactMKL, default_device>);
KokkosBatched::Algo::Gemm::CompactMKL, default_device>);
else
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialSimdTag, Algo::Gemm::CompactMKL,
__do_gemm_parallel_batched<SerialSimdTag, KokkosBatched::Algo::Gemm::CompactMKL,
default_device>);
return;
}
Expand Down Expand Up @@ -2333,11 +2334,11 @@ void do_gemm_team_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamBatchDim3Tag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<TeamBatchDim3Tag, KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamTag, Algo::Gemm::Unblocked,
options, __do_gemm_parallel_batched<TeamTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
return;
}
Expand All @@ -2347,11 +2348,11 @@ void do_gemm_team_batched_blocked_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamBatchDim3Tag, Algo::Gemm::Blocked,
__do_gemm_parallel_batched<TeamBatchDim3Tag, KokkosBatched::Algo::Gemm::Blocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamTag, Algo::Gemm::Blocked,
options, __do_gemm_parallel_batched<TeamTag, KokkosBatched::Algo::Gemm::Blocked,
default_device>);
return;
}
Expand All @@ -2362,11 +2363,11 @@ void do_gemm_team_vector_batched_parallel(options_t options) {
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamVectorBatchDim3Tag,
Algo::Gemm::Unblocked, default_device>);
KokkosBatched::Algo::Gemm::Unblocked, default_device>);
else
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamVectorTag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<TeamVectorTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
return;
}
Expand All @@ -2376,12 +2377,12 @@ void do_gemm_team_simd_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Unblocked,
default_device, Mode::Team>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Team>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Unblocked,
default_device, Mode::Team>);
options, __do_gemm_parallel_batched<TeamSimdTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Team>);
return;
}

Expand All @@ -2390,28 +2391,28 @@ void do_gemm_team_simd_batched_blocked_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Blocked,
default_device, Mode::Team>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Team>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Blocked,
default_device, Mode::Team>);
options, __do_gemm_parallel_batched<TeamSimdTag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Team>);
return;
}

// Blocked algo not yet implemented for TeamVectorGemm.
/* void do_gemm_team_vector_batched_blocked_parallel(options_t options) {
STATUS;
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamVectorTag, Algo::Gemm::Blocked,
options, __do_gemm_parallel_batched<TeamVectorTag, KokkosBatched::Algo::Gemm::Blocked,
default_device>); return;
} */

void do_gemm_experiment_parallel(options_t options) {
STATUS;
using TransAType = Trans::NoTranspose;
using TransBType = Trans::NoTranspose;
using BlockingType = Algo::Gemm::Unblocked;
using TransAType = KokkosBatched::Trans::NoTranspose;
using TransBType = KokkosBatched::Trans::NoTranspose;
using BlockingType = KokkosBatched::Algo::Gemm::Unblocked;

// __do_loop_and_invoke(
// options, __do_gemm_parallel_experiment1<TransAType, TransBType,
Expand Down
20 changes: 15 additions & 5 deletions perf_test/blas/blas3/KokkosBlas3_trmm_perf_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,14 @@ void __do_trmm_serial_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trmm::Unblocked;
using tag = KokkosBatched::Algo::Trmm::Unblocked;

for (uint32_t j = 0; j < warm_up_n; ++j) {
for (int i = 0; i < options.start.a.k; ++i) {
auto A = Kokkos::subview(trmm_args.A, i, Kokkos::ALL(), Kokkos::ALL());
auto B = Kokkos::subview(trmm_args.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
}
// Fence after submitting each batch operation
Kokkos::fence();
Expand All @@ -293,7 +293,7 @@ void __do_trmm_serial_batched_template(options_t options,
auto A = Kokkos::subview(trmm_args.A, i, Kokkos::ALL(), Kokkos::ALL());
auto B = Kokkos::subview(trmm_args.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
}
// Fence after submitting each batch operation
Kokkos::fence();
Expand All @@ -315,6 +315,11 @@ void __do_trmm_serial_batched(options_t options, trmm_args_t trmm_args) {
__trans = tolower(trmm_args.trans);
//__diag = tolower(diag[0]);

using KokkosBatched::Diag;
using KokkosBatched::Side;
using KokkosBatched::Trans;
using KokkosBatched::Uplo;

STATUS;

//// Lower non-transpose ////
Expand Down Expand Up @@ -482,7 +487,7 @@ struct parallel_batched_trmm {
auto svA = Kokkos::subview(trmm_args_.A, i, Kokkos::ALL(), Kokkos::ALL());
auto svB = Kokkos::subview(trmm_args_.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args_.alpha, svA,
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args_.alpha, svA,
svB);
}
};
Expand All @@ -493,7 +498,7 @@ void __do_trmm_parallel_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trmm::Unblocked;
using tag = KokkosBatched::Algo::Trmm::Unblocked;
using execution_space = typename device_type::execution_space;
using functor_type =
parallel_batched_trmm<side, uplo, trans, diag, tag, execution_space>;
Expand Down Expand Up @@ -530,6 +535,11 @@ void __do_trmm_parallel_batched(options_t options, trmm_args_t trmm_args) {
__trans = tolower(trmm_args.trans);
//__diag = tolower(diag[0]);

using KokkosBatched::Diag;
using KokkosBatched::Side;
using KokkosBatched::Trans;
using KokkosBatched::Uplo;

STATUS;

//// Lower non-transpose ////
Expand Down
15 changes: 10 additions & 5 deletions perf_test/blas/blas3/KokkosBlas_trtri_perf_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,13 +244,13 @@ void __do_trtri_serial_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trtri::Unblocked;
using tag = KokkosBatched::Algo::Trtri::Unblocked;

for (uint32_t j = 0; j < warm_up_n; ++j) {
for (int i = 0; i < options.start.a.k; ++i) {
auto A = Kokkos::subview(trtri_args.A, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrtri<uplo, diag, tag>::invoke(A);
KokkosBatched::SerialTrtri<uplo, diag, tag>::invoke(A);
}
// Fence after each batch operation
Kokkos::fence();
Expand All @@ -261,7 +261,7 @@ void __do_trtri_serial_batched_template(options_t options,
for (int i = 0; i < options.start.a.k; ++i) {
auto A = Kokkos::subview(trtri_args.A, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrtri<uplo, diag, tag>::invoke(A);
KokkosBatched::SerialTrtri<uplo, diag, tag>::invoke(A);
}
// Fence after each batch operation
Kokkos::fence();
Expand All @@ -281,6 +281,9 @@ template <class scalar_type, class vta, class device_type>
void __do_trtri_serial_batched(options_t options, trtri_args_t trtri_args) {
char __uplo = tolower(trtri_args.uplo), __diag = tolower(trtri_args.diag);

using KokkosBatched::Diag;
using KokkosBatched::Uplo;

STATUS;

//// Lower ////
Expand Down Expand Up @@ -373,7 +376,7 @@ struct parallel_batched_trtri {
void operator()(const int& i) const {
auto svA = Kokkos::subview(trtri_args_.A, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrtri<uplo, diag, tag>::invoke(svA);
KokkosBatched::SerialTrtri<uplo, diag, tag>::invoke(svA);
}
};

Expand All @@ -383,7 +386,7 @@ void __do_trtri_parallel_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trtri::Unblocked;
using tag = KokkosBatched::Algo::Trtri::Unblocked;
using execution_space = typename device_type::execution_space;
using functor_type = parallel_batched_trtri<uplo, diag, tag, execution_space>;
functor_type parallel_batched_trtri_functor(trtri_args);
Expand Down Expand Up @@ -415,6 +418,8 @@ void __do_trtri_parallel_batched_template(options_t options,
template <class scalar_type, class vta, class device_type>
void __do_trtri_parallel_batched(options_t options, trtri_args_t trtri_args) {
char __uplo = tolower(trtri_args.uplo), __diag = tolower(trtri_args.diag);
using KokkosBatched::Diag;
using KokkosBatched::Uplo;

STATUS;

Expand Down
Loading

0 comments on commit f171533

Please sign in to comment.