From 131eca13219f8cb79d52decebf92ada8dc708014 Mon Sep 17 00:00:00 2001
From: Yuuichi Asahi
Date: Wed, 31 Jan 2024 19:42:12 +0900
Subject: [PATCH] Add a trivial benchmark

---
 fft/CMakeLists.txt                            |   4 +
 fft/perf_test/BenchmarkMain.cpp               |  37 ++++++
 fft/perf_test/Benchmark_Context.hpp           | 121 +++++++++++++++++
 fft/perf_test/CMakeLists.txt                  |  71 ++++++++++
 .../KokkosFFT_PrintConfiguration.hpp          |  85 ++++++++++++
 fft/perf_test/KokkosFFT_TplsVersion.hpp       |  40 ++++++
 fft/perf_test/PerfTest_FFT1.cpp               | 123 ++++++++++++++++++
 fft/perf_test/PerfTest_FFT1.hpp               | 103 +++++++++++++++
 8 files changed, 584 insertions(+)
 create mode 100644 fft/perf_test/BenchmarkMain.cpp
 create mode 100644 fft/perf_test/Benchmark_Context.hpp
 create mode 100644 fft/perf_test/CMakeLists.txt
 create mode 100644 fft/perf_test/KokkosFFT_PrintConfiguration.hpp
 create mode 100644 fft/perf_test/KokkosFFT_TplsVersion.hpp
 create mode 100644 fft/perf_test/PerfTest_FFT1.cpp
 create mode 100644 fft/perf_test/PerfTest_FFT1.hpp

diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt
index ffddf45b..e2b19192 100644
--- a/fft/CMakeLists.txt
+++ b/fft/CMakeLists.txt
@@ -1,4 +1,8 @@
 add_subdirectory(src)
 if(BUILD_TESTING)
     add_subdirectory(unit_test)
+endif()
+
+if(KokkosFFT_ENABLE_BENCHMARK)
+    add_subdirectory(perf_test)
 endif()
\ No newline at end of file
diff --git a/fft/perf_test/BenchmarkMain.cpp b/fft/perf_test/BenchmarkMain.cpp
new file mode 100644
index 00000000..2b6c856b
--- /dev/null
+++ b/fft/perf_test/BenchmarkMain.cpp
@@ -0,0 +1,37 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+*/
+
+#include <benchmark/benchmark.h>
+
+#include "Benchmark_Context.hpp"
+#include <Kokkos_Core.hpp>
+
+int main(int argc, char** argv) {  // entry point of the benchmark executable
+  Kokkos::initialize(argc, argv);
+  {  // benchmarks run inside this scope, between Kokkos::initialize and Kokkos::finalize
+    benchmark::Initialize(&argc, argv);
+    benchmark::SetDefaultTimeUnit(benchmark::kSecond);
+    KokkosFFTBenchmark::add_benchmark_context(true);  // true: verbose Kokkos configuration
+
+    benchmark::RunSpecifiedBenchmarks();
+
+    benchmark::Shutdown();
+  }
+  Kokkos::finalize();
+  return 0;
+}
\ No newline at end of file
diff --git a/fft/perf_test/Benchmark_Context.hpp b/fft/perf_test/Benchmark_Context.hpp
new file mode 100644
index 00000000..1d222bb4
--- /dev/null
+++ b/fft/perf_test/Benchmark_Context.hpp
@@ -0,0 +1,121 @@
+#ifndef KOKKOSFFT_BENCHMARK_CONTEXT_HPP
+#define KOKKOSFFT_BENCHMARK_CONTEXT_HPP
+
+#include <cstdlib>
+#include <sstream>
+#include <string>
+
+#include <benchmark/benchmark.h>
+#include <Kokkos_Core.hpp>
+#include "KokkosFFT_PrintConfiguration.hpp"
+#include <KokkosFFT_Version_Info.hpp>  // NOTE(review): header name reconstructed; it must declare the GIT_* and BENCHMARK_VERSION strings used below - confirm
+
+namespace KokkosFFTBenchmark {
+  /// \brief Remove unwanted spaces and colon signs from input string. In case of
+  /// invalid input it will return an empty string.
+  inline std::string remove_unwanted_characters(std::string str) {
+    auto from = str.find_first_not_of(" :");  // index of the first character to keep
+    auto to   = str.find_last_not_of(" :");   // index of the last character to keep
+
+    if (from == std::string::npos || to == std::string::npos) {
+      return "";
+    }
+
+    // substr() takes (position, count): the kept range [from, to] has to - from + 1 characters
+    return str.substr(from, to - from + 1);
+  }
+
+  /// \brief Extract all key:value pairs from the Kokkos and KokkosFFT
+  /// configuration printouts and add them to the benchmark context
+  inline void add_kokkos_configuration(bool verbose) {
+    std::ostringstream msg;
+    Kokkos::print_configuration(msg, verbose);
+    KokkosFFT::print_configuration(msg);
+
+    // Iterate over the printed lines and split each one at its first ':'
+    std::stringstream ss{msg.str()};
+    for (std::string line; std::getline(ss, line, '\n');) {
+      auto found = line.find_first_of(':');
+      if (found != std::string::npos) {
+        auto val = remove_unwanted_characters(line.substr(found + 1));
+        // Ignore line without value, for example a category name
+        if (!val.empty()) {
+          benchmark::AddCustomContext(
+              remove_unwanted_characters(line.substr(0, found)), val);
+        }
+      }
+    }
+  }
+
+  /// \brief Add KokkosFFT git info and google benchmark release to
+  /// benchmark context.
+  inline void add_version_info() {
+    using namespace KokkosFFT::Impl;  // GIT_* and BENCHMARK_VERSION are presumably declared here - confirm
+
+    if (!GIT_BRANCH.empty()) {  // all git entries are skipped when no branch is recorded
+      benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH));
+      benchmark::AddCustomContext("GIT_COMMIT_HASH",
+                                  std::string(GIT_COMMIT_HASH));
+      benchmark::AddCustomContext("GIT_CLEAN_STATUS",
+                                  std::string(GIT_CLEAN_STATUS));
+      benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION",
+                                  std::string(GIT_COMMIT_DESCRIPTION));
+      benchmark::AddCustomContext("GIT_COMMIT_DATE",
+                                  std::string(GIT_COMMIT_DATE));
+    }
+    if (!BENCHMARK_VERSION.empty()) {
+      benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION",
+                                  std::string(BENCHMARK_VERSION));
+    }
+  }
+
+  /// \brief Add the OpenMP-related environment variables that are set to the
+  /// benchmark context.
+  inline void add_env_info() {
+    // Variables are reported in this fixed order; unset ones are skipped
+    constexpr const char* env_names[] = {
+        "OMP_NUM_THREADS",
+        "OMP_DYNAMIC",
+        "OMP_PROC_BIND",
+        "OMP_PLACES",
+    };
+    for (const char* name : env_names) {
+      const char* value = std::getenv(name);
+      // getenv returns a null pointer when the variable is not defined
+      if (value != nullptr) {
+        benchmark::AddCustomContext(name, value);
+      }
+    }
+  }
+
+  /// \brief Gather all context information and add it to benchmark context
+  inline void add_benchmark_context(bool verbose = false) {
+    add_kokkos_configuration(verbose);
+    add_version_info();
+    add_env_info();
+  }
+
+  /**
+   * \brief Set the manual iteration time and the "MB (In)", "MB (Out)" and
+   * "GB/s" counters from the sizes of the input and output Views.
+   */
+  template <typename InViewType, typename OutViewType>
+  void report_results(benchmark::State& state, InViewType in, OutViewType out, double time) {
+    // data processed in megabytes (bytes / 1.0e6)
+    const double in_data_processed = static_cast<double>(in.size() *
+                                   sizeof(typename InViewType::value_type)) /
+                       1.0e6;
+    const double out_data_processed = static_cast<double>(out.size() *
+                                   sizeof(typename OutViewType::value_type)) /
+                       1.0e6;
+
+    state.SetIterationTime(time);
+    state.counters["MB (In)"] = benchmark::Counter(in_data_processed);
+    state.counters["MB (Out)"] = benchmark::Counter(out_data_processed);
+    state.counters["GB/s"] = benchmark::Counter(
+      (in_data_processed + out_data_processed) / 1.0e3, benchmark::Counter::kIsIterationInvariantRate);
+  }
+
+} // namespace KokkosFFTBenchmark
+
+#endif
\ No newline at end of file
diff --git a/fft/perf_test/CMakeLists.txt b/fft/perf_test/CMakeLists.txt
new file mode 100644
index 00000000..f2f37c81
--- /dev/null
+++ b/fft/perf_test/CMakeLists.txt
@@ -0,0 +1,71 @@
+function(KOKKOSFFT_ADD_BENCHMARK name)
+  CMAKE_PARSE_ARGUMENTS(
+    BENCHMARK
+    ""
+    ""
+    "SOURCES"
+    ${ARGN}
+  )
+
+  if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS)
+    message(
+      WARNING
+      "Unexpected arguments when adding a benchmark: "
+      ${BENCHMARK_UNPARSED_ARGUMENTS}
+    )
+  endif()
+
+  set(BENCHMARK_NAME ${PACKAGE_NAME}_${name})  # NOTE(review): PACKAGE_NAME is not set in this file - confirm the parent project defines it
+
+  # Add BenchmarkMain.cpp (defines main()) to the sources of every benchmark
+  list(APPEND BENCHMARK_SOURCES
+    BenchmarkMain.cpp
+  )
+
+  add_executable(
+    ${BENCHMARK_NAME}
+    ${BENCHMARK_SOURCES}
+  )
+  target_link_libraries(
+    ${BENCHMARK_NAME}
+    PRIVATE benchmark::benchmark Kokkos::kokkos KokkosFFT::fft
+  )
+  target_include_directories(
+    ${BENCHMARK_NAME}
+    SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include
+  )
+  target_include_directories(
+    ${BENCHMARK_NAME}
+    PRIVATE ${CMAKE_BINARY_DIR}
+  )
+
+  foreach(SOURCE_FILE ${BENCHMARK_SOURCES})
+    SET_SOURCE_FILES_PROPERTIES(
+      ${SOURCE_FILE}
+      PROPERTIES LANGUAGE CXX
+    )
+  endforeach()
+
+  string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC)
+  set(
+    BENCHMARK_ARGS
+    --benchmark_counters_tabular=true
+    --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json
+  )
+
+  add_test(
+    NAME ${BENCHMARK_NAME}
+    COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS}
+  )
+endfunction()
+
+# Set benchmark targets
+set(
+  BENCHMARK_SOURCES
+  PerfTest_FFT1.cpp
+)
+
+KOKKOSFFT_ADD_BENCHMARK(
+  PerformanceTest_Benchmark
+  SOURCES ${BENCHMARK_SOURCES}
+)
\ No newline at end of file
diff --git a/fft/perf_test/KokkosFFT_PrintConfiguration.hpp b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp
new file mode 100644
index 00000000..bf590f00
--- /dev/null
+++ b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp
@@ -0,0 +1,85 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+#ifndef KOKKOSFFT_PRINT_CONFIGURATION_HPP
+#define KOKKOSFFT_PRINT_CONFIGURATION_HPP
+
+#include "KokkosFFT_config.h"
+#include "KokkosFFT_TplsVersion.hpp"
+#include <iostream>
+
+namespace KokkosFFT {
+namespace Impl {
+
+inline void print_cufft_version_if_enabled(std::ostream& os) {  // prints the cuFFT version when enabled, "no" otherwise
+#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT)
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_CUFFT: " << cufft_version_string() << "\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_CUFFT: no\n";
+#endif
+}
+
+inline void print_enabled_tpls(std::ostream& os) {  // one "KOKKOSFFT_ENABLE_TPL_<name>: <value>" line per TPL
+#ifdef KOKKOSFFT_ENABLE_TPL_FFTW
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_FFTW: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_FFTW: no\n";
+#endif
+
+  print_cufft_version_if_enabled(os);
+
+#ifdef KOKKOSFFT_ENABLE_TPL_HIPFFT
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_HIPFFT: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_HIPFFT: no\n";
+#endif
+
+#ifdef KOKKOSFFT_ENABLE_TPL_ONEMKL
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_ONEMKL: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_ONEMKL: no\n";
+#endif
+}
+
+
+inline void print_version(std::ostream& os) {
+
+  // The version is printed as <major>.<minor>.<patch>, assembled from the
+  // KOKKOSFFT_VERSION_MAJOR, KOKKOSFFT_VERSION_MINOR and KOKKOSFFT_VERSION_PATCH macros
+  os << " "
+     << "KokkosFFT Version: " << KOKKOSFFT_VERSION_MAJOR << "."
+     << KOKKOSFFT_VERSION_MINOR << "." << KOKKOSFFT_VERSION_PATCH
+     << '\n';
+}
+}  // namespace Impl
+
+inline void print_configuration(std::ostream& os) {  // version line, then the "TPLs:" section
+  Impl::print_version(os);
+
+  os << "TPLs: \n";
+  Impl::print_enabled_tpls(os);
+}
+
+}  // namespace KokkosFFT
+
+#endif
\ No newline at end of file
diff --git a/fft/perf_test/KokkosFFT_TplsVersion.hpp b/fft/perf_test/KokkosFFT_TplsVersion.hpp
new file mode 100644
index 00000000..11e0354a
--- /dev/null
+++ b/fft/perf_test/KokkosFFT_TplsVersion.hpp
@@ -0,0 +1,40 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+#ifndef KOKKOSFFT_TPLS_VERSIONS_HPP
+#define KOKKOSFFT_TPLS_VERSIONS_HPP
+
+#include "KokkosFFT_config.h"
+#include <sstream>
+#include <string>
+
+#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT)
+#include "cufft.h"
+#endif
+
+namespace KokkosFFT {
+#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT)
+inline std::string cufft_version_string() {
+  // Format the cuFFT version macros as "<major>.<minor>.<patch>"
+  std::stringstream ss;
+
+  ss << CUFFT_VER_MAJOR << "." << CUFFT_VER_MINOR << "." << CUFFT_VER_PATCH;
+
+  return ss.str();
+}
+#endif
+
+}  // namespace KokkosFFT
+#endif
\ No newline at end of file
diff --git a/fft/perf_test/PerfTest_FFT1.cpp b/fft/perf_test/PerfTest_FFT1.cpp
new file mode 100644
index 00000000..846714cd
--- /dev/null
+++ b/fft/perf_test/PerfTest_FFT1.cpp
@@ -0,0 +1,123 @@
+#include <benchmark/benchmark.h>
+#include "Benchmark_Context.hpp"
+#include "PerfTest_FFT1.hpp"
+
+namespace KokkosFFTBenchmark {  // NOTE(review): BENCHMARK template arguments below were reconstructed - confirm their order
+
+// 1D FFT on 1D View
+BENCHMARK(FFT_1DView<float, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(FFT_1DView<float, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(FFT_1DView<double, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(FFT_1DView<double, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+// 1D IFFT on 1D View
+BENCHMARK(IFFT_1DView<float, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IFFT_1DView<float, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IFFT_1DView<double, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IFFT_1DView<double, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+// 1D RFFT on 1D View
+BENCHMARK(RFFT_1DView<float, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(RFFT_1DView<float, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(RFFT_1DView<double, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(RFFT_1DView<double, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+// 1D IRFFT on 1D View
+BENCHMARK(IRFFT_1DView<float, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IRFFT_1DView<float, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IRFFT_1DView<double, Kokkos::LayoutLeft>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(IRFFT_1DView<double, Kokkos::LayoutRight>)
+    ->ArgName("N")
+    ->RangeMultiplier(2)
+    ->Range(4096, 65536)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond);
+
+}  // namespace KokkosFFTBenchmark
\ No newline at end of file
diff --git a/fft/perf_test/PerfTest_FFT1.hpp b/fft/perf_test/PerfTest_FFT1.hpp
new file mode 100644
index 00000000..074a78e0
--- /dev/null
+++ b/fft/perf_test/PerfTest_FFT1.hpp
@@ -0,0 +1,123 @@
+#ifndef KOKKOSFFT_PERFTEST_FFT1_HPP
+#define KOKKOSFFT_PERFTEST_FFT1_HPP
+
+#include <Kokkos_Core.hpp>
+#include <KokkosFFT.hpp>
+#include <benchmark/benchmark.h>
+#include "Benchmark_Context.hpp"
+
+using execution_space = Kokkos::DefaultExecutionSpace;
+
+namespace KokkosFFTBenchmark {
+
+/// \brief Call KokkosFFT::fft(in -> out) once per benchmark iteration and pass
+/// the manually measured time of that call to report_results.
+template <typename InViewType, typename OutViewType>
+void fft(const InViewType& in, OutViewType& out, benchmark::State& state) {
+  for (auto _ : state) {
+    // Drain outstanding work so that it is not attributed to the transform
+    Kokkos::fence();
+    Kokkos::Timer timer;
+    KokkosFFT::fft(execution_space(), in, out);
+    // Wait for the transform before reading the timer, so that work enqueued
+    // asynchronously on the execution space is part of the measurement
+    Kokkos::fence();
+    KokkosFFTBenchmark::report_results(state, in, out, timer.seconds());
+  }
+}
+
+/// \brief Same as fft(), but times KokkosFFT::ifft.
+template <typename InViewType, typename OutViewType>
+void ifft(const InViewType& in, OutViewType& out, benchmark::State& state) {
+  for (auto _ : state) {
+    Kokkos::fence();
+    Kokkos::Timer timer;
+    KokkosFFT::ifft(execution_space(), in, out);
+    Kokkos::fence();  // include asynchronously enqueued work in the measurement
+    KokkosFFTBenchmark::report_results(state, in, out, timer.seconds());
+  }
+}
+
+/// \brief Same as fft(), but times KokkosFFT::rfft.
+template <typename InViewType, typename OutViewType>
+void rfft(const InViewType& in, OutViewType& out, benchmark::State& state) {
+  for (auto _ : state) {
+    Kokkos::fence();
+    Kokkos::Timer timer;
+    KokkosFFT::rfft(execution_space(), in, out);
+    Kokkos::fence();  // include asynchronously enqueued work in the measurement
+    KokkosFFTBenchmark::report_results(state, in, out, timer.seconds());
+  }
+}
+
+/// \brief Same as fft(), but times KokkosFFT::irfft.
+template <typename InViewType, typename OutViewType>
+void irfft(const InViewType& in, OutViewType& out, benchmark::State& state) {
+  for (auto _ : state) {
+    Kokkos::fence();
+    Kokkos::Timer timer;
+    KokkosFFT::irfft(execution_space(), in, out);
+    Kokkos::fence();  // include asynchronously enqueued work in the measurement
+    KokkosFFTBenchmark::report_results(state, in, out, timer.seconds());
+  }
+}
+
+/// \brief Benchmark KokkosFFT::fft on two complex 1D Views of length N, where N
+/// is the benchmark argument state.range(0).
+template <typename T, typename LayoutType>
+static void FFT_1DView(benchmark::State& state) {
+  using ComplexView1DType =
+      Kokkos::View<Kokkos::complex<T>*, LayoutType, execution_space>;
+
+  const int n = state.range(0);
+  ComplexView1DType x("x", n), x_hat("x_hat", n);
+
+  fft(x, x_hat, state);
+}
+
+/// \brief Benchmark KokkosFFT::ifft on two complex 1D Views of length N, where
+/// N is the benchmark argument state.range(0).
+template <typename T, typename LayoutType>
+static void IFFT_1DView(benchmark::State& state) {
+  using ComplexView1DType =
+      Kokkos::View<Kokkos::complex<T>*, LayoutType, execution_space>;
+
+  const int n = state.range(0);
+  ComplexView1DType x("x", n), x_hat("x_hat", n);
+
+  ifft(x, x_hat, state);
+}
+
+/// \brief Benchmark KokkosFFT::rfft from a real 1D View of length N to a
+/// complex 1D View of length N/2+1, where N is state.range(0).
+template <typename T, typename LayoutType>
+static void RFFT_1DView(benchmark::State& state) {
+  using RealView1DType = Kokkos::View<T*, LayoutType, execution_space>;
+  using ComplexView1DType =
+      Kokkos::View<Kokkos::complex<T>*, LayoutType, execution_space>;
+
+  const int n = state.range(0);
+  RealView1DType x("x", n);
+  ComplexView1DType x_hat("x_hat", n/2+1);
+
+  rfft(x, x_hat, state);
+}
+
+/// \brief Benchmark KokkosFFT::irfft from a complex 1D View of length N/2+1 to
+/// a real 1D View of length N, where N is state.range(0).
+template <typename T, typename LayoutType>
+static void IRFFT_1DView(benchmark::State& state) {
+  using RealView1DType = Kokkos::View<T*, LayoutType, execution_space>;
+  using ComplexView1DType =
+      Kokkos::View<Kokkos::complex<T>*, LayoutType, execution_space>;
+
+  const int n = state.range(0);
+  ComplexView1DType x("x", n/2+1);
+  RealView1DType x_hat("x_hat", n);
+
+  irfft(x, x_hat, state);
+}
+
+}  // namespace KokkosFFTBenchmark
+
+#endif
\ No newline at end of file