Add tests for sin #1

AuroraPerego · 2024-02-29T09:40:32Z

To run the tests:

cd test
make all
make runAll

fwyzard · 2024-03-01T17:52:30Z

test/sin/sin_t.cu

+#define CATCH_CONFIG_MAIN
+#include <catch.hpp>
+
+#include "math.h"
+#include <cuda_runtime.h>
+#include <limits>
+#include <vector>
+
+template <typename T> __global__ void sinKernel(double *result, T input) { // Kernel: evaluates xtd::sin on `input` and stores the value, converted to double, in result[0].
+  result[0] = static_cast<double>(xtd::sin(input));
+}
+
+template <typename T> __global__ void sinfKernel(double *result, T input) { // Kernel: evaluates xtd::sinf on `input` and stores the value, converted to double, in result[0].
+  result[0] = static_cast<double>(xtd::sinf(input));
+}
+
+TEST_CASE("sinCuda", "[sin]") { // Launches the sin/sinf kernels for int, float and double copies of each input and compares the six results with host-side std::sin / sinf values.
+  int deviceCount;
+  cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount);
+
+  if (cudaStatus != cudaSuccess || deviceCount == 0) {
+    exit(EXIT_SUCCESS); // device query failed or reported no devices: end the process with a success status
+  }
+
+  cudaSetDevice(0); // NOTE(review): the return codes of the CUDA calls below are not checked
+  cudaStream_t q;
+  cudaStreamCreate(&q);
+
+  std::vector<double> values{-1., 0., M_PI / 2, M_PI, 42.};
+
+  double *result;
+  int constexpr N = 6; // one output slot per kernel launch in the loop body
+  cudaMallocAsync(&result, N * sizeof(double), q);
+
+  for (auto v : values) {
+
+    cudaMemsetAsync(&result, 0x00, N * sizeof(double), q); // NOTE(review): &result is the address of the host pointer variable, not the buffer allocated above; presumably `result` was intended - confirm
+
+    sinKernel<<<1, 1, 0, q>>>(&result[0], static_cast<int>(v)); // slot 0: xtd::sin(int)
+    sinKernel<<<1, 1, 0, q>>>(&result[1], static_cast<float>(v)); // slot 1: xtd::sin(float)
+    sinKernel<<<1, 1, 0, q>>>(&result[2], static_cast<double>(v)); // slot 2: xtd::sin(double)
+    sinfKernel<<<1, 1, 0, q>>>(&result[3], static_cast<int>(v)); // slot 3: xtd::sinf(int)
+    sinfKernel<<<1, 1, 0, q>>>(&result[4], static_cast<float>(v)); // slot 4: xtd::sinf(float)
+    sinfKernel<<<1, 1, 0, q>>>(&result[5], static_cast<double>(v)); // slot 5: xtd::sinf(double)
+
+    double resultHost[N];
+    cudaMemcpyAsync(resultHost, result, N * sizeof(double),
+                    cudaMemcpyDeviceToHost, q);
+
+    cudaStreamSynchronize(q); // wait for the stream so resultHost is filled before it is read
+
+    auto const epsilon = std::numeric_limits<double>::epsilon(); // absolute tolerance for the double-precision checks
+    auto const epsilon_f = std::numeric_limits<float>::epsilon(); // absolute tolerance for the single-precision checks
+    REQUIRE_THAT(resultHost[0], Catch::Matchers::WithinAbs(
+                                    std::sin(static_cast<int>(v)), epsilon));
+    REQUIRE_THAT(resultHost[1],
+                 Catch::Matchers::WithinAbs(std::sin(v), epsilon_f));
+    REQUIRE_THAT(resultHost[2],
+                 Catch::Matchers::WithinAbs(std::sin(v), epsilon));
+    REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(
+                                    sinf(static_cast<int>(v)), epsilon_f));
+    REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));
+    REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));
+  }
+
+  cudaFreeAsync(result, q); // NOTE(review): the stream q is never destroyed in this version
+}


Suggested change

#define CATCH_CONFIG_MAIN

#include <catch.hpp>

#include "math.h"

#include <cuda_runtime.h>

#include <limits>

#include <vector>

// Kernel: evaluates xtd::sin on `input` and stores the value, converted to
// double, in result[0].
template <typename T> __global__ void sinKernel(double *result, T input) {

result[0] = static_cast<double>(xtd::sin(input));

}

// Kernel: evaluates xtd::sinf on `input` and stores the value, converted to
// double, in result[0].
template <typename T> __global__ void sinfKernel(double *result, T input) {

result[0] = static_cast<double>(xtd::sinf(input));

}

// Test: launches the sin/sinf kernels for int, float and double copies of each
// input value and compares the six results with host-side std::sin / sinf.
// NOTE(review): none of the CUDA API return codes in this version are checked.
TEST_CASE("sinCuda", "[sin]") {

int deviceCount;

cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount);

// Device query failed or reported no devices: end the process with a success
// status.
if (cudaStatus != cudaSuccess || deviceCount == 0) {

exit(EXIT_SUCCESS);

}

cudaSetDevice(0);

cudaStream_t q;

cudaStreamCreate(&q);

std::vector<double> values{-1., 0., M_PI / 2, M_PI, 42.};

double *result;

// One output slot per kernel launch in the loop body.
int constexpr N = 6;

cudaMallocAsync(&result, N * sizeof(double), q);

for (auto v : values) {

// NOTE(review): &result is the address of the host pointer variable, not the
// buffer allocated above; presumably `result` was intended - confirm.
cudaMemsetAsync(&result, 0x00, N * sizeof(double), q);

// Slots 0-2: xtd::sin with int, float and double arguments.
sinKernel<<<1, 1, 0, q>>>(&result[0], static_cast<int>(v));

sinKernel<<<1, 1, 0, q>>>(&result[1], static_cast<float>(v));

sinKernel<<<1, 1, 0, q>>>(&result[2], static_cast<double>(v));

// Slots 3-5: xtd::sinf with int, float and double arguments.
sinfKernel<<<1, 1, 0, q>>>(&result[3], static_cast<int>(v));

sinfKernel<<<1, 1, 0, q>>>(&result[4], static_cast<float>(v));

sinfKernel<<<1, 1, 0, q>>>(&result[5], static_cast<double>(v));

double resultHost[N];

cudaMemcpyAsync(resultHost, result, N * sizeof(double),

cudaMemcpyDeviceToHost, q);

// Wait for the stream so resultHost is filled before it is read.
cudaStreamSynchronize(q);

// Absolute tolerances for the double- and single-precision checks.
auto const epsilon = std::numeric_limits<double>::epsilon();

auto const epsilon_f = std::numeric_limits<float>::epsilon();

REQUIRE_THAT(resultHost[0], Catch::Matchers::WithinAbs(

std::sin(static_cast<int>(v)), epsilon));

REQUIRE_THAT(resultHost[1],

Catch::Matchers::WithinAbs(std::sin(v), epsilon_f));

REQUIRE_THAT(resultHost[2],

Catch::Matchers::WithinAbs(std::sin(v), epsilon));

REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(

sinf(static_cast<int>(v)), epsilon_f));

REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));

REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));

}

// NOTE(review): the stream q is never destroyed in this version.
cudaFreeAsync(result, q);

}

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>

#include <cuda_runtime.h>

#define CATCH_CONFIG_MAIN
#include <catch.hpp>

#include "math.h"
#include "cuda_check.h"

// Kernel: evaluates xtd::sin on `input` and writes the value, converted to
// double, to the location `result` points to.
template <typename T>
__global__ void sinKernel(double *result, T input) {
  auto const value = xtd::sin(input);
  *result = static_cast<double>(value);
}

// Kernel: evaluates xtd::sinf on `input` and writes the value, converted to
// double, to the location `result` points to.
template <typename T>
__global__ void sinfKernel(double *result, T input) {
  auto const value = xtd::sinf(input);
  *result = static_cast<double>(value);
}

// Test: evaluates xtd::sin and xtd::sinf on CUDA device 0 for int, float and
// double copies of each input value, and compares the six results with values
// computed on the host by std::sin / sinf.
//
// If the device query fails or reports no devices, the test prints a notice
// and ends the process with a success status, so the skip is visible in the
// output instead of being silent.
TEST_CASE("sinCuda", "[sin]") {
  int deviceCount;
  cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount);

  if (cudaStatus != cudaSuccess || deviceCount == 0) {
    // Say why nothing ran; exit() flushes std::cout on the way out.
    std::cout << "No NVIDIA GPUs found, the test will be skipped.\n";
    exit(EXIT_SUCCESS);
  }

  CUDA_CHECK(cudaSetDevice(0));
  cudaStream_t q;
  CUDA_CHECK(cudaStreamCreate(&q));

  std::vector<double> const values{-1., 0., M_PI / 2, M_PI, 42.};

  // One output slot per kernel launch in the loop body:
  //   0-2: xtd::sin  with int, float, double arguments
  //   3-5: xtd::sinf with int, float, double arguments
  int constexpr N = 6;
  double *result;
  CUDA_CHECK(cudaMallocAsync(&result, N * sizeof(double), q));

  // Loop-invariant: host copy of the results and the absolute tolerances for
  // the double- and single-precision comparisons.
  double resultHost[N];
  auto const epsilon = std::numeric_limits<double>::epsilon();
  auto const epsilon_f = std::numeric_limits<float>::epsilon();

  for (auto v : values) {
    // Clear the device buffer so values from the previous input cannot be
    // mistaken for results of this iteration.
    CUDA_CHECK(cudaMemsetAsync(result, 0x00, N * sizeof(double), q));

    // A kernel launch returns no status; cudaGetLastError() reports launch
    // failures.
    sinKernel<<<1, 1, 0, q>>>(result + 0, static_cast<int>(v));
    CUDA_CHECK(cudaGetLastError());
    sinKernel<<<1, 1, 0, q>>>(result + 1, static_cast<float>(v));
    CUDA_CHECK(cudaGetLastError());
    sinKernel<<<1, 1, 0, q>>>(result + 2, static_cast<double>(v));
    CUDA_CHECK(cudaGetLastError());
    sinfKernel<<<1, 1, 0, q>>>(result + 3, static_cast<int>(v));
    CUDA_CHECK(cudaGetLastError());
    sinfKernel<<<1, 1, 0, q>>>(result + 4, static_cast<float>(v));
    CUDA_CHECK(cudaGetLastError());
    sinfKernel<<<1, 1, 0, q>>>(result + 5, static_cast<double>(v));
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaMemcpyAsync(resultHost, result, N * sizeof(double), cudaMemcpyDeviceToHost, q));

    // Wait for the stream so resultHost is filled before it is read.
    CUDA_CHECK(cudaStreamSynchronize(q));

    // NOTE(review): the reference for slot 1 is std::sin of the double value,
    // while the kernel received static_cast<float>(v); the check relies on the
    // float tolerance to absorb that difference.
    REQUIRE_THAT(resultHost[0], Catch::Matchers::WithinAbs(std::sin(static_cast<int>(v)), epsilon));
    REQUIRE_THAT(resultHost[1], Catch::Matchers::WithinAbs(std::sin(v), epsilon_f));
    REQUIRE_THAT(resultHost[2], Catch::Matchers::WithinAbs(std::sin(v), epsilon));
    REQUIRE_THAT(resultHost[3], Catch::Matchers::WithinAbs(sinf(static_cast<int>(v)), epsilon_f));
    REQUIRE_THAT(resultHost[4], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));
    REQUIRE_THAT(resultHost[5], Catch::Matchers::WithinAbs(sinf(v), epsilon_f));
  }

  CUDA_CHECK(cudaFreeAsync(result, q));
  CUDA_CHECK(cudaStreamDestroy(q));
}

fwyzard · 2024-03-02T11:11:59Z

On my notebook I get

fwyzard@fools:~/src/xtd/test$ make
Running /home/fwyzard/src/xtd/test/sin/bin/sin_t_cc
===============================================================================
All tests passed (30 assertions in 1 test case)


Running /home/fwyzard/src/xtd/test/sin/bin/sin_t_hip
No AMD GPUs found, the test will be skipped.

Running /home/fwyzard/src/xtd/test/sin/bin/sin_t_cuda
===============================================================================
All tests passed (30 assertions in 1 test case)


Running /home/fwyzard/src/xtd/test/sin/bin/sin_t_cpusycl
===============================================================================
All tests passed (30 assertions in 1 test case)


Running /home/fwyzard/src/xtd/test/sin/bin/sin_t_gpusycl
Native API failed. Native API returns: -2 (PI_ERROR_DEVICE_NOT_AVAILABLE) -2 (PI_ERROR_DEVICE_NOT_AVAILABLE)Exception caught at file:sin/sin_t.sycl.cc, line:60

AuroraPerego · 2024-03-02T20:49:36Z

Thanks for the improvements :D
The tests pass on:

NVIDIA L4, V100 and T4
Intel GPU Flex

The HIP test hangs on the AMD Radeon PRO WX 9100 GPU (I don't know yet whether this is due to the GPU or to the test).

AuroraPerego added 6 commits February 27, 2024 01:01

fix typo

38e9355

add tests for sin

04f6a7d

generalize Makefile for different targets

251323b

add github action

4796528

handle Catch2 download in Makefile

6016a69

clang format

865575a

fwyzard reviewed Mar 1, 2024

View reviewed changes

AuroraPerego and others added 5 commits March 1, 2024 21:49

handle errors in tests

08988a5

Move helpers under test/common/

6af1a65

Apply code formatting

a65dd7f

Always download Catch2

fc40123

Clean up and optimise the Makefile

e7e18a0

Update CI for the Makefile changes

da2e6e1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add tests for sin #1

Add tests for sin #1

AuroraPerego commented Feb 29, 2024

fwyzard Mar 1, 2024

fwyzard commented Mar 2, 2024

AuroraPerego commented Mar 2, 2024

Add tests for sin #1

Are you sure you want to change the base?

Add tests for sin #1

Conversation

AuroraPerego commented Feb 29, 2024

fwyzard Mar 1, 2024

Choose a reason for hiding this comment

fwyzard commented Mar 2, 2024

AuroraPerego commented Mar 2, 2024