Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RISC-V port #503

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQ
SET(EMBREE_ARM ON)
ENDIF()

# detect RISC-V compilation
IF (CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
MESSAGE(STATUS "Building for RISC-V 64")
SET(EMBREE_RISCV ON)
ENDIF()

SET(EMBREE_TASKING_SYSTEM "TBB" CACHE STRING "Selects tasking system")
SET(EMBREE_TBB_COMPONENT "tbb" CACHE STRING "The TBB component/library name.")

Expand Down Expand Up @@ -385,6 +391,8 @@ ENDIF()

IF (EMBREE_ARM)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X)
ELSEIF (EMBREE_RISCV)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 DEFAULT)
ELSE()
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT)
ENDIF()
Expand All @@ -399,6 +407,8 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE")
OPTION(EMBREE_ISA_NEON "Enables NEON ISA." ON)
OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF)
ENDIF()
ELSEIF (EMBREE_RISCV)
OPTION(EMBREE_ISA_RVV "Enables RVV ISA." ON)
ELSE()
TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX})
TRY_COMPILE(COMPILER_SUPPORTS_AVX2 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX2})
Expand All @@ -416,18 +426,21 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE")
# Don't use OPTION, but still set them to OFF, so that embree-config.cmake is consisten with its definitions
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
ENDIF()

ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_RVV CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
Expand All @@ -442,6 +455,7 @@ ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
ELSE()
UNSET(EMBREE_ISA_NEON CACHE)
UNSET(EMBREE_ISA_NEON2X CACHE)
UNSET(EMBREE_ISA_RVV CACHE)
UNSET(EMBREE_ISA_SSE2 CACHE)
UNSET(EMBREE_ISA_SSE42 CACHE)
UNSET(EMBREE_ISA_AVX CACHE)
Expand All @@ -452,6 +466,8 @@ ELSE()
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "NEON2X")
SET(ISA 2)
ELSEIF(EMBREE_MAX_ISA STREQUAL "RVV")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE2")
SET(ISA 1)
ELSEIF(EMBREE_MAX_ISA STREQUAL "SSE4.2")
Expand All @@ -470,6 +486,7 @@ ELSE()

SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_RVV OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
Expand All @@ -483,6 +500,10 @@ ELSE()
IF (ISA GREATER 1)
SET(EMBREE_ISA_NEON2X ON)
ENDIF ()
ELSEIF (EMBREE_RISCV)
IF (ISA GREATER 0)
SET(EMBREE_ISA_RVV ON)
ENDIF ()
ELSE()
IF (ISA GREATER 0)
SET(EMBREE_ISA_SSE2 ON)
Expand Down Expand Up @@ -574,6 +595,11 @@ IF (EMBREE_ISA_NEON2X)
SET(EMBREE_ISA_AVX2 ON)
ENDIF()

IF (EMBREE_ISA_RVV)
SET(EMBREE_ISA_SSE2 ON)
SET(EMBREE_ISA_SSE42 ON)
ENDIF()

IF (EMBREE_ISA_SSE2)
ADD_DEFINITIONS(-DEMBREE_TARGET_SSE2)
IF (NOT EMBREE_ARM)
Expand Down
3 changes: 3 additions & 0 deletions common/cmake/clang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ IF (EMBREE_ARM)
SET(FLAGS_AVX "-D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
SET(FLAGS_AVX2 "-D__AVX2__ -D__AVX__ -D__SSE4_2__ -D__SSE4_1__ -D__BMI__ -D__BMI2__ -D__LZCNT__")
ENDIF ()
ELSEIF (EMBREE_RISCV)
SET(FLAGS_SSE2 "-D__SSE__ -D__SSE2__ -march=rv64gcv_zba_zbb_zbs -mrvv-vector-bits=zvl")
SET(FLAGS_SSE42 "-D__SSE4_2__ -D__SSE4_1__ -march=rv64gcv_zba_zbb_zbs -mrvv-vector-bits=zvl")
ELSE ()
# for `thread` keyword
_SET_IF_EMPTY(FLAGS_SSE2 "-msse -msse2 -mno-sse4.2")
Expand Down
2 changes: 2 additions & 0 deletions common/math/emath.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#elif defined(__riscv_v)
#include "../simd/riscv/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
Expand Down
2 changes: 1 addition & 1 deletion common/math/vec3.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ namespace embree
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(__riscv_v)
template<>
__forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
Expand Down
2 changes: 1 addition & 1 deletion common/math/vec4.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ namespace embree
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#elif defined(__SSE__) || defined(__ARM_NEON)
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(__riscv_v)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
}
Expand Down
156 changes: 156 additions & 0 deletions common/simd/riscv/emulation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#pragma once

#define SSE2RVV_PRECISE_DIV 1
#define SSE2RVV_PRECISE_SQRT 1
#define SSE2RVV_PRECISE_MINMAX 1

#include "sse2rvv.h"

#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
#define _MM_FROUND_NO_EXC 0x08
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
/* Flush zero mode macros. */
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000

enum _mm_hint {
_MM_HINT_NTA = 0,
_MM_HINT_T0 = 1,
_MM_HINT_T1 = 2,
_MM_HINT_T2 = 3,
};

__forceinline __m128i _mm_cvtps_epi32(__m128 a) {
return __riscv_vfcvt_x_f_v_i32m1(a, 4);
}

__forceinline int _mm_cvtsi128_si32(__m128i a) {
return __riscv_vmv_x_s_i32m1_i32(a);
}

__forceinline float _mm_cvtss_f32 (__m128 a) {
return __riscv_vfmv_f_s_f32m1_f32(a);
}

__forceinline __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm8) {
vfloat32m1_t zeros = __riscv_vfmv_v_f_f32m1(0, 4);
vbool32_t high = __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 >> 4, 1));
vbool32_t low = __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 & 0xf, 1));
vfloat32m1_t sum = __riscv_vfredusum_vs_f32m1_f32m1_m(high, __riscv_vfmul(a, b, 4), zeros, 4);
return vreinterpretq_f32_m128(__riscv_vrgather_vx_f32m1_mu(low, zeros, sum, 0, 4));
}

__forceinline __int64 _mm_cvtsi128_si64 (__m128i a) {
return __riscv_vmv_x_s_i64m1_i64(__riscv_vreinterpret_v_i32m1_i64m1(a));
}

__forceinline unsigned int _mm_getcsr(void) {
return 0;
}

__forceinline void _mm_setcsr(unsigned int a) {
int rm;

switch (a) {
case _MM_ROUND_TOWARD_ZERO:
// FIXME: I can't find the straightforward mapping of this.
rm = 0b01;
break;
case _MM_ROUND_DOWN:
rm = 0b10;
break;
case _MM_ROUND_UP:
rm = 0b00;
break;
default: //_MM_ROUND_NEAREST
rm = 0b01;
}

asm volatile("csrw vxrm,%0" :: "r"(rm));
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think _mm_setcsr is for floating point rounding modes. So this should be a write to frm not vxrm.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update SSE2RVV to include the proper implementation.

}

__forceinline void _mm_mfence (void) {
__sync_synchronize();
}

__forceinline void _mm_pause (void) {
__asm__ __volatile__("fence.i\n\t"
"fence r, r\n\t");
}

__forceinline void _mm_prefetch (char const* p, int i) {
(void)i;
__builtin_prefetch(p);
}

__forceinline __m128 _mm_round_ps(__m128 a, int rounding) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
switch (rounding) {
case (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC):
return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(_a, 0, 4), 4);
case (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC):
return _mm_floor_ps(a);
case (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC):
return _mm_ceil_ps(a);
case (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC):
return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(_a, 1, 4), 4);
default: //_MM_FROUND_CUR_DIRECTION
return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1(_a, 4), 4);
}
}

__forceinline int _mm_popcnt_u32(unsigned int a) {
return __builtin_popcount(a);
}

__forceinline int64_t _mm_popcnt_u64(uint64_t a) {
return __builtin_popcount(a);
}

__forceinline __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfmacc_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfmsac_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfnmsac_vv_f32m1(_c, _a, _b, 4));
}

__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
vfloat32m1_t _a = vreinterpretq_m128_f32(a);
vfloat32m1_t _b = vreinterpretq_m128_f32(b);
vfloat32m1_t _c = vreinterpretq_m128_f32(c);
return vreinterpretq_f32_m128(__riscv_vfnmacc_vv_f32m1(_c, _a, _b, 4));
}

/* Dummy defines for floating point control */
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_DIV_ZERO 0x200
// #define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_MASK_DENORM 0x100
#define _MM_SET_EXCEPTION_MASK(x)
#define _MM_SET_FLUSH_ZERO_MODE(x)
Loading