-
Notifications
You must be signed in to change notification settings - Fork 392
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add RISC-V port #503
Open
tingsi5
wants to merge
5
commits into
RenderKit:master
Choose a base branch
from
sifive:add_riscv_port
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Add RISC-V port #503
Changes from 4 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
a70dfcf
Add RISC-V implementation from https://github.com/pattonkan/sse2rvv c…
tingsi5 68afeaf
Integrate riscv64 to the build system.
tingsi5 bac41d4
Improve the precision of _mm_sqrt_ps(), _mm_rsqrt_ps(), and _mm_rcp_p…
tingsi5 6bedb90
Handle NaN in the second operand of _mm_min_ps and _mm_max_ps to pass …
tingsi5 1d88f07
Upgrade to SSE2RVV commit f3a1d7d2.
tingsi5 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
#pragma once | ||
|
||
#define SSE2RVV_PRECISE_DIV 1 | ||
#define SSE2RVV_PRECISE_SQRT 1 | ||
#define SSE2RVV_PRECISE_MINMAX 1 | ||
|
||
#include "sse2rvv.h" | ||
|
||
/* Pack four 2-bit fields into a single immediate: fp3 occupies bits 7:6,
 * fp2 bits 5:4, fp1 bits 3:2 and fp0 bits 1:0. */
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
  ((fp0) | ((fp1) << 2) | ((fp2) << 4) | ((fp3) << 6))
|
||
/* Rounding selectors taken by _mm_round_ps() in this file. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF     0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_ZERO        0x03
#define _MM_FROUND_CUR_DIRECTION  0x04
#define _MM_FROUND_NO_EXC         0x08

/* Rounding-mode values examined by _mm_setcsr() in this file. */
#define _MM_ROUND_NEAREST     0x0000
#define _MM_ROUND_DOWN        0x2000
#define _MM_ROUND_UP          0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000

/* Flush-to-zero mode values. */
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON   0x8000
#define _MM_FLUSH_ZERO_OFF  0x0000
|
||
/* Prefetch hint constants, numbered consecutively from 0 to 3. */
enum _mm_hint {
  _MM_HINT_NTA = 0,
  _MM_HINT_T0,
  _MM_HINT_T1,
  _MM_HINT_T2
};
|
||
/* Convert the packed floats in `a` to 32-bit integers with the RVV
 * float-to-integer conversion intrinsic, using a vector length of 4. */
__forceinline __m128i _mm_cvtps_epi32(__m128 a) {
  __m128i converted = __riscv_vfcvt_x_f_v_i32m1(a, 4);
  return converted;
}
|
||
/* Return element 0 of the 32-bit integer vector `a` as a scalar int. */
__forceinline int _mm_cvtsi128_si32(__m128i a) {
  const int first_lane = __riscv_vmv_x_s_i32m1_i32(a);
  return first_lane;
}
|
||
/* Return element 0 of the float vector `a` as a scalar float. */
__forceinline float _mm_cvtss_f32(__m128 a) {
  const float first_lane = __riscv_vfmv_f_s_f32m1_f32(a);
  return first_lane;
}
|
||
/* Masked dot product of `a` and `b`.
 *
 * The upper nibble of imm8 is used as the mask for the sum reduction over the
 * element-wise products; the lower nibble is used as the mask deciding which
 * result elements receive the broadcast sum (the rest keep the zero vector). */
__forceinline __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm8) {
  /* Build the two RVV mask registers from the immediate's nibbles. */
  vbool32_t sum_mask =
      __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 >> 4, 1));
  vbool32_t store_mask =
      __riscv_vreinterpret_v_i32m1_b32(__riscv_vmv_s_x_i32m1(imm8 & 0xf, 1));

  vfloat32m1_t zero_vec = __riscv_vfmv_v_f_f32m1(0, 4);
  vfloat32m1_t products = __riscv_vfmul(a, b, 4);

  /* Reduce the selected products; the scalar sum ends up in element 0. */
  vfloat32m1_t total =
      __riscv_vfredusum_vs_f32m1_f32m1_m(sum_mask, products, zero_vec, 4);

  /* Broadcast element 0 into the selected output elements. */
  vfloat32m1_t spread =
      __riscv_vrgather_vx_f32m1_mu(store_mask, zero_vec, total, 0, 4);
  return vreinterpretq_f32_m128(spread);
}
|
||
/* Reinterpret `a` as a vector of 64-bit integers and return its element 0. */
__forceinline __int64 _mm_cvtsi128_si64(__m128i a) {
  vint64m1_t as_i64 = __riscv_vreinterpret_v_i32m1_i64m1(a);
  return __riscv_vmv_x_s_i64m1_i64(as_i64);
}
|
||
/* Stub: no control/status state is read back; the result is always 0. */
__forceinline unsigned int _mm_getcsr(void) {
  const unsigned int csr_value = 0;
  return csr_value;
}
|
||
__forceinline void _mm_setcsr(unsigned int a) { | ||
int rm; | ||
|
||
switch (a) { | ||
case _MM_ROUND_TOWARD_ZERO: | ||
// FIXME: I can't find the straightforward mapping of this. | ||
rm = 0b01; | ||
break; | ||
case _MM_ROUND_DOWN: | ||
rm = 0b10; | ||
break; | ||
case _MM_ROUND_UP: | ||
rm = 0b00; | ||
break; | ||
default: //_MM_ROUND_NEAREST | ||
rm = 0b01; | ||
} | ||
|
||
asm volatile("csrw vxrm,%0" :: "r"(rm)); | ||
} | ||
|
||
/* Memory fence, delegated to the compiler's __sync_synchronize() builtin. */
__forceinline void _mm_mfence(void) {
  __sync_synchronize();
}
|
||
/* Spin-wait hint: issues a `fence.i` followed by a `fence r, r`. */
__forceinline void _mm_pause(void) {
  __asm__ __volatile__("fence.i\n\tfence r, r\n\t");
}
|
||
/* Prefetch the memory at `p` through __builtin_prefetch().
 * The hint `i` is accepted for API compatibility but is not used. */
__forceinline void _mm_prefetch(char const* p, int i) {
  __builtin_prefetch(p);
  (void)i; /* hint intentionally unused */
}
|
||
/* Round each float in `a` to an integral value according to `rounding`.
 *
 * `rounding` is one of the _MM_FROUND_TO_* selectors or
 * _MM_FROUND_CUR_DIRECTION, optionally OR'ed with _MM_FROUND_NO_EXC.
 * The _MM_FROUND_NO_EXC bit is masked off before dispatching so that a bare
 * selector (for example _MM_FROUND_TO_NEG_INF on its own) picks the requested
 * mode instead of silently falling through to the current direction.
 *
 * Nearest and toward-zero go through a float -> int32 -> float round trip
 * with an explicit rounding-mode operand (0 and 1 respectively); floor and
 * ceil delegate to _mm_floor_ps() / _mm_ceil_ps(). Any other value uses the
 * conversion without an explicit rounding-mode operand.
 */
__forceinline __m128 _mm_round_ps(__m128 a, int rounding) {
  vfloat32m1_t _a = vreinterpretq_m128_f32(a);
  switch (rounding & ~_MM_FROUND_NO_EXC) {
  case _MM_FROUND_TO_NEAREST_INT:
    return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(_a, 0, 4), 4);
  case _MM_FROUND_TO_NEG_INF:
    return _mm_floor_ps(a);
  case _MM_FROUND_TO_POS_INF:
    return _mm_ceil_ps(a);
  case _MM_FROUND_TO_ZERO:
    return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1_rm(_a, 1, 4), 4);
  default: /* _MM_FROUND_CUR_DIRECTION */
    return __riscv_vfcvt_f_x_v_f32m1(__riscv_vfcvt_x_f_v_i32m1(_a, 4), 4);
  }
}
|
||
/* Count the bits set in the 32-bit value `a`. */
__forceinline int _mm_popcnt_u32(unsigned int a) {
  const int set_bits = __builtin_popcount(a);
  return set_bits;
}
|
||
/* Count the bits set in the 64-bit value `a`.
 *
 * Uses __builtin_popcountll so that all 64 bits are examined;
 * __builtin_popcount takes an unsigned int and would drop the upper half. */
__forceinline int64_t _mm_popcnt_u64(uint64_t a) {
  return __builtin_popcountll(a);
}
|
||
/* Fused multiply-add via the RVV vfmacc intrinsic, with `c` as the
 * accumulator operand and `a`, `b` as the multiplicands (a * b + c). */
__forceinline __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
  vfloat32m1_t acc = vreinterpretq_m128_f32(c);
  vfloat32m1_t lhs = vreinterpretq_m128_f32(a);
  vfloat32m1_t rhs = vreinterpretq_m128_f32(b);
  vfloat32m1_t fused = __riscv_vfmacc_vv_f32m1(acc, lhs, rhs, 4);
  return vreinterpretq_f32_m128(fused);
}
|
||
/* Fused multiply-subtract via the RVV vfmsac intrinsic, with `c` as the
 * accumulator operand and `a`, `b` as the multiplicands (a * b - c). */
__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
  vfloat32m1_t acc = vreinterpretq_m128_f32(c);
  vfloat32m1_t lhs = vreinterpretq_m128_f32(a);
  vfloat32m1_t rhs = vreinterpretq_m128_f32(b);
  vfloat32m1_t fused = __riscv_vfmsac_vv_f32m1(acc, lhs, rhs, 4);
  return vreinterpretq_f32_m128(fused);
}
|
||
/* Negated fused multiply-add via the RVV vfnmsac intrinsic, with `c` as the
 * accumulator operand and `a`, `b` as the multiplicands (-(a * b) + c). */
__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
  vfloat32m1_t acc = vreinterpretq_m128_f32(c);
  vfloat32m1_t lhs = vreinterpretq_m128_f32(a);
  vfloat32m1_t rhs = vreinterpretq_m128_f32(b);
  vfloat32m1_t fused = __riscv_vfnmsac_vv_f32m1(acc, lhs, rhs, 4);
  return vreinterpretq_f32_m128(fused);
}
|
||
/* Negated fused multiply-subtract via the RVV vfnmacc intrinsic, with `c` as
 * the accumulator operand and `a`, `b` as the multiplicands (-(a * b) - c). */
__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
  vfloat32m1_t acc = vreinterpretq_m128_f32(c);
  vfloat32m1_t lhs = vreinterpretq_m128_f32(a);
  vfloat32m1_t rhs = vreinterpretq_m128_f32(b);
  vfloat32m1_t fused = __riscv_vfnmacc_vv_f32m1(acc, lhs, rhs, 4);
  return vreinterpretq_f32_m128(fused);
}
|
||
/* Placeholder floating-point control definitions: the mask constants carry
 * plain numeric values, and the two setter macros expand to nothing. */
#define _MM_MASK_DENORM 0x100
#define _MM_MASK_DIV_ZERO 0x200
#define _MM_MASK_MASK 0x1f80
#define _MM_SET_EXCEPTION_MASK(x)
#define _MM_SET_FLUSH_ZERO_MODE(x)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think _mm_setcsr is for floating point rounding modes. So this should be a write to `frm`, not `vxrm`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update SSE2RVV to include the proper implementation.