From 78c9a5ebf3ae924ea928d1d872fbadddf0771dc4 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 21 Oct 2024 14:28:01 +0100 Subject: [PATCH 01/15] Add simde library and SIMD swizzle implementation --- build-scripts/config_common.cmake | 3 + build-scripts/runtime_lib.cmake | 4 + core/iwasm/common/wasm_runtime_common.h | 92 +++++++++++++++++++ core/iwasm/interpreter/iwasm_interp.cmake | 4 + core/iwasm/interpreter/wasm_interp_fast.c | 30 +++++- core/iwasm/libraries/simde/simde.cmake | 22 +++++ .../platforms/linux-sgx/CMakeLists.txt | 5 + 7 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 core/iwasm/libraries/simde/simde.cmake diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 3d0d6bef76..81c378c6f3 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -288,6 +288,9 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() +if (WAMR_BUILD_SIMDE EQUAL 1) + message (" Lib simde enabled") +endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) add_definitions (-DWASM_ENABLE_MINI_LOADER=1) message (" WASM mini loader enabled") diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 3ab0cff4fb..7fa2a6a85c 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -142,6 +142,10 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () +if (WAMR_BUILD_SIMDE EQUAL 1) + include (${IWASM_DIR}/libraries/simde/simde.cmake) +endif () + if (WAMR_BUILD_WASM_CACHE EQUAL 1) include (${WAMR_ROOT_DIR}/build-scripts/involve_boringssl.cmake) endif () diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 0b89edf5e8..8ff5180f77 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -73,6 +73,12 @@ STORE_U8(void *addr, uint8_t value) *(uint8 *)addr = value; } +static inline void +STORE_V128(void *addr, V128 value) +{ + *(V128 *)addr = value; +} + /* For LOAD opcodes */ #define LOAD_I64(addr) (*(int64 *)(addr)) #define LOAD_F64(addr) (*(float64 *)(addr)) @@ -80,6 +86,7 @@ STORE_U8(void *addr, uint8_t value) #define LOAD_U32(addr) (*(uint32 *)(addr)) #define LOAD_I16(addr) (*(int16 *)(addr)) #define LOAD_U16(addr) (*(uint16 *)(addr)) +#define LOAD_V128(addr) (*(V128 *)(addr)) #define STORE_PTR(addr, ptr) \ do { \ @@ -264,7 +271,92 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[0] = u.u8[0]; ((uint8_t *)(addr))[1] = u.u8[1]; } + +#define STORE_V128(addr, value) \ + do { \ + uintptr_t addr_ = (uintptr_t)(addr); \ + union { \ + V128 val; \ + uint64 u64[2]; \ + uint32 u32[4]; \ + uint16 u16[8]; \ + uint8 u8[16]; \ + } u; \ + if ((addr_ & (uintptr_t)15) == 0) \ + *(V128 *)(addr) = (V128)(value); \ + else { \ + u.val = (V128)(value); \ + if ((addr_ & (uintptr_t)7) == 0) { \ + ((uint64 *)(addr))[0] = u.u64[0]; \ + ((uint64 *)(addr))[1] = u.u64[1]; \ + } \ + else if ((addr_ & (uintptr_t)3) == 0) { \ + ((uint32 *)(addr))[0] = u.u32[0]; \ + ((uint32 *)(addr))[1] = u.u32[1]; \ + ((uint32 *)(addr))[2] = u.u32[2]; \ + ((uint32 *)(addr))[3] = u.u32[3]; \ + } \ + else if ((addr_ & (uintptr_t)1) == 0) { \ + ((uint16 *)(addr))[0] = u.u16[0]; \ + ((uint16 *)(addr))[1] = u.u16[1]; \ + ((uint16 *)(addr))[2] = u.u16[2]; \ + ((uint16 *)(addr))[3] = u.u16[3]; \ + ((uint16 *)(addr))[4] = u.u16[4]; \ + ((uint16 *)(addr))[5] = u.u16[5]; \ + ((uint16 *)(addr))[6] = u.u16[6]; \ + ((uint16 *)(addr))[7] = u.u16[7]; \ + } \ + else { \ + int32 t; \ + for (t = 0; t < 16; t++) \ + ((uint8 *)(addr))[t] = u.u8[t]; \ + } \ + } \ + } while (0) + /* For LOAD opcodes */ +static inline V128 +LOAD_V128(void *addr) +{ + uintptr_t addr1 = (uintptr_t)addr; + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + if ((addr1 & (uintptr_t)15) == 0) + return *(V128 *)addr; + + if ((addr1 & (uintptr_t)7) == 0) { + u.u64[0] = ((uint64 *)addr)[0]; + u.u64[1] = ((uint64 *)addr)[1]; + } + else if ((addr1 & (uintptr_t)3) == 0) { + u.u32[0] = ((uint32 *)addr)[0]; + u.u32[1] = ((uint32 *)addr)[1]; + u.u32[2] = ((uint32 *)addr)[2]; + u.u32[3] = ((uint32 *)addr)[3]; + } + else if ((addr1 & (uintptr_t)1) == 0) { + u.u16[0] = ((uint16 *)addr)[0]; + u.u16[1] = ((uint16 *)addr)[1]; + u.u16[2] = ((uint16 *)addr)[2]; + u.u16[3] = ((uint16 *)addr)[3]; + u.u16[4] = ((uint16 *)addr)[4]; + u.u16[5] = ((uint16 *)addr)[5]; + u.u16[6] = ((uint16 *)addr)[6]; + u.u16[7] = ((uint16 *)addr)[7]; + } + else { + int32 t; + for (t = 0; t < 16; t++) + u.u8[t] = ((uint8 *)addr)[t]; + } + return u.val; +} + static inline int64 LOAD_I64(void *addr) { diff --git a/core/iwasm/interpreter/iwasm_interp.cmake b/core/iwasm/interpreter/iwasm_interp.cmake index e6e52e42c8..99d1c2bab2 100644 --- a/core/iwasm/interpreter/iwasm_interp.cmake +++ b/core/iwasm/interpreter/iwasm_interp.cmake @@ -19,6 +19,10 @@ else () set (LOADER "wasm_loader.c") endif () +if (WAMR_BUILD_SIMD) + set (WAMR_BUILD_SIMDE 1) +endif() + file (GLOB_RECURSE source_all ${IWASM_INTERP_DIR}/${LOADER} ${IWASM_INTERP_DIR}/wasm_runtime.c diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 9d3b743382..dfd3d9f86e 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,6 +21,10 @@ #include "../common/wasm_shared_memory.h" #endif +#if WASM_ENABLE_SIMD != 0 +#include "simde/wasm/simd128.h" +#endif + typedef int32 CellType_I32; typedef int64 CellType_I64; typedef float32 CellType_F32; @@ -5647,6 +5651,16 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, goto call_func_from_entry; } #if WASM_ENABLE_SIMD != 0 +/* TODO: Add x86 with additional #if */ +#define SIMD_V128_TO_SIMDE_V128(v) \ + vreinterpretq_s32_u8(vld1q_u8((uint8_t *)&(v))) + +#define SIMDE_V128_TO_SIMD_V128(sv, v) \ + do { \ + uint8x16_t temp = vreinterpretq_u8_s32(sv); \ + vst1q_u8((uint8_t *)&(v), temp); \ + } while (0) + HANDLE_OP(WASM_OP_SIMD_PREFIX) { GET_OPCODE(); @@ -5682,11 +5696,25 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } case SIMD_v8x16_shuffle: - case SIMD_v8x16_swizzle: { wasm_set_exception(module, "unsupported SIMD opcode"); break; } + case SIMD_v8x16_swizzle: + { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_i8x16_swizzle( + SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + } /* Splat */ case SIMD_i8x16_splat: diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake new file mode 100644 index 0000000000..98ffbedad9 --- /dev/null +++ b/core/iwasm/libraries/simde/simde.cmake @@ -0,0 +1,22 @@ +# simde is a header only library + +set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) + +add_definitions (-DWASM_ENABLE_LIB_SIMDE=1) + +include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) + +include(FetchContent) + +FetchContent_Declare( + simde + GIT_REPOSITORY https://github.com/simd-everywhere/simde + GIT_TAG v0.8.2 +) + +FetchContent_GetProperties(simde) +if (NOT simde_POPULATED) + message("-- Fetching simde ..") + FetchContent_MakeAvailable(simde) + include_directories("${simde_SOURCE_DIR}") +endif() diff --git a/product-mini/platforms/linux-sgx/CMakeLists.txt b/product-mini/platforms/linux-sgx/CMakeLists.txt index 20b3fdfac1..e7bdbb40ce 100644 --- a/product-mini/platforms/linux-sgx/CMakeLists.txt +++ b/product-mini/platforms/linux-sgx/CMakeLists.txt @@ -68,6 +68,11 @@ if (NOT DEFINED WAMR_BUILD_LIB_RATS) set (WAMR_BUILD_LIB_RATS 0) endif() +if (NOT DEFINED WAMR_BUILD_SIMDE) + # Disable lib simde by default + set (WAMR_BUILD_SIMDE 0) +endif() + if (NOT DEFINED WAMR_BUILD_FAST_INTERP) # Enable fast interpreter set (WAMR_BUILD_FAST_INTERP 1) From 71d129ff9c369204574209b9b2dc0ee19ea53a91 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 21 Oct 2024 15:35:40 +0100 Subject: [PATCH 02/15] Add some loads, stores, more eqs --- core/iwasm/interpreter/wasm_interp_fast.c | 38 ++++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 8 +++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index dfd3d9f86e..a4c990c57a 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5668,6 +5668,17 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, switch (opcode) { /* Memory */ case SIMD_v128_load: + { + uint32 offset, addr; + offset = read_uint32( + frame_ip); // TODO: Check with an offset! + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); + break; + } case SIMD_v128_load8x8_s: case SIMD_v128_load8x8_u: case SIMD_v128_load16x4_s: @@ -5680,7 +5691,16 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_v128_load64_splat: case SIMD_v128_store: { - wasm_set_exception(module, "unsupported SIMD opcode"); + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 2); + + V128 data; + data = POP_V128(); + frame_ip += 2; + + CHECK_MEMORY_OVERFLOW(16); + STORE_V128(maddr, data); break; } @@ -5751,6 +5771,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { + // TODO: Use simde V128 v1 = POP_V128(); V128 v2 = POP_V128(); int i; @@ -5780,6 +5801,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i16x8 comparison operations */ case SIMD_i16x8_eq: + { + // TODO: Use simde + V128 v1 = POP_V128(); + V128 v2 = POP_V128(); + int i; + addr_ret = GET_OFFSET(); + + V128 result; + for (i = 0; i < 8; i++) { + result.i16x8[i] = + v1.i16x8[i] == v2.i16x8[i] ? 0xffff : 0; + } + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_i16x8_ne: case SIMD_i16x8_lt_s: case SIMD_i16x8_lt_u: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 5005fc63bf..06ee97143a 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -14934,6 +14934,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; @@ -14953,6 +14957,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_V128(); POP_MEM_OFFSET(); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 From 566caebd938601ca7e9803257f4fad1a3cd31b00 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 22 Oct 2024 10:21:28 +0100 Subject: [PATCH 03/15] Fix SIMD128 conversion macro --- core/iwasm/interpreter/wasm_interp_fast.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index a4c990c57a..f7871587dd 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5651,14 +5651,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, goto call_func_from_entry; } #if WASM_ENABLE_SIMD != 0 -/* TODO: Add x86 with additional #if */ -#define SIMD_V128_TO_SIMDE_V128(v) \ - vreinterpretq_s32_u8(vld1q_u8((uint8_t *)&(v))) - -#define SIMDE_V128_TO_SIMD_V128(sv, v) \ - do { \ - uint8x16_t temp = vreinterpretq_u8_s32(sv); \ - vst1q_u8((uint8_t *)&(v), temp); \ +#define SIMD_V128_TO_SIMDE_V128(v) \ + ({ \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + simde_v128_t result; \ + bh_memcpy_s(&result, sizeof(simde_v128_t), &(v), sizeof(V128)); \ + result; \ + }) + +#define SIMDE_V128_TO_SIMD_V128(sv, v) \ + do { \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + bh_memcpy_s(&(v), sizeof(V128), &(sv), sizeof(simde_v128_t)); \ } while (0) HANDLE_OP(WASM_OP_SIMD_PREFIX) @@ -5725,7 +5729,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, V128 v2 = POP_V128(); V128 v1 = POP_V128(); addr_ret = GET_OFFSET(); - simde_v128_t simde_result = simde_wasm_i8x16_swizzle( SIMD_V128_TO_SIMDE_V128(v1), SIMD_V128_TO_SIMDE_V128(v2)); From d60d267adf734b9719b7b07f2039cc44e6cfc2e6 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 23 Oct 2024 22:07:09 +0100 Subject: [PATCH 04/15] Loads, splats, comparisons --- core/iwasm/interpreter/wasm_interp_fast.c | 321 +++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 1 - 2 files changed, 318 insertions(+), 4 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index f7871587dd..2d76894a0f 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5684,15 +5684,213 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } case SIMD_v128_load8x8_s: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_i16x8_load8x8(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + V128 reversed_result; + for (int i = 0; i < 8; i++) { + reversed_result.i16x8[i] = result.i16x8[7 - i]; + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + break; + } case SIMD_v128_load8x8_u: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_u16x8_load8x8(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + V128 reversed_result; + for (int i = 0; i < 8; i++) { + reversed_result.i16x8[i] = result.i16x8[7 - i]; + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + break; + } case SIMD_v128_load16x4_s: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_i32x4_load16x4(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + V128 reversed_result; + for (int i = 0; i < 4; i++) { + reversed_result.i32x4[i] = result.i32x4[3 - i]; + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + break; + } case SIMD_v128_load16x4_u: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_u32x4_load16x4(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + V128 reversed_result; + for (int i = 0; i < 4; i++) { + reversed_result.i32x4[i] = result.i32x4[3 - i]; + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + break; + } case SIMD_v128_load32x2_s: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_i64x2_load32x2(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + int32 temp = result.i64x2[0]; + result.i64x2[0] = result.i64x2[1]; + result.i64x2[1] = temp; + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_load32x2_u: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_u64x2_load32x2(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + int32 temp = result.i64x2[0]; + result.i64x2[0] = result.i64x2[1]; + result.i64x2[1] = temp; + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_load8_splat: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_v128_load8_splat(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_load16_splat: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_v128_load16_splat(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_load32_splat: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_v128_load32_splat(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_load64_splat: + { + uint32 offset, addr; + offset = read_uint32(frame_ip); + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + + simde_v128_t simde_result = + simde_wasm_v128_load64_splat(maddr); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_v128_store: { uint32 offset, addr; @@ -5740,14 +5938,54 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } /* Splat */ +#define SIMD_SPLAT_OP(simde_func, pop_func, val_type) \ + do { \ + val_type val = pop_func(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(val); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + +#define SIMD_SPLAT_OP_I32(simde_func) SIMD_SPLAT_OP(simde_func, POP_I32, uint32) +#define SIMD_SPLAT_OP_I64(simde_func) SIMD_SPLAT_OP(simde_func, POP_I64, uint64) +#define SIMD_SPLAT_OP_F32(simde_func) \ + SIMD_SPLAT_OP(simde_func, POP_F32, float32) +#define SIMD_SPLAT_OP_F64(simde_func) \ + SIMD_SPLAT_OP(simde_func, POP_F64, float64) + case SIMD_i8x16_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + break; + } case SIMD_i16x8_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i16x8_splat); + break; + } case SIMD_i32x4_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i32x4_splat); + break; + } case SIMD_i64x2_splat: + { + SIMD_SPLAT_OP_I64(simde_wasm_i64x2_splat); + break; + } case SIMD_f32x4_splat: + { + SIMD_SPLAT_OP_F32(simde_wasm_f32x4_splat); + break; + } case SIMD_f64x2_splat: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat); break; } @@ -5771,6 +6009,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } +#define SIMD_COMPARISON_OP(simde_func) \ + do { \ + V128 v1 = POP_V128(); \ + V128 v2 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \ + SIMD_V128_TO_SIMDE_V128(v2)); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + /* i8x16 comparison operations */ case SIMD_i8x16_eq: { @@ -5835,7 +6088,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i32x4 comparison operations */ case SIMD_i32x4_eq: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_eq); + break; + } case SIMD_i32x4_ne: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_ne); + break; + } case SIMD_i32x4_lt_s: case SIMD_i32x4_lt_u: case SIMD_i32x4_gt_s: @@ -5851,25 +6112,65 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* f32x4 comparison operations */ case SIMD_f32x4_eq: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_eq); + break; + } case SIMD_f32x4_ne: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_ne); + break; + } case SIMD_f32x4_lt: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_lt); + break; + } case SIMD_f32x4_gt: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_gt); + break; + } case SIMD_f32x4_le: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_le); + break; + } case SIMD_f32x4_ge: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_COMPARISON_OP(simde_wasm_f32x4_ge); break; } /* f64x2 comparison operations */ case SIMD_f64x2_eq: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_eq); + break; + } case SIMD_f64x2_ne: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_ne); + break; + } case SIMD_f64x2_lt: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_lt); + break; + } case SIMD_f64x2_gt: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_gt); + break; + } case SIMD_f64x2_le: + { + SIMD_COMPARISON_OP(simde_wasm_f32x4_le); + break; + } case SIMD_f64x2_ge: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_COMPARISON_OP(simde_wasm_f32x4_ge); break; } @@ -6097,6 +6398,20 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, case SIMD_i64x2_sub: case SIMD_i64x2_mul: case SIMD_i64x2_eq: + { + V128 v1 = POP_V128(); + V128 v2 = POP_V128(); + addr_ret = GET_OFFSET(); + + V128 result; + for (int i = 0; i < 2; i++) { + result.i64x2[i] = (v1.i64x2[i] == v2.i64x2[i]) + ? 0xffffffffffffffffULL + : 0; + } + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; + } case SIMD_i64x2_ne: case SIMD_i64x2_lt_s: case SIMD_i64x2_gt_s: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 06ee97143a..c0ad6f3794 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15066,7 +15066,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } - if (replace[opcode1 - SIMD_i8x16_extract_lane_s]) { if (!(wasm_loader_pop_frame_ref( loader_ctx, From 92ae702c61d03f8013c13997d08b595da8217233 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 23 Oct 2024 22:49:07 +0100 Subject: [PATCH 05/15] SIMD 128 Comparisons --- core/iwasm/interpreter/wasm_interp_fast.c | 155 ++++++++++++++++++---- 1 file changed, 127 insertions(+), 28 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 2d76894a0f..a0db65ca8f 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6027,62 +6027,104 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - // TODO: Use simde - V128 v1 = POP_V128(); - V128 v2 = POP_V128(); - int i; - addr_ret = GET_OFFSET(); - - V128 result; - for (i = 0; i < 16; i++) { - result.i8x16[i] = - v1.i8x16[i] == v2.i8x16[i] ? 0xff : 0; - } - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_COMPARISON_OP(simde_wasm_i8x16_eq); break; } case SIMD_i8x16_ne: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_ne); + break; + } case SIMD_i8x16_lt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_lt); + break; + } case SIMD_i8x16_lt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_lt); + break; + } case SIMD_i8x16_gt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_gt); + break; + } case SIMD_i8x16_gt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_gt); + break; + } case SIMD_i8x16_le_s: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_le); + break; + } case SIMD_i8x16_le_u: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_le); + break; + } case SIMD_i8x16_ge_s: + { + SIMD_COMPARISON_OP(simde_wasm_i8x16_ge); + break; + } case SIMD_i8x16_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_COMPARISON_OP(simde_wasm_i8x16_ge); break; } /* i16x8 comparison operations */ case SIMD_i16x8_eq: { - // TODO: Use simde - V128 v1 = POP_V128(); - V128 v2 = POP_V128(); - int i; - addr_ret = GET_OFFSET(); - - V128 result; - for (i = 0; i < 8; i++) { - result.i16x8[i] = - v1.i16x8[i] == v2.i16x8[i] ? 0xffff : 0; - } - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_COMPARISON_OP(simde_wasm_i16x8_eq); break; } case SIMD_i16x8_ne: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_ne); + break; + } case SIMD_i16x8_lt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_lt); + break; + } case SIMD_i16x8_lt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_lt); + break; + } case SIMD_i16x8_gt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_gt); + break; + } case SIMD_i16x8_gt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_gt); + break; + } case SIMD_i16x8_le_s: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_le); + break; + } case SIMD_i16x8_le_u: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_le); + break; + } case SIMD_i16x8_ge_s: + { + SIMD_COMPARISON_OP(simde_wasm_i16x8_ge); + break; + } case SIMD_i16x8_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_COMPARISON_OP(simde_wasm_i16x8_ge); break; } @@ -6098,15 +6140,43 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } case SIMD_i32x4_lt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_lt); + break; + } case SIMD_i32x4_lt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_lt); + break; + } case SIMD_i32x4_gt_s: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_gt); + break; + } case SIMD_i32x4_gt_u: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_gt); + break; + } case SIMD_i32x4_le_s: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_le); + break; + } case SIMD_i32x4_le_u: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_le); + break; + } case SIMD_i32x4_ge_s: + { + SIMD_COMPARISON_OP(simde_wasm_i32x4_ge); + break; + } case SIMD_i32x4_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_COMPARISON_OP(simde_wasm_i32x4_ge); break; } @@ -6254,18 +6324,47 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } +#define SIMD_SINGLE_OP(simde_func) \ + do { \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1)); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + /* Float conversion */ case SIMD_f32x4_demote_f64x2_zero: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_demote_f64x2_zero); + break; + } case SIMD_f64x2_promote_low_f32x4_zero: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_f64x2_promote_low_f32x4); break; } /* i8x16 operations */ case SIMD_i8x16_abs: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_abs); + break; + } case SIMD_i8x16_neg: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_neg); + break; + } case SIMD_i8x16_popcnt: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_popcnt); + break; + } case SIMD_i8x16_all_true: { V128 v = POP_V128(); From 2776da6c7c7b33606dcd714b7589345d18eaf668 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Thu, 24 Oct 2024 10:26:13 +0100 Subject: [PATCH 06/15] Fill out most trivial opcodes --- core/iwasm/interpreter/wasm_interp_fast.c | 642 +++++++++++++++++++--- 1 file changed, 571 insertions(+), 71 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index a0db65ca8f..bf208afec2 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5917,6 +5917,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); break; } + // TODO: case SIMD_v8x16_shuffle: { wasm_set_exception(module, "unsupported SIMD opcode"); @@ -5989,6 +5990,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } + // TODO: /* Lane */ case SIMD_i8x16_extract_lane_s: case SIMD_i8x16_extract_lane_u: @@ -6009,7 +6011,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } -#define SIMD_COMPARISON_OP(simde_func) \ +#define SIMD_DOUBLE_OP(simde_func) \ do { \ V128 v1 = POP_V128(); \ V128 v2 = POP_V128(); \ @@ -6027,220 +6029,220 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_eq); + SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); break; } case SIMD_i8x16_ne: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_ne); + SIMD_DOUBLE_OP(simde_wasm_i8x16_ne); break; } case SIMD_i8x16_lt_s: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_lt); + SIMD_DOUBLE_OP(simde_wasm_i8x16_lt); break; } case SIMD_i8x16_lt_u: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_lt); + SIMD_DOUBLE_OP(simde_wasm_i8x16_lt); break; } case SIMD_i8x16_gt_s: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_gt); + SIMD_DOUBLE_OP(simde_wasm_i8x16_gt); break; } case SIMD_i8x16_gt_u: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_gt); + SIMD_DOUBLE_OP(simde_wasm_i8x16_gt); break; } case SIMD_i8x16_le_s: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_le); + SIMD_DOUBLE_OP(simde_wasm_i8x16_le); break; } case SIMD_i8x16_le_u: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_le); + SIMD_DOUBLE_OP(simde_wasm_i8x16_le); break; } case SIMD_i8x16_ge_s: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_ge); + SIMD_DOUBLE_OP(simde_wasm_i8x16_ge); break; } case SIMD_i8x16_ge_u: { - SIMD_COMPARISON_OP(simde_wasm_i8x16_ge); + SIMD_DOUBLE_OP(simde_wasm_i8x16_ge); break; } /* i16x8 comparison operations */ case SIMD_i16x8_eq: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_eq); + SIMD_DOUBLE_OP(simde_wasm_i16x8_eq); break; } case SIMD_i16x8_ne: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_ne); + SIMD_DOUBLE_OP(simde_wasm_i16x8_ne); break; } case SIMD_i16x8_lt_s: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_lt); + SIMD_DOUBLE_OP(simde_wasm_i16x8_lt); break; } case SIMD_i16x8_lt_u: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_lt); + SIMD_DOUBLE_OP(simde_wasm_i16x8_lt); break; } case SIMD_i16x8_gt_s: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_gt); + SIMD_DOUBLE_OP(simde_wasm_i16x8_gt); break; } case SIMD_i16x8_gt_u: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_gt); + SIMD_DOUBLE_OP(simde_wasm_i16x8_gt); break; } case SIMD_i16x8_le_s: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_le); + SIMD_DOUBLE_OP(simde_wasm_i16x8_le); break; } case SIMD_i16x8_le_u: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_le); + SIMD_DOUBLE_OP(simde_wasm_i16x8_le); break; } case SIMD_i16x8_ge_s: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_ge); + SIMD_DOUBLE_OP(simde_wasm_i16x8_ge); break; } case SIMD_i16x8_ge_u: { - SIMD_COMPARISON_OP(simde_wasm_i16x8_ge); + SIMD_DOUBLE_OP(simde_wasm_i16x8_ge); break; } /* i32x4 comparison operations */ case SIMD_i32x4_eq: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_eq); + SIMD_DOUBLE_OP(simde_wasm_i32x4_eq); break; } case SIMD_i32x4_ne: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_ne); + SIMD_DOUBLE_OP(simde_wasm_i32x4_ne); break; } case SIMD_i32x4_lt_s: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_lt); + SIMD_DOUBLE_OP(simde_wasm_i32x4_lt); break; } case SIMD_i32x4_lt_u: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_lt); + SIMD_DOUBLE_OP(simde_wasm_i32x4_lt); break; } case SIMD_i32x4_gt_s: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_i32x4_gt); break; } case SIMD_i32x4_gt_u: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_i32x4_gt); break; } case SIMD_i32x4_le_s: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_le); + SIMD_DOUBLE_OP(simde_wasm_i32x4_le); break; } case SIMD_i32x4_le_u: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_le); + SIMD_DOUBLE_OP(simde_wasm_i32x4_le); break; } case SIMD_i32x4_ge_s: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_i32x4_ge); break; } case SIMD_i32x4_ge_u: { - SIMD_COMPARISON_OP(simde_wasm_i32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_i32x4_ge); break; } /* f32x4 comparison operations */ case SIMD_f32x4_eq: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_eq); + SIMD_DOUBLE_OP(simde_wasm_f32x4_eq); break; } case SIMD_f32x4_ne: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_ne); + SIMD_DOUBLE_OP(simde_wasm_f32x4_ne); break; } case SIMD_f32x4_lt: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_lt); + SIMD_DOUBLE_OP(simde_wasm_f32x4_lt); break; } case SIMD_f32x4_gt: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_f32x4_gt); break; } case SIMD_f32x4_le: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_le); + SIMD_DOUBLE_OP(simde_wasm_f32x4_le); break; } case SIMD_f32x4_ge: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_f32x4_ge); break; } /* f64x2 comparison operations */ case SIMD_f64x2_eq: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_eq); + SIMD_DOUBLE_OP(simde_wasm_f32x4_eq); break; } case SIMD_f64x2_ne: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_ne); + SIMD_DOUBLE_OP(simde_wasm_f32x4_ne); break; } case SIMD_f64x2_lt: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_lt); + SIMD_DOUBLE_OP(simde_wasm_f32x4_lt); break; } case SIMD_f64x2_gt: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_gt); + SIMD_DOUBLE_OP(simde_wasm_f32x4_gt); break; } case SIMD_f64x2_le: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_le); + SIMD_DOUBLE_OP(simde_wasm_f32x4_le); break; } case SIMD_f64x2_ge: { - SIMD_COMPARISON_OP(simde_wasm_f32x4_ge); + SIMD_DOUBLE_OP(simde_wasm_f32x4_ge); break; } @@ -6294,6 +6296,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, v1.i64x2[1] ^ v2.i64x2[1]); break; } + // TODO: case SIMD_v128_bitselect: { wasm_set_exception(module, "unsupported SIMD opcode"); @@ -6308,6 +6311,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } + // TODO: /* load lane operations */ case SIMD_v128_load8_lane: case SIMD_v128_load16_lane: @@ -6367,209 +6371,705 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } case SIMD_i8x16_all_true: { - V128 v = POP_V128(); - uint8_t *bytes = (uint8_t *)&v; - bool all_true = true; + V128 v1 = POP_V128(); - for (int i = 0; i < 16; i++) { - if (bytes[i] == 0) { - all_true = false; - break; - } - } + uint32_t result = simde_wasm_i8x16_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); - PUSH_I32(all_true ? 1 : 0); + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i8x16_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i8x16_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i8x16_narrow_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + break; + } case SIMD_i8x16_narrow_i16x8_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + break; + } case SIMD_f32x4_ceil: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_ceil); + break; + } case SIMD_f32x4_floor: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_floor); + break; + } case SIMD_f32x4_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_trunc); + break; + } case SIMD_f32x4_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_nearest); + break; + } + // TODO: case SIMD_i8x16_shl: case SIMD_i8x16_shr_s: case SIMD_i8x16_shr_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i8x16_add: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add); + break; + } case SIMD_i8x16_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + break; + } case SIMD_i8x16_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + break; + } case SIMD_i8x16_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub); + break; + } case SIMD_i8x16_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + break; + } case SIMD_i8x16_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + break; + } case SIMD_f64x2_ceil: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_ceil); + break; + } case SIMD_f64x2_floor: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_floor); + break; + } case SIMD_i8x16_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + break; + } case SIMD_i8x16_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + break; + } case SIMD_i8x16_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + break; + } case SIMD_i8x16_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + break; + } case SIMD_f64x2_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_trunc); + break; + } case SIMD_i8x16_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_avgr); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + break; + } case SIMD_i32x4_extadd_pairwise_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); + break; + } case SIMD_i32x4_extadd_pairwise_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); break; } /* i16x8 operations */ case SIMD_i16x8_abs: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_abs); + break; + } case SIMD_i16x8_neg: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_neg); + break; + } case SIMD_i16x8_q15mulr_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_q15mulr_sat); + break; + } + // TODO: case SIMD_i16x8_all_true: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i16x8_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i16x8_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i16x8_narrow_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + break; + } case SIMD_i16x8_narrow_i32x4_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + break; + } case SIMD_i16x8_extend_low_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + break; + } case SIMD_i16x8_extend_low_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + break; + } + // TODO: case SIMD_i16x8_shl: case SIMD_i16x8_shr_s: case SIMD_i16x8_shr_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i16x8_add: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add); + break; + } case SIMD_i16x8_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + break; + } case SIMD_i16x8_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + break; + } case SIMD_i16x8_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub); + break; + } case SIMD_i16x8_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + break; + } case SIMD_i16x8_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + break; + } case SIMD_f64x2_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_nearest); + break; + } case SIMD_i16x8_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_mul); + break; + } case SIMD_i16x8_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + break; + } case SIMD_i16x8_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + break; + } case SIMD_i16x8_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + break; + } case SIMD_i16x8_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + break; + } case SIMD_i16x8_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_avgr); + break; + } case SIMD_i16x8_extmul_low_i8x16_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); + break; + } case SIMD_i16x8_extmul_low_i8x16_u: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); break; } /* i32x4 operations */ case SIMD_i32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_abs); + break; + } case SIMD_i32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_neg); + break; + } + // TODO: case SIMD_i32x4_all_true: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i32x4_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i32x4_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i32x4_extend_low_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + break; + } case SIMD_i32x4_extend_high_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + break; + } case SIMD_i32x4_extend_low_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + break; + } case SIMD_i32x4_extend_high_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + break; + } + // TODO: case SIMD_i32x4_shl: case SIMD_i32x4_shr_s: case SIMD_i32x4_shr_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_add); + break; + } case SIMD_i32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_sub); + break; + } case SIMD_i32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_mul); + break; + } case SIMD_i32x4_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + break; + } case SIMD_i32x4_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + break; + } case SIMD_i32x4_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + break; + } case SIMD_i32x4_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + break; + } case SIMD_i32x4_dot_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_dot_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_u: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); break; } /* i64x2 operations */ case SIMD_i64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_abs); + break; + } case SIMD_i64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_neg); + break; + } + // TODO: case SIMD_i64x2_all_true: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i64x2_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i64x2_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i64x2_extend_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + break; + } case SIMD_i64x2_extend_high_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + break; + } case SIMD_i64x2_extend_low_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + break; + } case SIMD_i64x2_extend_high_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + break; + } + // TODO: case SIMD_i64x2_shl: case SIMD_i64x2_shr_s: case SIMD_i64x2_shr_u: + { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } case SIMD_i64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_add); + break; + } case SIMD_i64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_sub); + break; + } case SIMD_i64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_mul); + break; + } case SIMD_i64x2_eq: { - V128 v1 = POP_V128(); - V128 v2 = POP_V128(); - addr_ret = GET_OFFSET(); - - V128 result; - for (int i = 0; i < 2; i++) { - result.i64x2[i] = (v1.i64x2[i] == v2.i64x2[i]) - ? 0xffffffffffffffffULL - : 0; - } - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_DOUBLE_OP(simde_wasm_i64x2_eq); break; } case SIMD_i64x2_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ne); + break; + } case SIMD_i64x2_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_lt); + break; + } case SIMD_i64x2_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_gt); + break; + } case SIMD_i64x2_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_le); + break; + } case SIMD_i64x2_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ge); + break; + } case SIMD_i64x2_extmul_low_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + break; + } case SIMD_i64x2_extmul_high_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); + break; + } case SIMD_i64x2_extmul_low_i32x4_u: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + break; + } case SIMD_i64x2_extmul_high_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); break; } /* f32x4 opertions */ case SIMD_f32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_abs); + break; + } case SIMD_f32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_neg); + break; + } case SIMD_f32x4_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_sqrt); + break; + } case SIMD_f32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_add); + break; + } case SIMD_f32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_sub); + break; + } case SIMD_f32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_mul); + break; + } case SIMD_f32x4_div: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_div); + break; + } case SIMD_f32x4_min: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_min); + break; + } case SIMD_f32x4_max: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_max); + break; + } case SIMD_f32x4_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmin); + break; + } case SIMD_f32x4_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmax); break; } /* f64x2 operations */ case SIMD_f64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_abs); + break; + } case SIMD_f64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_neg); + break; + } case SIMD_f64x2_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_sqrt); + break; + } case SIMD_f64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_add); + break; + } case SIMD_f64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_sub); + break; + } case SIMD_f64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_mul); + break; + } case SIMD_f64x2_div: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_div); + break; + } case SIMD_f64x2_min: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_min); + break; + } case SIMD_f64x2_max: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_max); + break; + } case SIMD_f64x2_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmin); + break; + } case SIMD_f64x2_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmax); break; } /* Conversion operations */ case SIMD_i32x4_trunc_sat_f32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_i32x4_trunc_sat_f32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_f32x4_convert_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_f32x4_convert_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_i32x4_trunc_sat_f64x2_s_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_i32x4_trunc_sat_f64x2_u_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_f64x2_convert_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); + break; + } case SIMD_f64x2_convert_low_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); break; } From 061f7cb69b41306981138b5a42bf4bfebfe439f6 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Thu, 24 Oct 2024 10:45:02 +0100 Subject: [PATCH 07/15] Address PR comments --- build-scripts/config_common.cmake | 2 +- build-scripts/runtime_lib.cmake | 2 +- core/iwasm/common/wasm_runtime_common.h | 84 ++++++++--------------- core/iwasm/interpreter/iwasm_interp.cmake | 4 -- core/iwasm/interpreter/wasm_interp_fast.c | 4 +- 5 files changed, 32 insertions(+), 64 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 81c378c6f3..0c74dacdaf 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -288,7 +288,7 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() -if (WAMR_BUILD_SIMDE EQUAL 1) +if ((WAMR_BUILD_SIMDE EQUAL 1) AND (WAMR_BUILD_FAST_INTERP EQUAL 1)) message (" Lib simde enabled") endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 7fa2a6a85c..36910fe775 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -142,7 +142,7 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () -if (WAMR_BUILD_SIMDE EQUAL 1) +if ((WAMR_BUILD_SIMDE EQUAL 1) AND (WAMR_BUILD_FAST_INTERP EQUAL 1)) include (${IWASM_DIR}/libraries/simde/simde.cmake) endif () diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 8ff5180f77..e4675d15a3 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -272,46 +272,32 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[1] = u.u8[1]; } -#define STORE_V128(addr, value) \ - do { \ - uintptr_t addr_ = (uintptr_t)(addr); \ - union { \ - V128 val; \ - uint64 u64[2]; \ - uint32 u32[4]; \ - uint16 u16[8]; \ - uint8 u8[16]; \ - } u; \ - if ((addr_ & (uintptr_t)15) == 0) \ - *(V128 *)(addr) = (V128)(value); \ - else { \ - u.val = (V128)(value); \ - if ((addr_ & (uintptr_t)7) == 0) { \ - ((uint64 *)(addr))[0] = u.u64[0]; \ - ((uint64 *)(addr))[1] = u.u64[1]; \ - } \ - else if ((addr_ & (uintptr_t)3) == 0) { \ - ((uint32 *)(addr))[0] = u.u32[0]; \ - ((uint32 *)(addr))[1] = u.u32[1]; \ - ((uint32 *)(addr))[2] = u.u32[2]; \ - ((uint32 *)(addr))[3] = u.u32[3]; \ - } \ - else if ((addr_ & (uintptr_t)1) == 0) { \ - ((uint16 *)(addr))[0] = u.u16[0]; \ - ((uint16 *)(addr))[1] = u.u16[1]; \ - ((uint16 *)(addr))[2] = u.u16[2]; \ - ((uint16 *)(addr))[3] = u.u16[3]; \ - ((uint16 *)(addr))[4] = u.u16[4]; \ - ((uint16 *)(addr))[5] = u.u16[5]; \ - ((uint16 *)(addr))[6] = u.u16[6]; \ - ((uint16 *)(addr))[7] = u.u16[7]; \ - } \ - else { \ - int32 t; \ - for (t = 0; t < 16; t++) \ - ((uint8 *)(addr))[t] = u.u8[t]; \ - } \ - } \ +#define STORE_V128(addr, value) \ + do { \ + uintptr_t addr_ = (uintptr_t)(addr); \ + union { \ + V128 val; \ + uint64 u64[2]; \ + uint32 u32[4]; \ + uint16 u16[8]; \ + uint8 u8[16]; \ + } u; \ + if ((addr_ & (uintptr_t)15) == 0) \ + *(V128 *)(addr) = (V128)(value); \ + else { \ + u.val = (V128)(value); \ + if ((addr_ & (uintptr_t)7) == 0) { \ + ((uint64 *)(addr))[0] = u.u64[0]; \ + ((uint64 *)(addr))[1] = u.u64[1]; \ + } \ + else { \ + bh_assert((addr_ & (uintptr_t)3) == 0); \ + ((uint32 *)(addr))[0] = u.u32[0]; \ + ((uint32 *)(addr))[1] = u.u32[1]; \ + ((uint32 *)(addr))[2] = u.u32[2]; \ + ((uint32 *)(addr))[3] = u.u32[3]; \ + } \ + } \ } while (0) /* For LOAD opcodes */ @@ -333,27 +319,13 @@ LOAD_V128(void *addr) u.u64[0] = ((uint64 *)addr)[0]; u.u64[1] = ((uint64 *)addr)[1]; } - else if ((addr1 & (uintptr_t)3) == 0) { + else { + bh_assert((addr1 & (uintptr_t)3) == 0); u.u32[0] = ((uint32 *)addr)[0]; u.u32[1] = ((uint32 *)addr)[1]; u.u32[2] = ((uint32 *)addr)[2]; u.u32[3] = ((uint32 *)addr)[3]; } - else if ((addr1 & (uintptr_t)1) == 0) { - u.u16[0] = ((uint16 *)addr)[0]; - u.u16[1] = ((uint16 *)addr)[1]; - u.u16[2] = ((uint16 *)addr)[2]; - u.u16[3] = ((uint16 *)addr)[3]; - u.u16[4] = ((uint16 *)addr)[4]; - u.u16[5] = ((uint16 *)addr)[5]; - u.u16[6] = ((uint16 *)addr)[6]; - u.u16[7] = ((uint16 *)addr)[7]; - } - else { - int32 t; - for (t = 0; t < 16; t++) - u.u8[t] = ((uint8 *)addr)[t]; - } return u.val; } diff --git a/core/iwasm/interpreter/iwasm_interp.cmake b/core/iwasm/interpreter/iwasm_interp.cmake index 99d1c2bab2..e6e52e42c8 100644 --- a/core/iwasm/interpreter/iwasm_interp.cmake +++ b/core/iwasm/interpreter/iwasm_interp.cmake @@ -19,10 +19,6 @@ else () set (LOADER "wasm_loader.c") endif () -if (WAMR_BUILD_SIMD) - set (WAMR_BUILD_SIMDE 1) -endif() - file (GLOB_RECURSE source_all ${IWASM_INTERP_DIR}/${LOADER} ${IWASM_INTERP_DIR}/wasm_runtime.c diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index bf208afec2..aa13361ddb 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5895,11 +5895,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { uint32 offset, addr; offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 2); + frame_ip += 2; + addr = GET_OPERAND(uint32, I32, 0); V128 data; data = POP_V128(); - frame_ip += 2; CHECK_MEMORY_OVERFLOW(16); STORE_V128(maddr, data); From db71c1d4a49709e456936bddf3a72f95dd9d5433 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Thu, 24 Oct 2024 14:30:24 +0100 Subject: [PATCH 08/15] shifts, all_true, bitselect --- core/iwasm/interpreter/wasm_interp_fast.c | 108 ++++++++++++++++++---- 1 file changed, 90 insertions(+), 18 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index aa13361ddb..a84e924616 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -6296,11 +6296,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, v1.i64x2[1] ^ v2.i64x2[1]); break; } - // TODO: + // TODO: Test case SIMD_v128_bitselect: { - wasm_set_exception(module, "unsupported SIMD opcode"); - break; + V128 v1 = POP_V128(); + V128 v2 = POP_V128(); + V128 v3 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_v128_bitselect( + SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2), + SIMD_V128_TO_SIMDE_V128(v3)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); } case SIMD_v128_any_true: { @@ -6373,7 +6385,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { V128 v1 = POP_V128(); - uint32_t result = simde_wasm_i8x16_all_true( + bool result = simde_wasm_i8x16_all_true( SIMD_V128_TO_SIMDE_V128(v1)); addr_ret = GET_OFFSET(); @@ -6422,12 +6434,34 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_f32x4_nearest); break; } - // TODO: +// TODO: Check count? +#define SIMD_LANE_SHIFT(simde_func) \ + do { \ + int32 count = POP_I32(); \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = \ + simde_func(SIMD_V128_TO_SIMDE_V128(v1), count); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) case SIMD_i8x16_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shl); + break; + } case SIMD_i8x16_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); + break; + } case SIMD_i8x16_shr_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); break; } case SIMD_i8x16_add: @@ -6537,10 +6571,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_DOUBLE_OP(simde_wasm_i16x8_q15mulr_sat); break; } - // TODO: case SIMD_i16x8_all_true: { - wasm_set_exception(module, "unsupported SIMD opcode"); + V128 v1 = POP_V128(); + + bool result = simde_wasm_i16x8_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i16x8_bitmask: @@ -6584,12 +6623,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); break; } - // TODO: case SIMD_i16x8_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shl); + break; + } case SIMD_i16x8_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); + break; + } case SIMD_i16x8_shr_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); break; } case SIMD_i16x8_add: @@ -6689,10 +6735,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_i32x4_neg); break; } - // TODO: case SIMD_i32x4_all_true: { - wasm_set_exception(module, "unsupported SIMD opcode"); + V128 v1 = POP_V128(); + + bool result = simde_wasm_i32x4_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i32x4_bitmask: @@ -6726,12 +6777,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); break; } - // TODO: case SIMD_i32x4_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shl); + break; + } case SIMD_i32x4_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); + break; + } case SIMD_i32x4_shr_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); break; } case SIMD_i32x4_add: @@ -6806,10 +6864,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_i64x2_neg); break; } - // TODO: case SIMD_i64x2_all_true: { - wasm_set_exception(module, "unsupported SIMD opcode"); + V128 v1 = POP_V128(); + + bool result = simde_wasm_i64x2_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i64x2_bitmask: @@ -6843,12 +6906,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); break; } - // TODO: + + // TODO: Verify count works case SIMD_i64x2_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shl); + break; + } case SIMD_i64x2_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); + break; + } case SIMD_i64x2_shr_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); break; } case SIMD_i64x2_add: From 5d1c32437b23a6911653acf5c205531ccfce8c35 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Fri, 25 Oct 2024 14:32:19 +0100 Subject: [PATCH 09/15] Implement shuffle manually/in C --- core/iwasm/interpreter/wasm_interp_fast.c | 24 +++++++++++++++++++++-- core/iwasm/interpreter/wasm_loader.c | 9 +++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index a84e924616..1eb69a56c9 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5917,10 +5917,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); break; } - // TODO: + // TODO: Add a faster SIMD implementation case SIMD_v8x16_shuffle: { - wasm_set_exception(module, "unsupported SIMD opcode"); + V128 indices; + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + bh_memcpy_s(&indices, sizeof(V128), frame_ip, + sizeof(V128)); + frame_ip += sizeof(V128); + + V128 result; + for (int i = 0; i < 16; i++) { + uint8_t index = indices.i8x16[i]; + if (index < 16) { + result.i8x16[i] = v1.i8x16[index]; + } + else { + result.i8x16[i] = v2.i8x16[index - 16]; + } + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_v8x16_swizzle: diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index c0ad6f3794..0bacc31e76 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -14990,12 +14990,17 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, CHECK_BUF1(p, p_end, 16); mask = read_i8x16(p, error_buf, error_buf_size); - p += 16; if (!check_simd_shuffle_mask(mask, error_buf, error_buf_size)) { goto fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + uint64 high, low; + wasm_runtime_read_v128(p, &high, &low); + emit_uint64(loader_ctx, high); + emit_uint64(loader_ctx, low); +#endif + p += 16; POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; } From 642160594e7bb78e6601d51648026b3680d6cde1 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 30 Oct 2024 14:23:19 +0000 Subject: [PATCH 10/15] Tidy load macros --- core/iwasm/interpreter/wasm_interp_fast.c | 228 ++++++---------------- 1 file changed, 60 insertions(+), 168 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 1eb69a56c9..43765ca244 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -5683,212 +5683,104 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); break; } +#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(16); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + V128 reversed_result; \ + for (int i = 0; i < num_elements; i++) { \ + reversed_result.i##element_size##x##num_elements[i] = \ + result.i##element_size##x##num_elements[num_elements - 1 - i]; \ + } \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \ + \ + break; \ + } while (0) case SIMD_v128_load8x8_s: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_i16x8_load8x8(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - V128 reversed_result; - for (int i = 0; i < 8; i++) { - reversed_result.i16x8[i] = result.i16x8[7 - i]; - } - - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + SIMD_LOAD_OP(SIMD_v128_load8x8_s, + simde_wasm_i16x8_load8x8, 16, 8); break; } case SIMD_v128_load8x8_u: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_u16x8_load8x8(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - V128 reversed_result; - for (int i = 0; i < 8; i++) { - reversed_result.i16x8[i] = result.i16x8[7 - i]; - } - - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + SIMD_LOAD_OP(SIMD_v128_load8x8_u, + simde_wasm_u16x8_load8x8, 16, 8); break; } case SIMD_v128_load16x4_s: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_i32x4_load16x4(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - V128 reversed_result; - for (int i = 0; i < 4; i++) { - reversed_result.i32x4[i] = result.i32x4[3 - i]; - } - - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + SIMD_LOAD_OP(SIMD_v128_load16x4_s, + simde_wasm_i32x4_load16x4, 32, 4); break; } case SIMD_v128_load16x4_u: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_u32x4_load16x4(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - V128 reversed_result; - for (int i = 0; i < 4; i++) { - reversed_result.i32x4[i] = result.i32x4[3 - i]; - } - - PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); + SIMD_LOAD_OP(SIMD_v128_load16x4_u, + simde_wasm_u32x4_load16x4, 32, 4); break; } case SIMD_v128_load32x2_s: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_i64x2_load32x2(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - int32 temp = result.i64x2[0]; - result.i64x2[0] = result.i64x2[1]; - result.i64x2[1] = temp; - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_OP(SIMD_v128_load32x2_s, + simde_wasm_i64x2_load32x2, 64, 2); break; } case SIMD_v128_load32x2_u: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_u64x2_load32x2(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - int32 temp = result.i64x2[0]; - result.i64x2[0] = result.i64x2[1]; - result.i64x2[1] = temp; - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_OP(SIMD_v128_load32x2_u, + simde_wasm_u64x2_load32x2, 64, 2); break; } +#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(16); \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + case SIMD_v128_load8_splat: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_v128_load8_splat(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat, + simde_wasm_v128_load8_splat); break; } case SIMD_v128_load16_splat: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_v128_load16_splat(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat, + simde_wasm_v128_load16_splat); break; } case SIMD_v128_load32_splat: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_v128_load32_splat(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat, + simde_wasm_v128_load32_splat); break; } case SIMD_v128_load64_splat: { - uint32 offset, addr; - offset = read_uint32(frame_ip); - addr = GET_OPERAND(uint32, I32, 0); - frame_ip += 2; - addr_ret = GET_OFFSET(); - CHECK_MEMORY_OVERFLOW(16); - - simde_v128_t simde_result = - simde_wasm_v128_load64_splat(maddr); - - V128 result; - SIMDE_V128_TO_SIMD_V128(simde_result, result); - - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat, + simde_wasm_v128_load64_splat); break; } case SIMD_v128_store: From 060a0b1e9d6d117b3854ea8df3f68f407306cca7 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Wed, 30 Oct 2024 15:33:29 +0000 Subject: [PATCH 11/15] Only build SIMDE on ARM for now --- build-scripts/config_common.cmake | 3 +++ core/iwasm/interpreter/wasm_interp_fast.c | 8 ++++---- core/iwasm/interpreter/wasm_opcode.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 0c74dacdaf..eaf06d7f32 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -324,6 +324,9 @@ if (WAMR_BUILD_SIMD EQUAL 1) else () message (" SIMD disabled due to not supported on target RISCV64") endif () + if(WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") + add_definitions (-DWAMR_BUILD_SIMDE=1) + endif() endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 43765ca244..76767bfa15 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,7 +21,7 @@ #include "../common/wasm_shared_memory.h" #endif -#if WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 #include "simde/wasm/simd128.h" #endif @@ -5650,7 +5650,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 #define SIMD_V128_TO_SIMDE_V128(v) \ ({ \ bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ @@ -5684,7 +5684,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } #define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ - do { \ + do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ addr = GET_OPERAND(uint32, I32, 0); \ @@ -5743,7 +5743,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } #define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \ - do { \ + do { \ uint32 offset, addr; \ offset = read_uint32(frame_ip); \ addr = GET_OPERAND(uint32, I32, 0); \ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 1424840e79..55347316da 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -783,7 +783,7 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) #if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ - && WASM_ENABLE_SIMD != 0 + && WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), #else From d35ed4de1833927611e93ec5d7d8d52143d97cf7 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Thu, 31 Oct 2024 14:24:04 +0000 Subject: [PATCH 12/15] Addressing PR comments --- build-scripts/config_common.cmake | 4 +- core/iwasm/common/wasm_runtime_common.h | 57 ++++++++++--------- core/iwasm/interpreter/wasm_interp_fast.c | 18 +++++- core/iwasm/interpreter/wasm_opcode.h | 2 +- core/iwasm/libraries/simde/simde.cmake | 2 + .../platforms/linux-sgx/CMakeLists.txt | 5 -- 6 files changed, 51 insertions(+), 37 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index eaf06d7f32..de9afd3f8e 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -324,8 +324,8 @@ if (WAMR_BUILD_SIMD EQUAL 1) else () message (" SIMD disabled due to not supported on target RISCV64") endif () - if(WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") - add_definitions (-DWAMR_BUILD_SIMDE=1) + if(WAMR_BUILD_SIMDE EQUAL 1 AND WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") + add_definitions (-DWASM_ENABLE_SIMDE=1) endif() endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index e4675d15a3..0fe7bd4010 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -272,33 +272,36 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[1] = u.u8[1]; } -#define STORE_V128(addr, value) \ - do { \ - uintptr_t addr_ = (uintptr_t)(addr); \ - union { \ - V128 val; \ - uint64 u64[2]; \ - uint32 u32[4]; \ - uint16 u16[8]; \ - uint8 u8[16]; \ - } u; \ - if ((addr_ & (uintptr_t)15) == 0) \ - *(V128 *)(addr) = (V128)(value); \ - else { \ - u.val = (V128)(value); \ - if ((addr_ & (uintptr_t)7) == 0) { \ - ((uint64 *)(addr))[0] = u.u64[0]; \ - ((uint64 *)(addr))[1] = u.u64[1]; \ - } \ - else { \ - bh_assert((addr_ & (uintptr_t)3) == 0); \ - ((uint32 *)(addr))[0] = u.u32[0]; \ - ((uint32 *)(addr))[1] = u.u32[1]; \ - ((uint32 *)(addr))[2] = u.u32[2]; \ - ((uint32 *)(addr))[3] = u.u32[3]; \ - } \ - } \ - } while (0) +static inline void +STORE_V128(void *addr, V128 value) +{ + uintptr_t addr_ = (uintptr_t)(addr); + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + + if ((addr_ & (uintptr_t)15) == 0) { + *(V128 *)addr = value; + } + else { + u.val = value; + if ((addr_ & (uintptr_t)7) == 0) { + ((uint64 *)(addr))[0] = u.u64[0]; + ((uint64 *)(addr))[1] = u.u64[1]; + } + else { + bh_assert((addr_ & (uintptr_t)3) == 0); + ((uint32 *)addr)[0] = u.u32[0]; + ((uint32 *)addr)[1] = u.u32[1]; + ((uint32 *)addr)[2] = u.u32[2]; + ((uint32 *)addr)[3] = u.u32[3]; + } + } +} /* For LOAD opcodes */ static inline V128 diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 76767bfa15..e97e2f50b4 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,7 +21,7 @@ #include "../common/wasm_shared_memory.h" #endif -#if WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 +#if WASM_ENABLE_SIMDE != 0 #include "simde/wasm/simd128.h" #endif @@ -5650,7 +5650,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 +#if WASM_ENABLE_SIMDE != 0 #define SIMD_V128_TO_SIMDE_V128(v) \ ({ \ bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ @@ -7063,6 +7063,20 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #endif + +#if (WASM_ENABLE_SIMD != 0) && defined(WASM_ENABLE_SIMDE) \ + && (WASM_ENABLE_SIMDE != 1) + HANDLE_OP(WASM_OP_SIMD_PREFIX) + { + GET_OPCODE(); + switch (opcode) { + wasm_set_exception(module, "unsupported SIMD opcode"); + break; + } + + HANDLE_OP_END(); + } +#endif HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 55347316da..1424840e79 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -783,7 +783,7 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) #if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ - && WASM_ENABLE_SIMD != 0 && WAMR_BUILD_SIMDE != 0 + && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), #else diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index 98ffbedad9..d869553954 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -1,3 +1,5 @@ +# Copyright (C) 2024 Amazon Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # simde is a header only library set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) diff --git a/product-mini/platforms/linux-sgx/CMakeLists.txt b/product-mini/platforms/linux-sgx/CMakeLists.txt index e7bdbb40ce..20b3fdfac1 100644 --- a/product-mini/platforms/linux-sgx/CMakeLists.txt +++ b/product-mini/platforms/linux-sgx/CMakeLists.txt @@ -68,11 +68,6 @@ if (NOT DEFINED WAMR_BUILD_LIB_RATS) set (WAMR_BUILD_LIB_RATS 0) endif() -if (NOT DEFINED WAMR_BUILD_SIMDE) - # Disable lib simde by default - set (WAMR_BUILD_SIMDE 0) -endif() - if (NOT DEFINED WAMR_BUILD_FAST_INTERP) # Enable fast interpreter set (WAMR_BUILD_FAST_INTERP 1) From bcaa7c640205b69a36387c56e54721e2478d8dae Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 4 Nov 2024 14:53:21 +0000 Subject: [PATCH 13/15] Addressing comments, fixing CI --- build-scripts/config_common.cmake | 5 +---- build-scripts/runtime_lib.cmake | 2 +- core/iwasm/interpreter/wasm_interp_fast.c | 13 ------------- core/iwasm/interpreter/wasm_opcode.h | 3 ++- core/iwasm/libraries/simde/simde.cmake | 13 ++++++------- 5 files changed, 10 insertions(+), 26 deletions(-) diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index de9afd3f8e..7dd115fb65 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -288,7 +288,7 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() -if ((WAMR_BUILD_SIMDE EQUAL 1) AND (WAMR_BUILD_FAST_INTERP EQUAL 1)) +if ((WAMR_BUILD_LIB_SIMDE EQUAL 1)) message (" Lib simde enabled") endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) @@ -324,9 +324,6 @@ if (WAMR_BUILD_SIMD EQUAL 1) else () message (" SIMD disabled due to not supported on target RISCV64") endif () - if(WAMR_BUILD_SIMDE EQUAL 1 AND WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") - add_definitions (-DWASM_ENABLE_SIMDE=1) - endif() endif () if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1) add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1) diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 36910fe775..1c67579940 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -142,7 +142,7 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () -if ((WAMR_BUILD_SIMDE EQUAL 1) AND (WAMR_BUILD_FAST_INTERP EQUAL 1)) +if (WAMR_BUILD_LIB_SIMDE EQUAL 1) include (${IWASM_DIR}/libraries/simde/simde.cmake) endif () diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index e97e2f50b4..b5620d8902 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -7064,19 +7064,6 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } #endif -#if (WASM_ENABLE_SIMD != 0) && defined(WASM_ENABLE_SIMDE) \ - && (WASM_ENABLE_SIMDE != 1) - HANDLE_OP(WASM_OP_SIMD_PREFIX) - { - GET_OPCODE(); - switch (opcode) { - wasm_set_exception(module, "unsupported SIMD opcode"); - break; - } - - HANDLE_OP_END(); - } -#endif HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 1424840e79..a72098db2b 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -782,7 +782,8 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index d869553954..9c779ece5d 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -4,7 +4,9 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) -add_definitions (-DWASM_ENABLE_LIB_SIMDE=1) +if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") + add_definitions (-DWASM_ENABLE_SIMDE=1) +endif() include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) @@ -16,9 +18,6 @@ FetchContent_Declare( GIT_TAG v0.8.2 ) -FetchContent_GetProperties(simde) -if (NOT simde_POPULATED) - message("-- Fetching simde ..") - FetchContent_MakeAvailable(simde) - include_directories("${simde_SOURCE_DIR}") -endif() +message("-- Fetching simde ..") +FetchContent_MakeAvailable(simde) +include_directories("${simde_SOURCE_DIR}") From 2f651f188effc9c60d3abf775b71aa33ecc01b6f Mon Sep 17 00:00:00 2001 From: James Marsh Date: Mon, 4 Nov 2024 15:29:21 +0000 Subject: [PATCH 14/15] Define WASM_ENABLE_SIMDE=0 for platforms with build issues --- core/iwasm/interpreter/wasm_interp_fast.c | 4 ++-- core/iwasm/interpreter/wasm_opcode.h | 5 +++-- core/iwasm/libraries/simde/simde.cmake | 4 ++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index b5620d8902..1bbc11fc44 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,7 +21,7 @@ #include "../common/wasm_shared_memory.h" #endif -#if WASM_ENABLE_SIMDE != 0 +#if defined(WASM_ENABLE_SIMDE) && WASM_ENABLE_SIMDE != 0 #include "simde/wasm/simd128.h" #endif @@ -5650,7 +5650,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if WASM_ENABLE_SIMDE != 0 +#if defined(WASM_ENABLE_SIMDE) && WASM_ENABLE_SIMDE != 0 #define SIMD_V128_TO_SIMDE_V128(v) \ ({ \ bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index a72098db2b..c863a3978d 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -782,8 +782,9 @@ typedef enum WASMAtomicEXTOpcode { #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && defined(WASM_ENABLE_SIMDE) \ + && WASM_ENABLE_SIMDE != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index 9c779ece5d..e1c6cbc30c 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -7,6 +7,10 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") add_definitions (-DWASM_ENABLE_SIMDE=1) endif() +else () + add_definitions (-DWASM_ENABLE_SIMDE=0) + message (" SIMDE disabled on current platform") +endif () include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) From b20c300bc0210f5ffb971fe52ad4614a559ccea6 Mon Sep 17 00:00:00 2001 From: James Marsh Date: Tue, 5 Nov 2024 13:33:57 +0000 Subject: [PATCH 15/15] Define WASM_ENABLE_SIMDE in config.h --- core/config.h | 6 ++++++ core/iwasm/interpreter/wasm_interp_fast.c | 4 ++-- core/iwasm/interpreter/wasm_opcode.h | 6 ++---- core/iwasm/libraries/simde/simde.cmake | 4 ---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/config.h b/core/config.h index 50f7989224..16eec8487b 100644 --- a/core/config.h +++ b/core/config.h @@ -318,6 +318,12 @@ #define WASM_ENABLE_SIMD 0 #endif +/* Disable SIMDe (used in the fast interpreter for SIMD opcodes) +unless used elsewhere */ +#ifndef WASM_ENABLE_SIMDE +#define WASM_ENABLE_SIMDE 0 +#endif + /* GC performance profiling */ #ifndef WASM_ENABLE_GC_PERF_PROFILING #define WASM_ENABLE_GC_PERF_PROFILING 0 diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 1bbc11fc44..b5620d8902 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,7 +21,7 @@ #include "../common/wasm_shared_memory.h" #endif -#if defined(WASM_ENABLE_SIMDE) && WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMDE != 0 #include "simde/wasm/simd128.h" #endif @@ -5650,7 +5650,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if defined(WASM_ENABLE_SIMDE) && WASM_ENABLE_SIMDE != 0 +#if WASM_ENABLE_SIMDE != 0 #define SIMD_V128_TO_SIMDE_V128(v) \ ({ \ bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index c863a3978d..75d30c9b31 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -779,12 +779,10 @@ typedef enum WASMAtomicEXTOpcode { #else #define DEF_DEBUG_BREAK_HANDLE() #endif - #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 \ - || (WASM_ENABLE_FAST_INTERP != 0 && defined(WASM_ENABLE_SIMDE) \ - && WASM_ENABLE_SIMDE != 0)) \ +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake index e1c6cbc30c..b36e356945 100644 --- a/core/iwasm/libraries/simde/simde.cmake +++ b/core/iwasm/libraries/simde/simde.cmake @@ -6,10 +6,6 @@ set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR "ARM.*") add_definitions (-DWASM_ENABLE_SIMDE=1) -endif() -else () - add_definitions (-DWASM_ENABLE_SIMDE=0) - message (" SIMDE disabled on current platform") endif () include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde)