forked from bytecodealliance/wasmtime
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pulley: Add some macro-instructions related to bounds-checks
This commit starts down the path of optimizing wasm loads/stores in Pulley with macro-instructions. It's expected that these instructions are so common that it's worth putting them in the 1-byte namespace of opcodes. Locally this gets a 10% speedup on the sightglass bz2 benchmark.
- Loading branch information
1 parent
5030709
commit 862f09c
Showing 5 changed files with 292 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
;;! target = "pulley32" | ||
;;! test = "compile" | ||
|
||
;; Input module for the pulley32 compile test (see the `;;!` directives above). | ||
;; It has one function per load width, first without a static offset and then | ||
;; with offset=32. Each function takes the address as its only i32 parameter | ||
;; and returns the loaded value. | ||
;; In the expected listing that follows the module, every load is preceded by | ||
;; an xbc32_bound32_trap whose last operand is 1/2/4/8 for the plain loads and | ||
;; 33/34/36/40 for the offset=32 loads (presumably offset + access width). | ||
(module | ||
;; A single linear memory with a minimum size of one page. | ||
(memory 1) | ||
|
||
;; 8-bit load, zero-extended to i32. | ||
(func $load8 (param i32) (result i32) | ||
(i32.load8_u (local.get 0))) | ||
|
||
;; 16-bit load, zero-extended to i32. | ||
(func $load16 (param i32) (result i32) | ||
(i32.load16_u (local.get 0))) | ||
|
||
;; Full 32-bit load. | ||
(func $load32 (param i32) (result i32) | ||
(i32.load (local.get 0))) | ||
|
||
;; Full 64-bit load; note the i64 result with an i32 address. | ||
(func $load64 (param i32) (result i64) | ||
(i64.load (local.get 0))) | ||
|
||
;; Same as $load8, with a static offset of 32 added to the address. | ||
(func $load8_offset (param i32) (result i32) | ||
(i32.load8_u offset=32 (local.get 0))) | ||
|
||
;; Same as $load16, with a static offset of 32 added to the address. | ||
(func $load16_offset (param i32) (result i32) | ||
(i32.load16_u offset=32 (local.get 0))) | ||
|
||
;; Same as $load32, with a static offset of 32 added to the address. | ||
(func $load32_offset (param i32) (result i32) | ||
(i32.load offset=32 (local.get 0))) | ||
|
||
;; Same as $load64, with a static offset of 32 added to the address. | ||
(func $load64_offset (param i32) (result i64) | ||
(i64.load offset=32 (local.get 0))) | ||
) | ||
;; wasm[0]::function[0]::load8: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 1 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload8_u32_offset8 x0, x7, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[1]::load16: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 2 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload16le_u32_offset8 x0, x7, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[2]::load32: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 4 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload32le_offset8 x0, x7, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[3]::load64: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 8 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload64le_offset8 x0, x7, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[4]::load8_offset: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 33 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload8_u32_offset8 x0, x7, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[5]::load16_offset: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 34 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload16le_u32_offset8 x0, x7, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[6]::load32_offset: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 36 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload32le_offset8 x0, x7, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[7]::load64_offset: | ||
;; push_frame | ||
;; xload32le_offset8 x6, x0, 52 | ||
;; xbc32_bound32_trap x2, x6, 40 | ||
;; xload32le_offset8 x7, x0, 48 | ||
;; xadd32 x7, x7, x2 | ||
;; xload64le_offset8 x0, x7, 32 | ||
;; pop_frame | ||
;; ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
;;! target = "pulley64" | ||
;;! test = "compile" | ||
|
||
;; Input module for the pulley64 compile test (see the `;;!` directives above). | ||
;; It has one function per load width, first without a static offset and then | ||
;; with offset=32. Each function takes the address as its only i32 parameter | ||
;; and returns the loaded value. | ||
;; In the expected listing that follows the module, every load is preceded by | ||
;; a zext32 of the address and an xbc32_bound64_trap whose last operand is | ||
;; 1/2/4/8 for the plain loads and 33/34/36/40 for the offset=32 loads | ||
;; (presumably offset + access width). | ||
(module | ||
;; A single linear memory with a minimum size of one page. | ||
(memory 1) | ||
|
||
;; 8-bit load, zero-extended to i32. | ||
(func $load8 (param i32) (result i32) | ||
(i32.load8_u (local.get 0))) | ||
|
||
;; 16-bit load, zero-extended to i32. | ||
(func $load16 (param i32) (result i32) | ||
(i32.load16_u (local.get 0))) | ||
|
||
;; Full 32-bit load. | ||
(func $load32 (param i32) (result i32) | ||
(i32.load (local.get 0))) | ||
|
||
;; Full 64-bit load; note the i64 result with an i32 address. | ||
(func $load64 (param i32) (result i64) | ||
(i64.load (local.get 0))) | ||
|
||
;; Same as $load8, with a static offset of 32 added to the address. | ||
(func $load8_offset (param i32) (result i32) | ||
(i32.load8_u offset=32 (local.get 0))) | ||
|
||
;; Same as $load16, with a static offset of 32 added to the address. | ||
(func $load16_offset (param i32) (result i32) | ||
(i32.load16_u offset=32 (local.get 0))) | ||
|
||
;; Same as $load32, with a static offset of 32 added to the address. | ||
(func $load32_offset (param i32) (result i32) | ||
(i32.load offset=32 (local.get 0))) | ||
|
||
;; Same as $load64, with a static offset of 32 added to the address. | ||
(func $load64_offset (param i32) (result i64) | ||
(i64.load offset=32 (local.get 0))) | ||
) | ||
;; wasm[0]::function[0]::load8: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 1 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload8_u32_offset8 x0, x8, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[1]::load16: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 2 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload16le_u32_offset8 x0, x8, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[2]::load32: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 4 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload32le_offset8 x0, x8, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[3]::load64: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 8 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload64le_offset8 x0, x8, 0 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[4]::load8_offset: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 33 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload8_u32_offset8 x0, x8, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[5]::load16_offset: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 34 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload16le_u32_offset8 x0, x8, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[6]::load32_offset: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 36 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload32le_offset8 x0, x8, 32 | ||
;; pop_frame | ||
;; ret | ||
;; | ||
;; wasm[0]::function[7]::load64_offset: | ||
;; push_frame | ||
;; xload64le_offset8 x8, x0, 104 | ||
;; zext32 x7, x2 | ||
;; xbc32_bound64_trap x2, x8, 40 | ||
;; xload64le_offset8 x8, x0, 96 | ||
;; xadd64 x8, x8, x7 | ||
;; xload64le_offset8 x0, x8, 32 | ||
;; pop_frame | ||
;; ret |