Skip to content

Commit

Permalink
pulley: Add some macro-instructions related to bounds-checks
Browse files Browse the repository at this point in the history
This commit starts down the path of optimizing wasm loads/stores in
Pulley with macro-instructions. It's expected that these instructions
are so common that it's worth putting them in the 1-byte namespace of
opcodes.

Locally this gets a 10% speedup on the sightglass bz2 benchmark.
  • Loading branch information
alexcrichton committed Jan 7, 2025
1 parent 5030709 commit 862f09c
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 0 deletions.
40 changes: 40 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,46 @@
(rule (lower (trapnz cond code))
(side_effect (pulley_trap_if (lower_cond cond) code)))

;; Special-cases for bounds-checks-related traps emitted for wasm loads/stores.
;; Each of these translates to a single "xbc" (x-register bounds check)
;; instruction, which traps when `addr > bound - off`. The comparison is
;; unsigned and the subtraction wraps, exactly like CLIF's `isub`.
;;
;; Note that `a >= b` is intentionally NOT matched here. It is tempting to
;; rewrite it as `a > b - 1` and emit an `xbc` with `off = 1`, but the two are
;; not equivalent when `b` is 0: `a >= 0` is always true (the trap must fire)
;; whereas `b - 1` wraps around to the maximum value and `a > b - 1` is then
;; always false (the trap never fires). That would silently drop the bounds
;; check for a one-byte access to a zero-length memory, so `>=` comparisons
;; fall through to the generic `trapnz` lowering above instead.

;; `a > b` with a 32-bit bound: a bounds check with no offset.
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
                             a
                             b @ (value_type $I32))
                       code))
  (side_effect (pulley_xbc32_bound32_trap a b 0 code)))

;; `zext(a) > b` with a 32-bit address and a 64-bit bound, no offset.
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
                             (uextend a @ (value_type $I32))
                             b @ (value_type $I64))
                       code))
  (side_effect (pulley_xbc32_bound64_trap a b 0 code)))

;; `a > b - c` where `c` is a constant that fits in a `u8`. The wrapping
;; `isub` has the same semantics as the instruction's `bound - off`, so the
;; subtraction is folded into the bounds-check instruction.
(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
                             a
                             (isub b @ (value_type $I32) (u8_from_iconst c)))
                       code))
  (side_effect (pulley_xbc32_bound32_trap a b c code)))

;; Same as above, with a zero-extended 32-bit address and a 64-bit bound.
(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
                             (uextend a @ (value_type $I32))
                             (isub b @ (value_type $I64) (u8_from_iconst c)))
                       code))
  (side_effect (pulley_xbc32_bound64_trap a b c code)))

;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (get_stack_pointer))
Expand Down
20 changes: 20 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2371,6 +2371,26 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_i64(a.wrapping_abs());
ControlFlow::Continue(())
}

/// Bounds check of a 32-bit address against a 64-bit bound.
///
/// Reads the low 32 bits of `addr`, zero-extends them to 64 bits, and traps
/// when that value is strictly greater (unsigned) than `bound - off`.
/// The subtraction wraps, so a `bound` smaller than `off` produces a very
/// large limit rather than an immediate trap.
fn xbc32_bound64_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow<Done> {
    // Largest address value that is still considered in-bounds.
    let limit = self.state[bound].get_u64().wrapping_sub(u64::from(off));
    let index = u64::from(self.state[addr].get_u32());

    // In-bounds accesses simply fall through to the next instruction.
    if index <= limit {
        return ControlFlow::Continue(());
    }
    self.done_trap::<crate::XBc32Bound64Trap>()
}

/// Bounds check of a 32-bit address against a 32-bit bound.
///
/// Reads the low 32 bits of both `addr` and `bound` and traps when the
/// address is strictly greater (unsigned) than `bound - off`. The
/// subtraction wraps, so a `bound` smaller than `off` produces a very large
/// limit rather than an immediate trap.
fn xbc32_bound32_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow<Done> {
    // Largest address value that is still considered in-bounds.
    let limit = self.state[bound].get_u32().wrapping_sub(u32::from(off));
    let index = self.state[addr].get_u32();

    // In-bounds accesses simply fall through to the next instruction.
    if index <= limit {
        return ControlFlow::Continue(());
    }
    self.done_trap::<crate::XBc32Bound32Trap>()
}
}

impl ExtendedOpVisitor for Interpreter<'_> {
Expand Down
6 changes: 6 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,12 @@ macro_rules! for_each_op {
xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
/// `dst = low32(cond) ? if_nonzero : if_zero`
xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };

/// `trapif(zext(low32(addr)) > bound - off)` (unsigned)
/// where `off` is zero-extended to 64 bits and `bound - off` is a wrapping
/// subtraction, so a `bound` smaller than `off` wraps to a large limit.
xbc32_bound64_trap = XBc32Bound64Trap { addr: XReg, bound: XReg, off: u8 };

/// `trapif(zext(low32(addr)) > low32(bound) - off)` (unsigned)
/// where `off` is zero-extended to 32 bits and `low32(bound) - off` is a
/// wrapping subtraction, so a bound smaller than `off` wraps to a large limit.
xbc32_bound32_trap = XBc32Bound32Trap { addr: XReg, bound: XReg, off: u8 };
}
};
}
Expand Down
109 changes: 109 additions & 0 deletions tests/disas/pulley/pulley32_memory32.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
;;! target = "pulley32"
;;! test = "compile"

(module
  ;; Compile-only test input for the pulley32 target: every function loads
  ;; from the module's single linear memory at the address given by its one
  ;; i32 parameter, covering 8/16/32/64-bit access widths. The `*_offset`
  ;; variants repeat each load with a static `offset=32`.
(memory 1)

  ;; Loads with no static offset.
(func $load8 (param i32) (result i32)
(i32.load8_u (local.get 0)))

(func $load16 (param i32) (result i32)
(i32.load16_u (local.get 0)))

(func $load32 (param i32) (result i32)
(i32.load (local.get 0)))

(func $load64 (param i32) (result i64)
(i64.load (local.get 0)))

  ;; The same loads with a static offset of 32 bytes.
(func $load8_offset (param i32) (result i32)
(i32.load8_u offset=32 (local.get 0)))

(func $load16_offset (param i32) (result i32)
(i32.load16_u offset=32 (local.get 0)))

(func $load32_offset (param i32) (result i32)
(i32.load offset=32 (local.get 0)))

(func $load64_offset (param i32) (result i64)
(i64.load offset=32 (local.get 0)))
)
;; wasm[0]::function[0]::load8:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 1
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload8_u32_offset8 x0, x7, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[1]::load16:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 2
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload16le_u32_offset8 x0, x7, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[2]::load32:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 4
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload32le_offset8 x0, x7, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[3]::load64:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 8
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload64le_offset8 x0, x7, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[4]::load8_offset:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 33
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload8_u32_offset8 x0, x7, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[5]::load16_offset:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 34
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload16le_u32_offset8 x0, x7, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[6]::load32_offset:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 36
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload32le_offset8 x0, x7, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[7]::load64_offset:
;; push_frame
;; xload32le_offset8 x6, x0, 52
;; xbc32_bound32_trap x2, x6, 40
;; xload32le_offset8 x7, x0, 48
;; xadd32 x7, x7, x2
;; xload64le_offset8 x0, x7, 32
;; pop_frame
;; ret
117 changes: 117 additions & 0 deletions tests/disas/pulley/pulley64_memory32.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
;;! target = "pulley64"
;;! test = "compile"

(module
  ;; Compile-only test input for the pulley64 target: every function loads
  ;; from the module's single linear memory at the address given by its one
  ;; i32 parameter, covering 8/16/32/64-bit access widths. The `*_offset`
  ;; variants repeat each load with a static `offset=32`.
(memory 1)

  ;; Loads with no static offset.
(func $load8 (param i32) (result i32)
(i32.load8_u (local.get 0)))

(func $load16 (param i32) (result i32)
(i32.load16_u (local.get 0)))

(func $load32 (param i32) (result i32)
(i32.load (local.get 0)))

(func $load64 (param i32) (result i64)
(i64.load (local.get 0)))

  ;; The same loads with a static offset of 32 bytes.
(func $load8_offset (param i32) (result i32)
(i32.load8_u offset=32 (local.get 0)))

(func $load16_offset (param i32) (result i32)
(i32.load16_u offset=32 (local.get 0)))

(func $load32_offset (param i32) (result i32)
(i32.load offset=32 (local.get 0)))

(func $load64_offset (param i32) (result i64)
(i64.load offset=32 (local.get 0)))
)
;; wasm[0]::function[0]::load8:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 1
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload8_u32_offset8 x0, x8, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[1]::load16:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 2
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload16le_u32_offset8 x0, x8, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[2]::load32:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 4
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload32le_offset8 x0, x8, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[3]::load64:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 8
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload64le_offset8 x0, x8, 0
;; pop_frame
;; ret
;;
;; wasm[0]::function[4]::load8_offset:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 33
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload8_u32_offset8 x0, x8, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[5]::load16_offset:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 34
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload16le_u32_offset8 x0, x8, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[6]::load32_offset:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 36
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload32le_offset8 x0, x8, 32
;; pop_frame
;; ret
;;
;; wasm[0]::function[7]::load64_offset:
;; push_frame
;; xload64le_offset8 x8, x0, 104
;; zext32 x7, x2
;; xbc32_bound64_trap x2, x8, 40
;; xload64le_offset8 x8, x0, 96
;; xadd64 x8, x8, x7
;; xload64le_offset8 x0, x8, 32
;; pop_frame
;; ret

0 comments on commit 862f09c

Please sign in to comment.