From 862f09c08fa16eb8aeb3c71a1febaf6cee30338c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sun, 15 Dec 2024 11:09:05 -0800 Subject: [PATCH] pulley: Add some macro-instructions related to bounds-checks This commit starts down the path of optimizing wasm loads/stores in Pulley with macro-instructions. It's expected that these instructions are so common that it's worth putting them in the 1-byte namespace of opcodes. Locally this gets a 10% speedup on the sightglass bz2 benchmark. --- .../codegen/src/isa/pulley_shared/lower.isle | 40 ++++++ pulley/src/interp.rs | 20 +++ pulley/src/lib.rs | 6 + tests/disas/pulley/pulley32_memory32.wat | 109 ++++++++++++++++ tests/disas/pulley/pulley64_memory32.wat | 117 ++++++++++++++++++ 5 files changed, 292 insertions(+) create mode 100644 tests/disas/pulley/pulley32_memory32.wat create mode 100644 tests/disas/pulley/pulley64_memory32.wat diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index fe19e20bc41c..ee32194e4994 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -119,6 +119,46 @@ (rule (lower (trapnz cond code)) (side_effect (pulley_trap_if (lower_cond cond) code))) +;; Special-cases for bounds-checks-related traps emitted for wasm loads/stores. +;; Each of these translates to a single "xbc" (x-register bounds check) +;; instruction +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) + a + b @ (value_type $I32)) + code)) + (side_effect (pulley_xbc32_bound32_trap a b 0 code))) + +;; a >= b == a > b - 1, so handle that here too. +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThanOrEqual) + a + b @ (value_type $I32)) + code)) + (side_effect (pulley_xbc32_bound32_trap a b 1 code))) + +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) + (uextend a @ (value_type $I32)) + b @ (value_type $I64)) + code)) + (side_effect (pulley_xbc32_bound64_trap a b 0 code))) + +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThanOrEqual) + (uextend a @ (value_type $I32)) + b @ (value_type $I64)) + code)) + (side_effect (pulley_xbc32_bound64_trap a b 1 code))) + +(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) + a + (isub b @ (value_type $I32) (u8_from_iconst c))) + code)) + (side_effect (pulley_xbc32_bound32_trap a b c code))) + +(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) + (uextend a @ (value_type $I32)) + (isub b @ (value_type $I64) (u8_from_iconst c))) + code)) + (side_effect (pulley_xbc32_bound64_trap a b c code))) + ;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (get_stack_pointer)) diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 9202acb6a8c4..378f799ffff1 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -2371,6 +2371,26 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_i64(a.wrapping_abs()); ControlFlow::Continue(()) } + + fn xbc32_bound64_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow { + let bound = self.state[bound].get_u64(); + let addr = u64::from(self.state[addr].get_u32()); + if addr > bound.wrapping_sub(u64::from(off)) { + self.done_trap::() + } else { + ControlFlow::Continue(()) + } + } + + fn xbc32_bound32_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow { + let bound = self.state[bound].get_u32(); + let addr = self.state[addr].get_u32(); + if addr > bound.wrapping_sub(u32::from(off)) { + self.done_trap::() + } else { + ControlFlow::Continue(()) + } + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 777e54d7f4a3..ddacfd4e8b4f 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -580,6 +580,12 @@ macro_rules! for_each_op { xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; /// `dst = low32(cond) ? if_nonzero : if_zero` xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; + + /// `trapif(zext(low32(addr)) > bound - off)` (unsigned) + xbc32_bound64_trap = XBc32Bound64Trap { addr: XReg, bound: XReg, off: u8 }; + + /// `trapif(zext(low32(addr)) > low32(bound) - off)` (unsigned) + xbc32_bound32_trap = XBc32Bound32Trap { addr: XReg, bound: XReg, off: u8 }; } }; } diff --git a/tests/disas/pulley/pulley32_memory32.wat b/tests/disas/pulley/pulley32_memory32.wat new file mode 100644 index 000000000000..6fccb99acc75 --- /dev/null +++ b/tests/disas/pulley/pulley32_memory32.wat @@ -0,0 +1,109 @@ +;;! target = "pulley32" +;;! test = "compile" + +(module + (memory 1) + + (func $load8 (param i32) (result i32) + (i32.load8_u (local.get 0))) + + (func $load16 (param i32) (result i32) + (i32.load16_u (local.get 0))) + + (func $load32 (param i32) (result i32) + (i32.load (local.get 0))) + + (func $load64 (param i32) (result i64) + (i64.load (local.get 0))) + + (func $load8_offset (param i32) (result i32) + (i32.load8_u offset=32 (local.get 0))) + + (func $load16_offset (param i32) (result i32) + (i32.load16_u offset=32 (local.get 0))) + + (func $load32_offset (param i32) (result i32) + (i32.load offset=32 (local.get 0))) + + (func $load64_offset (param i32) (result i64) + (i64.load offset=32 (local.get 0))) +) +;; wasm[0]::function[0]::load8: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 1 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload8_u32_offset8 x0, x7, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[1]::load16: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 2 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload16le_u32_offset8 x0, x7, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[2]::load32: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 4 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload32le_offset8 x0, x7, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[3]::load64: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 8 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload64le_offset8 x0, x7, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[4]::load8_offset: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 33 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload8_u32_offset8 x0, x7, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[5]::load16_offset: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 34 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload16le_u32_offset8 x0, x7, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[6]::load32_offset: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 36 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload32le_offset8 x0, x7, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[7]::load64_offset: +;; push_frame +;; xload32le_offset8 x6, x0, 52 +;; xbc32_bound32_trap x2, x6, 40 +;; xload32le_offset8 x7, x0, 48 +;; xadd32 x7, x7, x2 +;; xload64le_offset8 x0, x7, 32 +;; pop_frame +;; ret diff --git a/tests/disas/pulley/pulley64_memory32.wat b/tests/disas/pulley/pulley64_memory32.wat new file mode 100644 index 000000000000..e32f59d3731e --- /dev/null +++ b/tests/disas/pulley/pulley64_memory32.wat @@ -0,0 +1,117 @@ +;;! target = "pulley64" +;;! test = "compile" + +(module + (memory 1) + + (func $load8 (param i32) (result i32) + (i32.load8_u (local.get 0))) + + (func $load16 (param i32) (result i32) + (i32.load16_u (local.get 0))) + + (func $load32 (param i32) (result i32) + (i32.load (local.get 0))) + + (func $load64 (param i32) (result i64) + (i64.load (local.get 0))) + + (func $load8_offset (param i32) (result i32) + (i32.load8_u offset=32 (local.get 0))) + + (func $load16_offset (param i32) (result i32) + (i32.load16_u offset=32 (local.get 0))) + + (func $load32_offset (param i32) (result i32) + (i32.load offset=32 (local.get 0))) + + (func $load64_offset (param i32) (result i64) + (i64.load offset=32 (local.get 0))) +) +;; wasm[0]::function[0]::load8: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 1 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload8_u32_offset8 x0, x8, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[1]::load16: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 2 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload16le_u32_offset8 x0, x8, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[2]::load32: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 4 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload32le_offset8 x0, x8, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[3]::load64: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 8 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload64le_offset8 x0, x8, 0 +;; pop_frame +;; ret +;; +;; wasm[0]::function[4]::load8_offset: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 33 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload8_u32_offset8 x0, x8, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[5]::load16_offset: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 34 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload16le_u32_offset8 x0, x8, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[6]::load32_offset: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 36 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload32le_offset8 x0, x8, 32 +;; pop_frame +;; ret +;; +;; wasm[0]::function[7]::load64_offset: +;; push_frame +;; xload64le_offset8 x8, x0, 104 +;; zext32 x7, x2 +;; xbc32_bound64_trap x2, x8, 40 +;; xload64le_offset8 x8, x0, 96 +;; xadd64 x8, x8, x7 +;; xload64le_offset8 x0, x8, 32 +;; pop_frame +;; ret