Skip to content

Commit

Permalink
pulley: Add immediate payloads to more opcodes (bytecodealliance#9861)
Browse files Browse the repository at this point in the history
* pulley: Add immediate payloads to more opcodes

This commit adds immediate payloads to the following instructions:

* `xmul32` - `xmul32_s8` / `xmul32_s32`
* `xmul64` - `xmul64_s8` / `xmul64_s32`
* `xband32` - `xband32_s8` / `xband32_s32`
* `xband64` - `xband64_s8` / `xband64_s32`
* `xbor32` - `xbor32_s8` / `xbor32_s32`
* `xbor64` - `xbor64_s8` / `xbor64_s32`
* `xbxor32` - `xbxor32_s8` / `xbxor32_s32`
* `xbxor64` - `xbxor64_s8` / `xbxor64_s32`
* `xshl32` - `xshl32_u6`
* `xshl64` - `xshl64_u6`
* `xshr32_u` - `xshr32_u_u6`
* `xshr64_u` - `xshr64_u_u6`
* `xshr32_s` - `xshr32_s_u6`
* `xshr64_s` - `xshr64_s_u6`

For shifts there's no need to have 32-bit immediates (or even 8-bit)
since 6 bits is enough to encode all the immediates. This means that the
6-bit immediate is packed within `BinaryOperands` as a new `U6` type.

This commit unfortunately does not shrink `spidermonkey.cwasm`
significantly beyond the prior 29M. This is nevertheless expected to be
relatively important for performance.

* Fix test expectations
  • Loading branch information
alexcrichton authored Dec 19, 2024
1 parent 7a05ab0 commit 1e4c470
Show file tree
Hide file tree
Showing 19 changed files with 1,289 additions and 21 deletions.
15 changes: 10 additions & 5 deletions cranelift/codegen/meta/src/pulley.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,14 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
pat.push_str(",");
format_string.push_str(&format!(" // trap={{{name}:?}}"));
}
Operand::Binop { .. } => {
Operand::Binop { src2, .. } => {
pat.push_str("dst, src1, src2,");
format_string.push_str(" {dst}, {src1}, {src2}");
locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n"));
locals.push_str(&format!("let src1 = reg_name(**src1);\n"));
locals.push_str(&format!("let src2 = reg_name(**src2);\n"));
if src2.contains("Reg") {
locals.push_str(&format!("let src2 = reg_name(**src2);\n"));
}
}
}
}
Expand Down Expand Up @@ -189,11 +191,14 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
}
}
Operand::TrapCode { .. } => {}
Operand::Binop { .. } => {
pat.push_str("dst, src1, src2,");
Operand::Binop { src2, .. } => {
pat.push_str("dst, src1,");
uses.push("src1");
uses.push("src2");
defs.push("dst");
if src2.contains("Reg") {
pat.push_str("src2,");
uses.push("src2");
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
(rule (raw_inst_to_inst inst) (MInst.Raw inst))
(convert RawInst MInst raw_inst_to_inst)

(type U6 (primitive U6))
(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type BoxReturnCallInfo (primitive BoxReturnCallInfo))
Expand Down
93 changes: 80 additions & 13 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@
(rule (lower (has_type $I32 (imul a b))) (pulley_xmul32 a b))
(rule (lower (has_type $I64 (imul a b))) (pulley_xmul64 a b))

(rule 1 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i32_from_iconst b))))
(pulley_xmul32_s32 a b))
(rule 2 (lower (has_type $I64 (imul a (i32_from_iconst b))))
(pulley_xmul64_s32 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (imul a (i8_from_iconst b))))
(pulley_xmul32_s8 a b))
(rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b))))
(pulley_xmul64_s8 a b))

(rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b))
(rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b))
(rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b))
Expand Down Expand Up @@ -294,11 +303,31 @@
(rule (lower (has_type $I64 (ishl a b)))
(pulley_xshl64 a b))

;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (ishl a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshl32_u6 a n))
(rule 1 (lower (has_type $I64 (ishl a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshl64_u6 a n))

;; vector shifts

(rule (lower (has_type $I8X16 (ishl a b))) (pulley_vshli8x16 a b))
(rule (lower (has_type $I16X8 (ishl a b))) (pulley_vshli16x8 a b))
(rule (lower (has_type $I32X4 (ishl a b))) (pulley_vshli32x4 a b))
(rule (lower (has_type $I64X2 (ishl a b))) (pulley_vshli64x2 a b))

;; Helper to extract a constant from `Value`, mask it to 6 bits, and then make a
;; `U6`.
(decl pure partial u6_shift_from_iconst (Value) U6)
(rule (u6_shift_from_iconst (u64_from_iconst val))
(if-let (u6_from_u8 x) (u64_as_u8 (u64_and val 0x3f)))
x)

;; Extractor that matches a `u8` and, on success, binds the corresponding `U6`.
;; The matching logic is external: it is provided by the Rust-side
;; `u6_from_u8` method, which returns an `Option<U6>`.
(decl u6_from_u8 (U6) u8)
(extern extractor u6_from_u8 u6_from_u8)

;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8 (ushr a b)))
Expand All @@ -313,6 +342,16 @@
(rule (lower (has_type $I64 (ushr a b)))
(pulley_xshr64_u a b))

;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (ushr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr32_u_u6 a n))
(rule 1 (lower (has_type $I64 (ushr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr64_u_u6 a n))

;; vector shifts

(rule (lower (has_type $I8X16 (ushr a b))) (pulley_vshri8x16_u a b))
(rule (lower (has_type $I16X8 (ushr a b))) (pulley_vshri16x8_u a b))
(rule (lower (has_type $I32X4 (ushr a b))) (pulley_vshri32x4_u a b))
Expand All @@ -332,40 +371,68 @@
(rule (lower (has_type $I64 (sshr a b)))
(pulley_xshr64_s a b))

;; Special-case constant shift amounts.
(rule 1 (lower (has_type $I32 (sshr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr32_s_u6 a n))
(rule 1 (lower (has_type $I64 (sshr a b)))
(if-let n (u6_shift_from_iconst b))
(pulley_xshr64_s_u6 a n))

;; vector shifts

(rule (lower (has_type $I8X16 (sshr a b))) (pulley_vshri8x16_s a b))
(rule (lower (has_type $I16X8 (sshr a b))) (pulley_vshri16x8_s a b))
(rule (lower (has_type $I32X4 (sshr a b))) (pulley_vshri32x4_s a b))
(rule (lower (has_type $I64X2 (sshr a b))) (pulley_vshri64x2_s a b))

;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (band a b)))
(pulley_xband32 a b))
(rule 0 (lower (has_type (fits_in_32 _) (band a b))) (pulley_xband32 a b))
(rule 1 (lower (has_type $I64 (band a b))) (pulley_xband64 a b))

(rule 1 (lower (has_type $I64 (band a b)))
(pulley_xband64 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (band a (i32_from_iconst b))))
(pulley_xband32_s32 a b))
(rule 4 (lower (has_type $I64 (band a (i32_from_iconst b))))
(pulley_xband64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (band a (i8_from_iconst b))))
(pulley_xband32_s8 a b))
(rule 6 (lower (has_type $I64 (band a (i8_from_iconst b))))
(pulley_xband64_s8 a b))

(rule 2 (lower (has_type (ty_vec128 _) (band a b)))
(pulley_vband128 a b))

;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (bor a b)))
(pulley_xbor32 a b))
(rule 0 (lower (has_type (fits_in_32 _) (bor a b))) (pulley_xbor32 a b))
(rule 1 (lower (has_type $I64 (bor a b))) (pulley_xbor64 a b))

(rule 1 (lower (has_type $I64 (bor a b)))
(pulley_xbor64 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i32_from_iconst b))))
(pulley_xbor32_s32 a b))
(rule 4 (lower (has_type $I64 (bor a (i32_from_iconst b))))
(pulley_xbor64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bor a (i8_from_iconst b))))
(pulley_xbor32_s8 a b))
(rule 6 (lower (has_type $I64 (bor a (i8_from_iconst b))))
(pulley_xbor64_s8 a b))

(rule 2 (lower (has_type (ty_vec128 _) (bor a b)))
(pulley_vbor128 a b))

;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (bxor a b)))
(pulley_xbxor32 a b))

(rule 1 (lower (has_type $I64 (bxor a b)))
(pulley_xbxor64 a b))
(rule 0 (lower (has_type (fits_in_32 _) (bxor a b))) (pulley_xbxor32 a b))
(rule 1 (lower (has_type $I64 (bxor a b))) (pulley_xbxor64 a b))

(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i32_from_iconst b))))
(pulley_xbxor32_s32 a b))
(rule 4 (lower (has_type $I64 (bxor a (i32_from_iconst b))))
(pulley_xbxor64_s32 a b))
(rule 5 (lower (has_type (ty_int (fits_in_32 _)) (bxor a (i8_from_iconst b))))
(pulley_xbxor32_s8 a b))
(rule 6 (lower (has_type $I64 (bxor a (i8_from_iconst b))))
(pulley_xbxor64_s8 a b))

(rule 2 (lower (has_type (ty_vec128 _) (bxor a b)))
(pulley_vbxor128 a b))
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::machinst::{
CallInfo, IsTailCall, MachInst, Reg, VCodeConstant, VCodeConstantData,
};
use alloc::boxed::Box;
use pulley_interpreter::U6;
use regalloc2::PReg;
type Unit = ();
type VecArgPair = Vec<ArgPair>;
Expand Down Expand Up @@ -120,6 +121,10 @@ where
/// Returns the result of calling `invert()` on `cond`.
///
/// `self` is not read or modified; the method only forwards to `Cond::invert`.
fn cond_invert(&mut self, cond: &Cond) -> Cond {
cond.invert()
}

/// Attempts to build a `U6` from the byte `imm` by forwarding to `U6::new`.
///
/// Returns whatever `U6::new` returns: `Some(U6)` on success, `None` otherwise.
/// NOTE(review): presumably `None` means `imm` does not fit in 6 bits —
/// confirm against `pulley_interpreter::U6::new`.
fn u6_from_u8(&mut self, imm: u8) -> Option<U6> {
U6::new(imm)
}
}

/// The main entry point for lowering with ISLE.
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isle_prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,10 @@ macro_rules! isle_common_prelude_methods {
val.try_into().ok()
}

/// Checked narrowing of an `i32` to an `i8`.
///
/// Returns `Some(val as i8)` when `val` lies within `i8::MIN..=i8::MAX`, and
/// `None` when the value would not fit (the conversion error is discarded).
fn i32_as_i8(&mut self, val: i32) -> Option<i8> {
// `try_into` targets `i8` via the return type; `.ok()` drops the error detail.
val.try_into().ok()
}

/// Reinterprets a `u8` as an `i8` with an `as` cast.
///
/// The bit pattern is preserved, so inputs of 128 and above come back as
/// negative values (e.g. `255` becomes `-1`). This conversion never fails.
fn u8_as_i8(&mut self, val: u8) -> i8 {
val as i8
}
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@
(decl u32_as_u16 (u16) u32)
(extern extractor u32_as_u16 u32_as_u16)

(decl i32_as_i8 (i8) i32)
(extern extractor i32_as_i8 i32_as_i8)

(decl pure u64_as_i32 (u64) i32)
(extern constructor u64_as_i32 u64_as_i32)

Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/prelude_lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,10 @@
(extractor (u64_from_iconst x)
(def_inst (iconst (u64_from_imm64 x))))

(decl i8_from_iconst (i8) Value)
(extractor (i8_from_iconst x)
(i32_from_iconst (i32_as_i8 x)))

;; Extract a constant `i32` from a value defined by an `iconst`.
;; The value is sign extended to 32 bits.
(spec (i32_from_iconst arg)
Expand Down
Loading

0 comments on commit 1e4c470

Please sign in to comment.