diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs index 6fb8316b9270..4477422bee60 100644 --- a/cranelift/codegen/meta/src/pulley.rs +++ b/cranelift/codegen/meta/src/pulley.rs @@ -68,7 +68,7 @@ impl Inst<'_> { let src2 = parts.next().unwrap_or(dst); Operand::Binop { dst, src1, src2 } } - ("dst", ty) => Operand::Writable { name, ty }, + (name, ty) if name.starts_with("dst") => Operand::Writable { name, ty }, (name, "RegSet < XReg >") => Operand::Normal { name, ty: "XRegSet", @@ -137,7 +137,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> { format_string.push_str(name); format_string.push_str("}"); if ty.contains("Reg") { - if name == "dst" { + if matches!(op, Operand::Writable { .. }) { locals.push_str(&format!("let {name} = reg_name(*{name}.to_reg());\n")); } else { locals.push_str(&format!("let {name} = reg_name(**{name});\n")); @@ -342,7 +342,7 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { let mut rule = String::new(); isle.push_str(&format!("(decl pulley_{snake_name} (")); rule.push_str(&format!("(rule (pulley_{snake_name} ")); - let mut result = None; + let mut results = Vec::new(); let mut ops = Vec::new(); for op in inst.operands() { match op { @@ -352,16 +352,14 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { ops.push(name); } Operand::Writable { name: _, ty } => { - assert!(result.is_none(), "{} has >1 result", inst.snake_name); - result = Some(ty); + results.push(ty); } Operand::Binop { dst, src1, src2 } => { isle.push_str(&format!("{src1} {src2}")); rule.push_str("src1 src2"); ops.push("src1"); ops.push("src2"); - assert!(result.is_none(), "{} has >1 result", inst.snake_name); - result = Some(dst); + results.push(dst); } } isle.push_str(" "); @@ -370,8 +368,8 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { isle.push_str(") "); rule.push_str(")"); let ops = ops.join(" "); - match result { - Some(result) => { + match &results[..] { + [result] => { isle.push_str(result); rule.push_str(&format!( " @@ -384,12 +382,28 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> { result.to_lowercase() )); } - None => { + [a, b] => { + isle.push_str("ValueRegs"); + rule.push_str(&format!( + " + (let ( + (dst1 Writable{a} (temp_writable_{})) + (dst2 Writable{b} (temp_writable_{})) + (_ Unit (emit (RawInst.{name} dst1 dst2 {ops}))) + ) + (value_regs dst1 dst2)))\ +\n", + a.to_lowercase(), + b.to_lowercase(), + )); + } + [] => { isle.push_str("SideEffectNoResult"); rule.push_str(&format!( " (SideEffectNoResult.Inst (RawInst.{name} {ops})))\n", )); } + other => panic!("cannot codegen results {other:?}"), } isle.push_str(")\n"); diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index fe19e20bc41c..86a21e86ef82 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -215,6 +215,17 @@ (if-let neg_u32 (u32_try_from_u64 neg_u64)) neg_u32) +;; 128-bit addition +(rule 1 (lower (has_type $I128 (iadd a b))) + (let ((a ValueRegs a) + (b ValueRegs b)) + (pulley_xadd128 + (value_regs_get a 0) + (value_regs_get a 1) + (value_regs_get b 0) + (value_regs_get b 1)))) + +;; vector addition (rule 1 (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b)) (rule 1 (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b)) (rule 1 (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b)) @@ -260,6 +271,17 @@ (if-let c (u8_from_negated_iconst b)) (pulley_xadd64_u8 a c)) +;; 128-bit subtraction +(rule 1 (lower (has_type $I128 (isub a b))) + (let ((a ValueRegs a) + (b ValueRegs b)) + (pulley_xsub128 + (value_regs_get a 0) + (value_regs_get a 1) + (value_regs_get b 0) + (value_regs_get b 1)))) + +;; vector subtraction (rule 1 (lower (has_type $I8X16 (isub a b))) (pulley_vsubi8x16 a b)) (rule 1 (lower (has_type $I16X8 (isub a b))) (pulley_vsubi16x8 a b)) (rule 1 (lower (has_type $I32X4 (isub a b))) (pulley_vsubi32x4 a b)) @@ -286,6 +308,13 @@ (rule 4 (lower (has_type $I64 (imul a (i8_from_iconst b)))) (pulley_xmul64_s8 a b)) +;; 128-bit (or wide) multiplication +(rule (lower (has_type $I128 (imul (uextend a) (uextend b)))) + (pulley_xwidemul64_u (zext64 a) (zext64 b))) +(rule (lower (has_type $I128 (imul (sextend a) (sextend b)))) + (pulley_xwidemul64_s (sext64 a) (sext64 b))) + +;; vector multiplication (rule (lower (has_type $I8X16 (imul a b))) (pulley_vmuli8x16 a b)) (rule (lower (has_type $I16X8 (imul a b))) (pulley_vmuli16x8 a b)) (rule (lower (has_type $I32X4 (imul a b))) (pulley_vmuli32x4 a b)) diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index d023a006114b..2dcc8e1537c6 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -304,13 +304,6 @@ impl Compiler { if config.threads() { return true; } - // Unsupported proposals. Note that other proposals have partial - // support at this time (pulley is a work-in-progress) and so - // individual tests are listed below as "should fail" even if - // they're not covered in this list. - if config.wide_arithmetic() { - return true; - } } } diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 9202acb6a8c4..cb64ef094730 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -996,6 +996,17 @@ impl Interpreter<'_> { } ControlFlow::Continue(()) } + + fn get_i128(&self, lo: XReg, hi: XReg) -> i128 { + let lo = self.state[lo].get_u64(); + let hi = self.state[hi].get_i64(); + i128::from(lo) | (i128::from(hi) << 64) + } + + fn set_i128(&mut self, lo: XReg, hi: XReg, val: i128) { + self.state[lo].set_u64(val as u64); + self.state[hi].set_u64((val >> 64) as u64); + } } #[test] @@ -4791,4 +4802,64 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[dst].set_f64x2(a); ControlFlow::Continue(()) } + + fn xadd128( + &mut self, + dst_lo: XReg, + dst_hi: XReg, + lhs_lo: XReg, + lhs_hi: XReg, + rhs_lo: XReg, + rhs_hi: XReg, + ) -> ControlFlow { + let lhs = self.get_i128(lhs_lo, lhs_hi); + let rhs = self.get_i128(rhs_lo, rhs_hi); + let result = lhs.wrapping_add(rhs); + self.set_i128(dst_lo, dst_hi, result); + ControlFlow::Continue(()) + } + + fn xsub128( + &mut self, + dst_lo: XReg, + dst_hi: XReg, + lhs_lo: XReg, + lhs_hi: XReg, + rhs_lo: XReg, + rhs_hi: XReg, + ) -> ControlFlow { + let lhs = self.get_i128(lhs_lo, lhs_hi); + let rhs = self.get_i128(rhs_lo, rhs_hi); + let result = lhs.wrapping_sub(rhs); + self.set_i128(dst_lo, dst_hi, result); + ControlFlow::Continue(()) + } + + fn xwidemul64_s( + &mut self, + dst_lo: XReg, + dst_hi: XReg, + lhs: XReg, + rhs: XReg, + ) -> ControlFlow { + let lhs = self.state[lhs].get_i64(); + let rhs = self.state[rhs].get_i64(); + let result = i128::from(lhs).wrapping_mul(i128::from(rhs)); + self.set_i128(dst_lo, dst_hi, result); + ControlFlow::Continue(()) + } + + fn xwidemul64_u( + &mut self, + dst_lo: XReg, + dst_hi: XReg, + lhs: XReg, + rhs: XReg, + ) -> ControlFlow { + let lhs = self.state[lhs].get_u64(); + let rhs = self.state[rhs].get_u64(); + let result = u128::from(lhs).wrapping_mul(u128::from(rhs)); + self.set_i128(dst_lo, dst_hi, result as i128); + ControlFlow::Continue(()) + } } diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index 777e54d7f4a3..6e6be2017bbe 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -1278,6 +1278,39 @@ macro_rules! for_each_extended_op { vfma32x4 = Vfma32x4 { dst: VReg, a: VReg, b: VReg, c: VReg }; /// `dst = ieee_fma(a, b, c)` vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg }; + + /// `dst_lo:dst_hi = lhs_lo:lhs_hi + rhs_lo:rhs_hi` + xadd128 = Xadd128 { + dst_lo: XReg, + dst_hi: XReg, + lhs_lo: XReg, + lhs_hi: XReg, + rhs_lo: XReg, + rhs_hi: XReg + }; + /// `dst_lo:dst_hi = lhs_lo:lhs_hi - rhs_lo:rhs_hi` + xsub128 = Xsub128 { + dst_lo: XReg, + dst_hi: XReg, + lhs_lo: XReg, + lhs_hi: XReg, + rhs_lo: XReg, + rhs_hi: XReg + }; + /// `dst_lo:dst_hi = sext(lhs) * sext(rhs)` + xwidemul64_s = Xwidemul64S { + dst_lo: XReg, + dst_hi: XReg, + lhs: XReg, + rhs: XReg + }; + /// `dst_lo:dst_hi = zext(lhs) * zext(rhs)` + xwidemul64_u = Xwidemul64U { + dst_lo: XReg, + dst_hi: XReg, + lhs: XReg, + rhs: XReg + }; } }; }