From e51d610925b639c68692f6eb02ceffb7e8aef887 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 19 Dec 2024 10:29:36 -0800 Subject: [PATCH] pulley: Implement some float simd ops Gets a few more wast tests passing --- .../codegen/src/isa/pulley_shared/lower.isle | 6 +++ crates/wast-util/src/lib.rs | 4 -- pulley/src/interp.rs | 52 +++++++++++++++++++ pulley/src/lib.rs | 13 +++++ 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 15133e563edc..c6047902cac4 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -1121,11 +1121,15 @@ (rule (lower (has_type $F32 (fmax a b))) (pulley_fmaximum32 a b)) (rule (lower (has_type $F64 (fmax a b))) (pulley_fmaximum64 a b)) +(rule (lower (has_type $F32X4 (fmax a b))) (pulley_vmaximumf32x4 a b)) +(rule (lower (has_type $F64X2 (fmax a b))) (pulley_vmaximumf64x2 a b)) ;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32 (fmin a b))) (pulley_fminimum32 a b)) (rule (lower (has_type $F64 (fmin a b))) (pulley_fminimum64 a b)) +(rule (lower (has_type $F32X4 (fmin a b))) (pulley_vminimumf32x4 a b)) +(rule (lower (has_type $F64X2 (fmin a b))) (pulley_vminimumf64x2 a b)) ;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1173,6 +1177,8 @@ (rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a)) (rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a)) +(rule (lower (has_type $F32X4 (fabs a))) (pulley_vabsf32x4 a)) +(rule (lower (has_type $F64X2 (fabs a))) (pulley_vabsf64x2 a)) ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 5ab270879069..621ec9e5098b 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -410,20 +410,16 @@ impl 
WastTest { "spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast", "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast", "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast", - "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast", "spec_testsuite/proposals/memory64/simd_lane.wast", - "spec_testsuite/proposals/memory64/relaxed_min_max.wast", "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast", "spec_testsuite/proposals/memory64/relaxed_dot_product.wast", "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast", "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast", "spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast", - "spec_testsuite/simd_f32x4.wast", "spec_testsuite/simd_f32x4_arith.wast", "spec_testsuite/simd_f32x4_cmp.wast", "spec_testsuite/simd_f32x4_pmin_pmax.wast", "spec_testsuite/simd_f32x4_rounding.wast", - "spec_testsuite/simd_f64x2.wast", "spec_testsuite/simd_f64x2_arith.wast", "spec_testsuite/simd_f64x2_cmp.wast", "spec_testsuite/simd_f64x2_pmin_pmax.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index cd1dbd1b4e15..4bcd46a766fe 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -4079,4 +4079,56 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg())); ControlFlow::Continue(()) } + + fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow { + let a = self.state[src].get_f32x4(); + self.state[dst].set_f32x4(a.map(|i| i.wasm_abs())); + ControlFlow::Continue(()) + } + + fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow { + let a = self.state[src].get_f64x2(); + self.state[dst].set_f64x2(a.map(|i| i.wasm_abs())); + ControlFlow::Continue(()) + } + + fn vmaximumf32x4(&mut self, operands: BinaryOperands) -> ControlFlow { + let mut a = self.state[operands.src1].get_f32x4(); + let b = self.state[operands.src2].get_f32x4(); + for (a, b) in a.iter_mut().zip(&b) { + *a = a.wasm_maximum(*b); + } + 
self.state[operands.dst].set_f32x4(a); + ControlFlow::Continue(()) + } + + fn vmaximumf64x2(&mut self, operands: BinaryOperands) -> ControlFlow { + let mut a = self.state[operands.src1].get_f64x2(); + let b = self.state[operands.src2].get_f64x2(); + for (a, b) in a.iter_mut().zip(&b) { + *a = a.wasm_maximum(*b); + } + self.state[operands.dst].set_f64x2(a); + ControlFlow::Continue(()) + } + + fn vminimumf32x4(&mut self, operands: BinaryOperands) -> ControlFlow { + let mut a = self.state[operands.src1].get_f32x4(); + let b = self.state[operands.src2].get_f32x4(); + for (a, b) in a.iter_mut().zip(&b) { + *a = a.wasm_minimum(*b); + } + self.state[operands.dst].set_f32x4(a); + ControlFlow::Continue(()) + } + + fn vminimumf64x2(&mut self, operands: BinaryOperands) -> ControlFlow { + let mut a = self.state[operands.src1].get_f64x2(); + let b = self.state[operands.src2].get_f64x2(); + for (a, b) in a.iter_mut().zip(&b) { + *a = a.wasm_minimum(*b); + } + self.state[operands.dst].set_f64x2(a); + ControlFlow::Continue(()) + } } diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index db951407b614..15ee315095c1 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -1116,6 +1116,19 @@ macro_rules! for_each_extended_op { vneg32x4 = Vneg32x4 { dst: VReg, src: VReg }; /// `dst = -src` vneg64x2 = Vneg64x2 { dst: VReg, src: VReg }; + + /// `dst = |src|`, applied independently to each lane of an `f32x4` vector + vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg }; + /// `dst = |src|`, applied independently to each lane of an `f64x2` vector + vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg }; + /// `dst = ieee_maximum(src1, src2)`, applied to corresponding lanes of two `f32x4` vectors + vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands }; + /// `dst = ieee_maximum(src1, src2)`, applied to corresponding lanes of two `f64x2` vectors + vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands }; + /// `dst = ieee_minimum(src1, src2)`, applied to corresponding lanes of two `f32x4` vectors + vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands }; + /// `dst = ieee_minimum(src1, src2)`, applied to corresponding lanes of two `f64x2` vectors + vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands }; } }; }