Skip to content

Commit

Permalink
pulley: Implement some float simd ops (bytecodealliance#9869)
Browse files Browse the repository at this point in the history
* pulley: Implement some float simd ops

Gets a few more wast tests passing

* Enable some cranelift runtests
  • Loading branch information
alexcrichton authored Dec 20, 2024
1 parent 68976ba commit 99c5eb8
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 4 deletions.
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1131,11 +1131,15 @@

(rule (lower (has_type $F32 (fmax a b))) (pulley_fmaximum32 a b))
(rule (lower (has_type $F64 (fmax a b))) (pulley_fmaximum64 a b))
(rule (lower (has_type $F32X4 (fmax a b))) (pulley_vmaximumf32x4 a b))
(rule (lower (has_type $F64X2 (fmax a b))) (pulley_vmaximumf64x2 a b))

;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F32 (fmin a b))) (pulley_fminimum32 a b))
(rule (lower (has_type $F64 (fmin a b))) (pulley_fminimum64 a b))
(rule (lower (has_type $F32X4 (fmin a b))) (pulley_vminimumf32x4 a b))
(rule (lower (has_type $F64X2 (fmin a b))) (pulley_vminimumf64x2 a b))

;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1183,6 +1187,8 @@

(rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a))
(rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a))
(rule (lower (has_type $F32X4 (fabs a))) (pulley_vabsf32x4 a))
(rule (lower (has_type $F64X2 (fabs a))) (pulley_vabsf64x2 a))

;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fabs.clif
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ target x86_64
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fabs_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ target x86_64 skylake
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
Expand Down
4 changes: 0 additions & 4 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,20 +410,16 @@ impl WastTest {
"spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
"spec_testsuite/proposals/memory64/simd_lane.wast",
"spec_testsuite/proposals/memory64/relaxed_min_max.wast",
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
"spec_testsuite/simd_f32x4.wast",
"spec_testsuite/simd_f32x4_arith.wast",
"spec_testsuite/simd_f32x4_cmp.wast",
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
"spec_testsuite/simd_f32x4_rounding.wast",
"spec_testsuite/simd_f64x2.wast",
"spec_testsuite/simd_f64x2_arith.wast",
"spec_testsuite/simd_f64x2_cmp.wast",
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
Expand Down
52 changes: 52 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4079,4 +4079,56 @@ impl ExtendedOpVisitor for Interpreter<'_> {
self.state[dst].set_i64x2(a.map(|i| i.wrapping_neg()));
ControlFlow::Continue(())
}

// Reads the four `f32` lanes of `src`, applies `wasm_abs` to each lane, and
// stores the resulting vector in `dst`.
fn vabsf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    let mut lanes = self.state[src].get_f32x4();
    for lane in lanes.iter_mut() {
        *lane = (*lane).wasm_abs();
    }
    self.state[dst].set_f32x4(lanes);
    ControlFlow::Continue(())
}

// Reads the two `f64` lanes of `src`, applies `wasm_abs` to each lane, and
// stores the resulting vector in `dst`.
fn vabsf64x2(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
    let mut lanes = self.state[src].get_f64x2();
    for lane in lanes.iter_mut() {
        *lane = (*lane).wasm_abs();
    }
    self.state[dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}

// Lane-wise `wasm_maximum` over two `f32x4` vectors: each lane of `src1` is
// combined with the matching lane of `src2`, and the result goes to `dst`.
fn vmaximumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read before `dst` is written.
    let mut lanes = self.state[operands.src1].get_f32x4();
    let rhs = self.state[operands.src2].get_f32x4();
    for (idx, lane) in lanes.iter_mut().enumerate() {
        *lane = (*lane).wasm_maximum(rhs[idx]);
    }
    self.state[operands.dst].set_f32x4(lanes);
    ControlFlow::Continue(())
}

// Lane-wise `wasm_maximum` over two `f64x2` vectors: each lane of `src1` is
// combined with the matching lane of `src2`, and the result goes to `dst`.
fn vmaximumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read before `dst` is written.
    let mut lanes = self.state[operands.src1].get_f64x2();
    let rhs = self.state[operands.src2].get_f64x2();
    for (idx, lane) in lanes.iter_mut().enumerate() {
        *lane = (*lane).wasm_maximum(rhs[idx]);
    }
    self.state[operands.dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}

// Lane-wise `wasm_minimum` over two `f32x4` vectors: each lane of `src1` is
// combined with the matching lane of `src2`, and the result goes to `dst`.
fn vminimumf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read before `dst` is written.
    let mut lanes = self.state[operands.src1].get_f32x4();
    let rhs = self.state[operands.src2].get_f32x4();
    for (idx, lane) in lanes.iter_mut().enumerate() {
        *lane = (*lane).wasm_minimum(rhs[idx]);
    }
    self.state[operands.dst].set_f32x4(lanes);
    ControlFlow::Continue(())
}

// Lane-wise `wasm_minimum` over two `f64x2` vectors: each lane of `src1` is
// combined with the matching lane of `src2`, and the result goes to `dst`.
fn vminimumf64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
    // Both sources are read before `dst` is written.
    let mut lanes = self.state[operands.src1].get_f64x2();
    let rhs = self.state[operands.src2].get_f64x2();
    for (idx, lane) in lanes.iter_mut().enumerate() {
        *lane = (*lane).wasm_minimum(rhs[idx]);
    }
    self.state[operands.dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}
}
13 changes: 13 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,19 @@ macro_rules! for_each_extended_op {
vneg32x4 = Vneg32x4 { dst: VReg, src: VReg };
/// `dst = -src`
vneg64x2 = Vneg64x2 { dst: VReg, src: VReg };

/// `dst = |src|`
vabsf32x4 = Vabsf32x4 { dst: VReg, src: VReg };
/// `dst = |src|`
vabsf64x2 = Vabsf64x2 { dst: VReg, src: VReg };
/// `dst = ieee_maximum(src1, src2)`
vmaximumf32x4 = Vmaximumf32x4 { operands: BinaryOperands<VReg> };
/// `dst = ieee_maximum(src1, src2)`
vmaximumf64x2 = Vmaximumf64x2 { operands: BinaryOperands<VReg> };
/// `dst = ieee_minimum(src1, src2)`
vminimumf32x4 = Vminimumf32x4 { operands: BinaryOperands<VReg> };
/// `dst = ieee_minimum(src1, src2)`
vminimumf64x2 = Vminimumf64x2 { operands: BinaryOperands<VReg> };
}
};
}
Expand Down

0 comments on commit 99c5eb8

Please sign in to comment.