Skip to content

Commit

Permalink
pulley: Get simd_boolean.wast test passing
Browse files Browse the repository at this point in the history
Fill out some bitmask/test instructions for vectors.
  • Loading branch information
alexcrichton committed Dec 17, 2024
1 parent de17231 commit 3fa6fa2
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 3 deletions.
29 changes: 29 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -843,3 +843,32 @@
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))

;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One rule per input vector type: the type bound to `a` (`$I8X16`, `$I16X8`,
;; `$I32X4`, `$I64X2`) selects the `pulley_vbitmask*` constructor with the
;; matching lane-shape suffix. Every rule additionally requires the result
;; type of the `vhigh_bits` instruction to satisfy `fits_in_32`.
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I8X16))))
(pulley_vbitmask8x16 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I16X8))))
(pulley_vbitmask16x8 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I32X4))))
(pulley_vbitmask32x4 a))
(rule (lower (has_type (fits_in_32 _) (vhigh_bits a @ (value_type $I64X2))))
(pulley_vbitmask64x2 a))

;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dispatch on the vector type of the operand `a`. The float vector types
;; (`$F32X4`, `$F64X2`) are lowered with the same constructors as the integer
;; types of equal lane width (`pulley_valltrue32x4` / `pulley_valltrue64x2`).
(rule (lower (vall_true a @ (value_type $I8X16))) (pulley_valltrue8x16 a))
(rule (lower (vall_true a @ (value_type $I16X8))) (pulley_valltrue16x8 a))
(rule (lower (vall_true a @ (value_type $I32X4))) (pulley_valltrue32x4 a))
(rule (lower (vall_true a @ (value_type $I64X2))) (pulley_valltrue64x2 a))
(rule (lower (vall_true a @ (value_type $F32X4))) (pulley_valltrue32x4 a))
(rule (lower (vall_true a @ (value_type $F64X2))) (pulley_valltrue64x2 a))

;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Dispatch on the vector type of the operand `a`. The float vector types
;; (`$F32X4`, `$F64X2`) are lowered with the same constructors as the integer
;; types of equal lane width (`pulley_vanytrue32x4` / `pulley_vanytrue64x2`).
(rule (lower (vany_true a @ (value_type $I8X16))) (pulley_vanytrue8x16 a))
(rule (lower (vany_true a @ (value_type $I16X8))) (pulley_vanytrue16x8 a))
(rule (lower (vany_true a @ (value_type $I32X4))) (pulley_vanytrue32x4 a))
(rule (lower (vany_true a @ (value_type $I64X2))) (pulley_vanytrue64x2 a))
(rule (lower (vany_true a @ (value_type $F32X4))) (pulley_vanytrue32x4 a))
(rule (lower (vany_true a @ (value_type $F64X2))) (pulley_vanytrue64x2 a))
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-valltrue.clif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %vall_true_i8x16(i8x16) -> i8 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-vanytrue.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %vany_true_i8x16(i8x16) -> i8 {
block0(v0: i8x16):
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-vhighbits.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %vhighbits_i8x16(i8x16) -> i16 {
block0(v0: i8x16):
Expand Down
3 changes: 0 additions & 3 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,8 @@ impl WastTest {
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
"misc_testsuite/simd/cvt-from-uint.wast",
"misc_testsuite/simd/issue4807.wast",
"misc_testsuite/simd/issue6725-no-egraph-panic.wast",
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
"misc_testsuite/simd/load_splat_out_of_bounds.wast",
"misc_testsuite/simd/replace-lane-preserve.wast",
"misc_testsuite/simd/spillslot-size-fuzzbug.wast",
"misc_testsuite/simd/v128-select.wast",
Expand All @@ -430,7 +428,6 @@ impl WastTest {
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
"spec_testsuite/simd_boolean.wast",
"spec_testsuite/simd_conversions.wast",
"spec_testsuite/simd_f32x4.wast",
"spec_testsuite/simd_f32x4_arith.wast",
Expand Down
100 changes: 100 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2825,6 +2825,106 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_u128((c & x) | (!c & y));
ControlFlow::Continue(())
}

/// Gathers the top bit (bit 7) of each lane read from `src` via
/// `get_u8x16` and stores the packed mask into `dst` with `set_u32`.
///
/// The lanes are folded from the last element to the first, shifting the
/// accumulator left each step, so the element at index `i` ends up in bit
/// `i` of the mask.
fn vbitmask8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u8x16();
    let mask = lanes
        .iter()
        .rev()
        .fold(0u32, |acc, lane| (acc << 1) | (*lane >> 7) as u32);
    self.state[dst].set_u32(mask);
    ControlFlow::Continue(())
}

/// Gathers the top bit (bit 15) of each lane read from `src` via
/// `get_u16x8` and stores the packed mask into `dst` with `set_u32`.
///
/// The element at index `i` contributes bit `i` of the mask.
fn vbitmask16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u16x8();
    let mut mask = 0u32;
    for (bit, lane) in lanes.iter().enumerate() {
        // Isolate the sign bit of this lane and drop it at its lane index.
        mask |= ((*lane >> 15) as u32) << bit;
    }
    self.state[dst].set_u32(mask);
    ControlFlow::Continue(())
}

/// Gathers the top bit (bit 31) of each lane read from `src` via
/// `get_u32x4` and stores the packed mask into `dst` with `set_u32`.
///
/// The element at index `i` contributes bit `i` of the mask.
fn vbitmask32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u32x4();
    let mut mask = 0u32;
    for (bit, lane) in lanes.iter().enumerate() {
        // `*lane >> 31` is 0 or 1; place it at this lane's index.
        mask |= (*lane >> 31) << bit;
    }
    self.state[dst].set_u32(mask);
    ControlFlow::Continue(())
}

/// Gathers the top bit (bit 63) of each lane read from `src` via
/// `get_u64x2` and stores the packed mask into `dst` with `set_u32`.
///
/// The lanes are folded from the last element to the first, shifting the
/// accumulator left each step, so the element at index `i` ends up in bit
/// `i` of the mask.
fn vbitmask64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u64x2();
    let mask = lanes
        .iter()
        .rev()
        // After the shift only 0 or 1 remains, so the narrowing cast is lossless.
        .fold(0u32, |acc, lane| (acc << 1) | (*lane >> 63) as u32);
    self.state[dst].set_u32(mask);
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when every lane read from `src` with
/// `get_u8x16` is nonzero, and 0 otherwise.
fn valltrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u8x16();
    // "All nonzero" is the same as "no lane equals zero".
    let every_lane_set = !lanes.iter().any(|&lane| lane == 0);
    self.state[dst].set_u32(u32::from(every_lane_set));
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when every lane read from `src` with
/// `get_u16x8` is nonzero, and 0 otherwise.
fn valltrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u16x8();
    let verdict = if lanes.iter().all(|&lane| lane != 0) {
        1
    } else {
        0
    };
    self.state[dst].set_u32(verdict);
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when every lane read from `src` with
/// `get_u32x4` is nonzero, and 0 otherwise.
fn valltrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u32x4();
    let mut verdict = 1;
    for lane in lanes.iter() {
        // A single zero lane settles the answer; no need to look further.
        if *lane == 0 {
            verdict = 0;
            break;
        }
    }
    self.state[dst].set_u32(verdict);
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when every lane read from `src` with
/// `get_u64x2` is nonzero, and 0 otherwise.
fn valltrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u64x2();
    let every_lane_set = lanes.iter().fold(true, |ok, lane| ok && *lane != 0);
    self.state[dst].set_u32(u32::from(every_lane_set));
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when at least one lane read from `src`
/// with `get_u8x16` is nonzero, and 0 otherwise.
fn vanytrue8x16(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u8x16();
    // "Some lane is nonzero" is the same as "not every lane is zero".
    let some_lane_set = !lanes.iter().all(|&lane| lane == 0);
    self.state[dst].set_u32(u32::from(some_lane_set));
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when at least one lane read from `src`
/// with `get_u16x8` is nonzero, and 0 otherwise.
fn vanytrue16x8(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u16x8();
    let verdict = if lanes.iter().any(|&lane| lane != 0) {
        1
    } else {
        0
    };
    self.state[dst].set_u32(verdict);
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when at least one lane read from `src`
/// with `get_u32x4` is nonzero, and 0 otherwise.
fn vanytrue32x4(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u32x4();
    let mut verdict = 0;
    for lane in lanes.iter() {
        // The first nonzero lane settles the answer; stop scanning.
        if *lane != 0 {
            verdict = 1;
            break;
        }
    }
    self.state[dst].set_u32(verdict);
    ControlFlow::Continue(())
}

/// Writes 1 to `dst` (via `set_u32`) when at least one lane read from `src`
/// with `get_u64x2` is nonzero, and 0 otherwise.
fn vanytrue64x2(&mut self, dst: XReg, src: VReg) -> ControlFlow<Done> {
    let lanes = self.state[src].get_u64x2();
    let some_lane_set = lanes.iter().fold(false, |hit, lane| hit || *lane != 0);
    self.state[dst].set_u32(u32::from(some_lane_set));
    ControlFlow::Continue(())
}
}

impl ExtendedOpVisitor for Interpreter<'_> {
Expand Down
28 changes: 28 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,34 @@ macro_rules! for_each_op {
vbnot128 = VBnot128 { dst: VReg, src: VReg };
/// `dst = (c & x) | (!c & y)`
vbitselect128 = VBitselect128 { dst: VReg, c: VReg, x: VReg, y: VReg };
/// Collect high bits of each lane into the low 32-bits of the
/// destination.
vbitmask8x16 = Vbitmask8x16 { dst: XReg, src: VReg };
/// Collect high bits of each lane into the low 32-bits of the
/// destination.
vbitmask16x8 = Vbitmask16x8 { dst: XReg, src: VReg };
/// Collect high bits of each lane into the low 32-bits of the
/// destination.
vbitmask32x4 = Vbitmask32x4 { dst: XReg, src: VReg };
/// Collect high bits of each lane into the low 32-bits of the
/// destination.
vbitmask64x2 = Vbitmask64x2 { dst: XReg, src: VReg };
/// Store whether all lanes are nonzero in `dst`.
valltrue8x16 = Valltrue8x16 { dst: XReg, src: VReg };
/// Store whether all lanes are nonzero in `dst`.
valltrue16x8 = Valltrue16x8 { dst: XReg, src: VReg };
/// Store whether all lanes are nonzero in `dst`.
valltrue32x4 = Valltrue32x4 { dst: XReg, src: VReg };
/// Store whether all lanes are nonzero in `dst`.
valltrue64x2 = Valltrue64x2 { dst: XReg, src: VReg };
/// Store whether any lanes are nonzero in `dst`.
vanytrue8x16 = Vanytrue8x16 { dst: XReg, src: VReg };
/// Store whether any lanes are nonzero in `dst`.
vanytrue16x8 = Vanytrue16x8 { dst: XReg, src: VReg };
/// Store whether any lanes are nonzero in `dst`.
vanytrue32x4 = Vanytrue32x4 { dst: XReg, src: VReg };
/// Store whether any lanes are nonzero in `dst`.
vanytrue64x2 = Vanytrue64x2 { dst: XReg, src: VReg };
}
};
}
Expand Down

0 comments on commit 3fa6fa2

Please sign in to comment.