From c21fd5280ddbef5f96b56c37a292ea2f32d5331b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 19 Dec 2024 13:23:48 -0800 Subject: [PATCH] pulley: Use immediate-taking instructions more Refactor some existing uses of `pulley_xconst*` instructions to use immediate-taking instructions instead, now that they've been added to Pulley. --- .../codegen/src/isa/pulley_shared/lower.isle | 54 +++++++++++-------- .../filetests/isa/pulley32/brif.clif | 6 +-- .../filetests/isa/pulley32/trap.clif | 52 +++++++----------- .../filetests/isa/pulley64/brif.clif | 6 +-- .../filetests/isa/pulley64/trap.clif | 52 +++++++----------- 5 files changed, 75 insertions(+), 95 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 15133e563edc..4c656c037c8a 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -14,7 +14,7 @@ (decl lower_cond (Value) Cond) (rule 0 (lower_cond val @ (value_type (fits_in_32 _))) (Cond.If32 (zext32 val))) (rule 1 (lower_cond val @ (value_type $I64)) - (Cond.IfXneq64 val (pulley_xconst8 0))) + (Cond.IfXneq64I32 val 0)) ;; Peel away explicit `uextend` values to take a look at the inner value. 
(rule 2 (lower_cond (uextend val)) (lower_cond val)) @@ -282,13 +282,16 @@ ;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (umulhi a b))) - (pulley_xshr32_u (pulley_xmul32 (zext32 a) (zext32 b)) (pulley_xconst8 8))) + (if-let (u6_from_u8 shift) (u64_as_u8 8)) + (pulley_xshr32_u_u6 (pulley_xmul32 (zext32 a) (zext32 b)) shift)) (rule (lower (has_type $I16 (umulhi a b))) - (pulley_xshr32_u (pulley_xmul32 (zext32 a) (zext32 b)) (pulley_xconst8 16))) + (if-let (u6_from_u8 shift) (u64_as_u8 16)) + (pulley_xshr32_u_u6 (pulley_xmul32 (zext32 a) (zext32 b)) shift)) (rule (lower (has_type $I32 (umulhi a b))) - (pulley_xshr64_u (pulley_xmul64 (zext64 a) (zext64 b)) (pulley_xconst8 32))) + (if-let (u6_from_u8 shift) (u64_as_u8 32)) + (pulley_xshr64_u_u6 (pulley_xmul64 (zext64 a) (zext64 b)) shift)) (rule (lower (has_type $I64 (umulhi a b))) (pulley_xmulhi64_u a b)) @@ -296,13 +299,16 @@ ;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (smulhi a b))) - (pulley_xshr32_s (pulley_xmul32 (sext32 a) (sext32 b)) (pulley_xconst8 8))) + (if-let (u6_from_u8 shift) (u64_as_u8 8)) + (pulley_xshr32_s_u6 (pulley_xmul32 (sext32 a) (sext32 b)) shift)) (rule (lower (has_type $I16 (smulhi a b))) - (pulley_xshr32_s (pulley_xmul32 (sext32 a) (sext32 b)) (pulley_xconst8 16))) + (if-let (u6_from_u8 shift) (u64_as_u8 16)) + (pulley_xshr32_s_u6 (pulley_xmul32 (sext32 a) (sext32 b)) shift)) (rule (lower (has_type $I32 (smulhi a b))) - (pulley_xshr64_s (pulley_xmul64 (sext64 a) (sext64 b)) (pulley_xconst8 32))) + (if-let (u6_from_u8 shift) (u64_as_u8 32)) + (pulley_xshr64_s_u6 (pulley_xmul64 (sext64 a) (sext64 b)) shift)) (rule (lower (has_type $I64 (smulhi a b))) (pulley_xmulhi64_s a b)) @@ -334,10 +340,10 @@ ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (ishl a b))) - (pulley_xshl32 a (pulley_xband32 b (pulley_xconst8 
7)))) + (pulley_xshl32 a (pulley_xband32_s8 b 7))) (rule (lower (has_type $I16 (ishl a b))) - (pulley_xshl32 a (pulley_xband32 b (pulley_xconst8 15)))) + (pulley_xshl32 a (pulley_xband32_s8 b 15))) (rule (lower (has_type $I32 (ishl a b))) (pulley_xshl32 a b)) @@ -373,10 +379,10 @@ ;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (ushr a b))) - (pulley_xshr32_u (zext32 a) (pulley_xband32 b (pulley_xconst8 7)))) + (pulley_xshr32_u (zext32 a) (pulley_xband32_s8 b 7))) (rule (lower (has_type $I16 (ushr a b))) - (pulley_xshr32_u (zext32 a) (pulley_xband32 b (pulley_xconst8 15)))) + (pulley_xshr32_u (zext32 a) (pulley_xband32_s8 b 15))) (rule (lower (has_type $I32 (ushr a b))) (pulley_xshr32_u a b)) @@ -402,10 +408,10 @@ ;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (sshr a b))) - (pulley_xshr32_u (sext32 a) (pulley_xband32 b (pulley_xconst8 7)))) + (pulley_xshr32_u (sext32 a) (pulley_xband32_s8 b 7))) (rule (lower (has_type $I16 (sshr a b))) - (pulley_xshr32_u (sext32 a) (pulley_xband32 b (pulley_xconst8 15)))) + (pulley_xshr32_u (sext32 a) (pulley_xband32_s8 b 15))) (rule (lower (has_type $I32 (sshr a b))) (pulley_xshr32_s a b)) @@ -531,18 +537,18 @@ ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (ctz a))) - (pulley_xctz32 (pulley_xbor32 a (pulley_xconst16 0x100)))) + (pulley_xctz32 (pulley_xbor32_s32 a 0x100))) (rule (lower (has_type $I16 (ctz a))) - (pulley_xctz32 (pulley_xbor32 a (pulley_xconst32 0x10000)))) + (pulley_xctz32 (pulley_xbor32_s32 a 0x10000))) (rule (lower (has_type $I32 (ctz a))) (pulley_xctz32 a)) (rule (lower (has_type $I64 (ctz a))) (pulley_xctz64 a)) ;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (clz a))) - (pulley_xsub32 (pulley_xclz32 (zext32 a)) (pulley_xconst8 24))) + (pulley_xsub32_u8 (pulley_xclz32 
(zext32 a)) 24)) (rule (lower (has_type $I16 (clz a))) - (pulley_xsub32 (pulley_xclz32 (zext32 a)) (pulley_xconst8 16))) + (pulley_xsub32_u8 (pulley_xclz32 (zext32 a)) 16)) (rule (lower (has_type $I32 (clz a))) (pulley_xclz32 a)) (rule (lower (has_type $I64 (clz a))) (pulley_xclz64 a)) @@ -641,13 +647,13 @@ ;; complement `=`-related conditions to get ones that don't use `=`. (rule 2 (lower_icmp $I128 cc @ (IntCC.SignedLessThanOrEqual) x y) - (pulley_xbxor32 (lower_icmp $I128 (intcc_complement cc) x y) (pulley_xconst8 1))) + (pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1)) (rule 2 (lower_icmp $I128 cc @ (IntCC.SignedGreaterThanOrEqual) x y) - (pulley_xbxor32 (lower_icmp $I128 (intcc_complement cc) x y) (pulley_xconst8 1))) + (pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1)) (rule 2 (lower_icmp $I128 cc @ (IntCC.UnsignedLessThanOrEqual) x y) - (pulley_xbxor32 (lower_icmp $I128 (intcc_complement cc) x y) (pulley_xconst8 1))) + (pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1)) (rule 2 (lower_icmp $I128 cc @ (IntCC.UnsignedGreaterThanOrEqual) x y) - (pulley_xbxor32 (lower_icmp $I128 (intcc_complement cc) x y) (pulley_xconst8 1))) + (pulley_xbxor32_s8 (lower_icmp $I128 (intcc_complement cc) x y) 1)) ;; Compare both the bottom and upper halves of the 128-bit values. 
If ;; the top half is equal use the bottom comparison, otherwise use the upper @@ -862,8 +868,9 @@ (sext64 val)) (rule 1 (lower (has_type $I128 (sextend val))) + (if-let (u6_from_u8 shift) (u64_as_u8 63)) (let ((lo XReg (sext64 val)) - (hi XReg (pulley_xshr64_s lo (pulley_xconst8 63)))) + (hi XReg (pulley_xshr64_s_u6 lo shift))) (value_regs lo hi))) ;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1181,7 +1188,8 @@ ;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I16 (bswap a))) - (pulley_xshr32_u (pulley_bswap32 a) (pulley_xconst8 16))) + (if-let (u6_from_u8 shift) (u64_as_u8 16)) + (pulley_xshr32_u_u6 (pulley_bswap32 a) shift)) (rule (lower (has_type $I32 (bswap a))) (pulley_bswap32 a)) (rule (lower (has_type $I64 (bswap a))) (pulley_bswap64 a)) diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index f342c34b08ea..5e29ea3bac5a 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -110,8 +110,7 @@ block2: ; VCode: ; block0: -; xconst8 x4, 0 -; br_if_xneq64 x0, x4, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -120,8 +119,7 @@ block2: ; ret ; ; Disassembled: -; xconst8 x4, 0 -; br_if_xneq64 x0, x4, 0xb // target = 0xe +; br_if_xneq64_i8 x0, 0, 0xb // target = 0xb ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index 7bd7ba27fbae..c94c85bc5f83 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -102,29 +102,23 @@ block2: ; VCode: ; block0: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: -; xconst8 x7, 0 -; xconst8 x8, 0 -; 
trap_if_xneq64 x7, x8 // code = TrapCode(1) +; xconst8 x4, 0 +; trap_if_xneq64_i32 x4, 0 // code = TrapCode(1) ; ret ; block2: -; xconst8 x9, 42 -; xconst8 x10, 0 -; trap_if_xneq64 x9, x10 // code = TrapCode(1) +; xconst8 x6, 42 +; trap_if_xneq64_i32 x6, 0 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, 0x15 // target = 0x18 -; xconst8 x7, 0 -; xconst8 x8, 0 -; br_if_xneq64 x7, x8, 0x16 // target = 0x26 +; br_if_xneq64_i8 x0, 0, 0x12 // target = 0x12 +; xconst8 x4, 0 +; br_if_xneq64_i8 x4, 0, 0x13 // target = 0x1d ; ret -; xconst8 x9, 42 -; xconst8 x10, 0 -; br_if_xneq64 x9, x10, 0xb // target = 0x29 +; xconst8 x6, 42 +; br_if_xneq64_i8 x6, 0, 0xb // target = 0x20 ; ret ; trap ; trap @@ -146,29 +140,23 @@ block2: ; VCode: ; block0: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: -; xconst8 x7, 0 -; xconst8 x8, 0 -; trap_if_xeq64 x7, x8 // code = TrapCode(1) +; xconst8 x4, 0 +; trap_if_xeq64_i32 x4, 0 // code = TrapCode(1) ; ret ; block2: -; xconst8 x9, 42 -; xconst8 x10, 0 -; trap_if_xeq64 x9, x10 // code = TrapCode(1) +; xconst8 x6, 42 +; trap_if_xeq64_i32 x6, 0 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, 0x15 // target = 0x18 -; xconst8 x7, 0 -; xconst8 x8, 0 -; br_if_xeq64 x7, x8, 0x16 // target = 0x26 +; br_if_xneq64_i8 x0, 0, 0x12 // target = 0x12 +; xconst8 x4, 0 +; br_if_xeq64_i8 x4, 0, 0x13 // target = 0x1d ; ret -; xconst8 x9, 42 -; xconst8 x10, 0 -; br_if_xeq64 x9, x10, 0xb // target = 0x29 +; xconst8 x6, 42 +; br_if_xeq64_i8 x6, 0, 0xb // target = 0x20 ; ret ; trap ; trap diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index d8cea25a080e..07c1311b6d2d 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -110,8 +110,7 @@ block2: ; VCode: ; block0: -; xconst8 
x4, 0 -; br_if_xneq64 x0, x4, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -120,8 +119,7 @@ block2: ; ret ; ; Disassembled: -; xconst8 x4, 0 -; br_if_xneq64 x0, x4, 0xb // target = 0xe +; br_if_xneq64_i8 x0, 0, 0xb // target = 0xb ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index d38ac59dd9f1..6af273784851 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -102,29 +102,23 @@ block2: ; VCode: ; block0: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: -; xconst8 x7, 0 -; xconst8 x8, 0 -; trap_if_xneq64 x7, x8 // code = TrapCode(1) +; xconst8 x4, 0 +; trap_if_xneq64_i32 x4, 0 // code = TrapCode(1) ; ret ; block2: -; xconst8 x9, 42 -; xconst8 x10, 0 -; trap_if_xneq64 x9, x10 // code = TrapCode(1) +; xconst8 x6, 42 +; trap_if_xneq64_i32 x6, 0 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, 0x15 // target = 0x18 -; xconst8 x7, 0 -; xconst8 x8, 0 -; br_if_xneq64 x7, x8, 0x16 // target = 0x26 +; br_if_xneq64_i8 x0, 0, 0x12 // target = 0x12 +; xconst8 x4, 0 +; br_if_xneq64_i8 x4, 0, 0x13 // target = 0x1d ; ret -; xconst8 x9, 42 -; xconst8 x10, 0 -; br_if_xneq64 x9, x10, 0xb // target = 0x29 +; xconst8 x6, 42 +; br_if_xneq64_i8 x6, 0, 0xb // target = 0x20 ; ret ; trap ; trap @@ -146,29 +140,23 @@ block2: ; VCode: ; block0: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, label2; jump label1 +; br_if_xneq64_i32 x0, 0, label2; jump label1 ; block1: -; xconst8 x7, 0 -; xconst8 x8, 0 -; trap_if_xeq64 x7, x8 // code = TrapCode(1) +; xconst8 x4, 0 +; trap_if_xeq64_i32 x4, 0 // code = TrapCode(1) ; ret ; block2: -; xconst8 x9, 42 -; xconst8 x10, 0 -; trap_if_xeq64 x9, x10 // code = TrapCode(1) +; xconst8 x6, 42 +; trap_if_xeq64_i32 x6, 0 
// code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x6, 0 -; br_if_xneq64 x0, x6, 0x15 // target = 0x18 -; xconst8 x7, 0 -; xconst8 x8, 0 -; br_if_xeq64 x7, x8, 0x16 // target = 0x26 +; br_if_xneq64_i8 x0, 0, 0x12 // target = 0x12 +; xconst8 x4, 0 +; br_if_xeq64_i8 x4, 0, 0x13 // target = 0x1d ; ret -; xconst8 x9, 42 -; xconst8 x10, 0 -; br_if_xeq64 x9, x10, 0xb // target = 0x29 +; xconst8 x6, 42 +; br_if_xeq64_i8 x6, 0, 0xb // target = 0x20 ; ret ; trap ; trap