Skip to content

Commit

Permalink
cranelift: 32bit div_s, rem_u, rem_s for aarch64 (bytecodealliance#9850)
Browse files Browse the repository at this point in the history
* rename put_non_zero_in_reg to put_nonzero_in_reg_maybe_zext

* handle 32bit sdiv

* i32 rem_u/s

* fix tests

* fix rem

* fmt

* improve load_constant_full

* fix tests, and review edits
  • Loading branch information
MarinPostma authored Dec 19, 2024
1 parent 71ca453 commit a179f95
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 138 deletions.
13 changes: 9 additions & 4 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3598,12 +3598,17 @@
(orr_imm ty (zero_reg) n)
m k k))

(decl load_constant64_full (Type ImmExtend u64) Reg)
(extern constructor load_constant64_full load_constant64_full)
(decl load_constant_full (Type ImmExtend OperandSize u64) Reg)
(extern constructor load_constant_full load_constant_full)

;; Fallback for integral 32-bit constants
(rule (imm (fits_in_32 (integral_ty ty)) extend n)
(load_constant_full ty extend (operand_size $I32) n))

;; Fallback for integral 64-bit constants
(rule (imm (integral_ty ty) extend n)
(load_constant64_full ty extend n))
(rule -1 (imm (integral_ty $I64) extend n)
(load_constant_full $I64 extend (operand_size $I64) n))


;; Sign extension helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
3 changes: 1 addition & 2 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -758,8 +758,7 @@ impl MachInstEmit for Inst {
ALUOp::EorNot => 0b01001010_001,
ALUOp::AddS => 0b00101011_000,
ALUOp::SubS => 0b01101011_000,
ALUOp::SDiv => 0b10011010_110,
ALUOp::UDiv => 0b00011010_110,
ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
ALUOp::RotR | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
ALUOp::SMulH => 0b10011011_010,
ALUOp::UMulH => 0b10011011_110,
Expand Down
116 changes: 69 additions & 47 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1028,43 +1028,48 @@

;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
;; CLIF's `udiv` the check for zero needs to be manually performed.

(rule udiv 1 (lower (has_type $I64 (udiv x y)))
(a64_udiv $I64 (put_in_reg x) (put_nonzero_in_reg y)))
;; Enum representing the types of extensions
(type ExtType
(enum
(Signed)
(Unsigned)))

(rule udiv (lower (has_type (fits_in_32 ty) (udiv x y)))
(a64_udiv $I32 (put_in_reg_zext32 x) (put_nonzero_in_reg y)))

;; helpers for udiv:
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
(decl put_nonzero_in_reg (Value) Reg)
;; It takes a value and extension type, and performs the appropriate checks.
;; TODO: restore spec
; (spec (put_nonzero_in_reg_sext64 x)
; (provide (= (sign_ext 64 x) result))
; (require (not (= #x0000000000000000 result))))
(decl put_nonzero_in_reg (Value ExtType Type) Reg)

;; Special case where if a `Value` is known to be nonzero we can trivially
;; move it into a register.
(rule (put_nonzero_in_reg (and (value_type ty) (iconst (nonzero_u64_from_imm64 n))))

;; zero-extend non-zero constant
(rule (put_nonzero_in_reg (iconst (nonzero_u64_from_imm64 n)) (ExtType.Unsigned) ty)
(imm ty (ImmExtend.Zero) n))

(rule -1 (put_nonzero_in_reg (and (value_type $I64) val))
;; sign-extend non-zero constant
(rule (put_nonzero_in_reg (iconst (nonzero_u64_from_imm64 n)) (ExtType.Signed) ty)
(imm ty (ImmExtend.Sign) n))

(rule -1 (put_nonzero_in_reg val _ $I64)
(trap_if_zero_divisor (put_in_reg val) (operand_size $I64)))

(rule -2 (put_nonzero_in_reg (and (value_type (fits_in_32 _)) val))
(rule -2 (put_nonzero_in_reg val (ExtType.Signed) (fits_in_32 _))
(trap_if_zero_divisor (put_in_reg_sext32 val) (operand_size $I32)))

(rule -2 (put_nonzero_in_reg val (ExtType.Unsigned) (fits_in_32 _))
(trap_if_zero_divisor (put_in_reg_zext32 val) (operand_size $I32)))

;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero and extending it to 64 bits.
(spec (put_nonzero_in_reg_zext64 x)
(provide (= result (zero_ext 64 x)))
(require (not (= result #x0000000000000000))))
(decl put_nonzero_in_reg_zext64 (Value) Reg)
(rule -1 (put_nonzero_in_reg_zext64 (and (value_type ty) val))
(trap_if_zero_divisor (put_in_reg_zext64 val) (operand_size ty)))
;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
;; CLIF's `udiv` the check for zero needs to be manually performed.

;; Special case where if a `Value` is known to be nonzero we can trivially
;; move it into a register.
(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
(iconst (nonzero_u64_from_imm64 n))))
(imm ty (ImmExtend.Zero) n))
(rule udiv 1 (lower (has_type $I64 (udiv x y)))
(a64_udiv $I64 (put_in_reg x) (put_nonzero_in_reg y (ExtType.Unsigned) $I64)))

(rule udiv (lower (has_type (fits_in_32 ty) (udiv x y)))
(a64_udiv $I32 (put_in_reg_zext32 x) (put_nonzero_in_reg y (ExtType.Unsigned) ty)))

;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -1088,33 +1093,34 @@
;;
;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
;; necessary, but right now `y` is checked to not be -1 as well.
(rule sdiv_base_case (lower (has_type (fits_in_64 ty) (sdiv x y)))

(rule sdiv_base_case (lower (has_type $I64 (sdiv x y)))
(let ((x64 Reg (put_in_reg_sext64 x))
(y64 Reg (put_nonzero_in_reg_sext64 y))
(intmin_check_x Reg (intmin_check ty x64))
(valid_x64 Reg (trap_if_div_overflow ty intmin_check_x x64 y64))
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) $I64))
(intmin_check_x Reg (intmin_check $I64 x64))
(valid_x64 Reg (trap_if_div_overflow $I64 intmin_check_x x64 y64))
(result Reg (a64_sdiv $I64 valid_x64 y64)))
result))

(rule sdiv_base_case -1 (lower (has_type (fits_in_32 ty) (sdiv x y)))
(let ((x32 Reg (put_in_reg_sext32 x))
(y32 Reg (put_nonzero_in_reg y (ExtType.Signed) ty))
(intmin_check_x Reg (intmin_check ty x32))
(valid_x32 Reg (trap_if_div_overflow ty intmin_check_x x32 y32))
(result Reg (a64_sdiv ty valid_x32 y32)))
result))

;; Special case for `sdiv` where no checks are needed due to division by a
;; constant meaning the checks are always passed.
(rule sdiv_safe_divisor 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst imm))))
(rule sdiv_safe_divisor 2 (lower (has_type $I64 (sdiv x (iconst imm))))
(if-let y (safe_divisor_from_imm64 $I64 imm))
(a64_sdiv $I64 (put_in_reg_sext64 x) (imm $I64 (ImmExtend.Sign) y)))

(rule sdiv_safe_divisor 1 (lower (has_type (fits_in_32 ty) (sdiv x (iconst imm))))
(if-let y (safe_divisor_from_imm64 ty imm))
(a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))
(a64_sdiv ty (put_in_reg_sext32 x) (imm ty (ImmExtend.Sign) y)))

;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
(spec (put_nonzero_in_reg_sext64 x)
(provide (= (sign_ext 64 x) result))
(require (not (= #x0000000000000000 result))))
(decl put_nonzero_in_reg_sext64 (Value) Reg)
(rule -1 (put_nonzero_in_reg_sext64 val)
(trap_if_zero_divisor (put_in_reg_sext64 val) (operand_size $I64)))

;; Note that this has a special case where if the `Value` is a constant that's
;; not zero we can skip the zero check.
(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
(iconst (nonzero_u64_from_imm64 n))))
(imm ty (ImmExtend.Sign) n))

;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -1130,20 +1136,36 @@
;; div rd, x, y ; rd = x / y
;; msub rd, rd, y, x ; rd = x - rd * y

(rule urem (lower (has_type (fits_in_64 ty) (urem x y)))
;; TODO: we can avoid a 0 check, if the dividend is a non-0 constant

(rule urem (lower (has_type $I64 (urem x y)))
(let ((x64 Reg (put_in_reg_zext64 x))
(y64 Reg (put_nonzero_in_reg_zext64 y))
(y64 Reg (put_nonzero_in_reg y (ExtType.Unsigned) $I64))
(div Reg (a64_udiv $I64 x64 y64))
(result Reg (msub $I64 div y64 x64)))
result))

(rule srem (lower (has_type (fits_in_64 ty) (srem x y)))
(rule urem -1 (lower (has_type (fits_in_32 ty) (urem x y)))
(let ((x64 Reg (put_in_reg_zext32 x))
(y64 Reg (put_nonzero_in_reg y (ExtType.Unsigned) ty))
(div Reg (a64_udiv ty x64 y64))
(result Reg (msub ty div y64 x64)))
result))

(rule srem (lower (has_type $I64 (srem x y)))
(let ((x64 Reg (put_in_reg_sext64 x))
(y64 Reg (put_nonzero_in_reg_sext64 y))
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) $I64))
(div Reg (a64_sdiv $I64 x64 y64))
(result Reg (msub $I64 div y64 x64)))
result))

(rule srem -1 (lower (has_type (fits_in_32 ty) (srem x y)))
(let ((x64 Reg (put_in_reg_sext32 x))
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) ty))
(div Reg (a64_sdiv ty x64 y64))
(result Reg (msub ty div y64 x64)))
result))

;;; Rules for integer min/max: umin, smin, umax, smax ;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
Expand Down
31 changes: 22 additions & 9 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Pull in the ISLE generated code.
pub mod generated_code;
use generated_code::Context;
use generated_code::{Context, ImmExtend};

// Types that the generated ISLE code uses via `use super::*`.
use super::{
Expand Down Expand Up @@ -30,6 +30,7 @@ use crate::{
abi::ArgPair, ty_bits, InstOutput, IsTailCall, MachInst, VCodeConstant, VCodeConstantData,
},
};
use core::u32;
use regalloc2::PReg;
use std::boxed::Box;
use std::vec::Vec;
Expand Down Expand Up @@ -206,24 +207,36 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
///
/// The logic here is nontrivial enough that it's not really worth porting
/// this over to ISLE.
fn load_constant64_full(
fn load_constant_full(
&mut self,
ty: Type,
extend: &generated_code::ImmExtend,
extend: &ImmExtend,
extend_to: &OperandSize,
value: u64,
) -> Reg {
let bits = ty.bits();
let value = if bits < 64 {
if *extend == generated_code::ImmExtend::Sign {

let value = match (extend_to, *extend) {
(OperandSize::Size32, ImmExtend::Sign) if bits < 32 => {
let shift = 32 - bits;
let value = value as i32;

// we cast first to a u32 and then to a u64, to ensure that we are representing a
// i32 in a u64, and not a i64. This is important, otherwise value will not fit in
// 32 bits
((value << shift) >> shift) as u32 as u64
}
(OperandSize::Size32, ImmExtend::Zero) if bits < 32 => {
value & !((u32::MAX as u64) << bits)
}
(OperandSize::Size64, ImmExtend::Sign) if bits < 64 => {
let shift = 64 - bits;
let value = value as i64;

((value << shift) >> shift) as u64
} else {
value & !(u64::MAX << bits)
}
} else {
value
(OperandSize::Size64, ImmExtend::Zero) if bits < 64 => value & !(u64::MAX << bits),
_ => value,
};

// Divide the value into 16-bit slices that we can manipulate using
Expand Down
85 changes: 46 additions & 39 deletions cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -215,24 +215,20 @@ block0(v0: i32, v1: i32):

; VCode:
; block0:
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #trap=int_divz
; adds wzr, w5, #1
; ccmp w3, #1, #nzcv, eq
; cbz w1, #trap=int_divz
; adds wzr, w1, #1
; ccmp w0, #1, #nzcv, eq
; b.vs #trap=int_ovf
; sdiv x0, x3, x5
; sdiv w0, w0, w1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #0x20
; cmn w5, #1
; ccmp w3, #1, #0, eq
; b.vs #0x24
; sdiv x0, x3, x5
; cbz w1, #0x18
; cmn w1, #1
; ccmp w0, #1, #0, eq
; b.vs #0x1c
; sdiv w0, w0, w1
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
Expand All @@ -246,16 +242,14 @@ block0(v0: i32):

; VCode:
; block0:
; sxtw x2, w0
; movz w4, #2
; sdiv x0, x2, x4
; movz w2, #2
; sdiv w0, w0, w2
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxtw x2, w0
; mov w4, #2
; sdiv x0, x2, x4
; mov w2, #2
; sdiv w0, w0, w2
; ret

function %f14(i32, i32) -> i32 {
Expand Down Expand Up @@ -304,20 +298,16 @@ block0(v0: i32, v1: i32):

; VCode:
; block0:
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #trap=int_divz
; sdiv x8, x3, x5
; msub x0, x8, x5, x3
; cbz w1, #trap=int_divz
; sdiv w4, w0, w1
; msub w0, w4, w1, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #0x18
; sdiv x8, x3, x5
; msub x0, x8, x5, x3
; cbz w1, #0x10
; sdiv w4, w0, w1
; msub w0, w4, w1, w0
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz

Expand All @@ -329,20 +319,16 @@ block0(v0: i32, v1: i32):

; VCode:
; block0:
; mov w3, w0
; mov w5, w1
; cbz w5, #trap=int_divz
; udiv x8, x3, x5
; msub x0, x8, x5, x3
; cbz w1, #trap=int_divz
; udiv w4, w0, w1
; msub w0, w4, w1, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w3, w0
; mov w5, w1
; cbz w5, #0x18
; udiv x8, x3, x5
; msub x0, x8, x5, x3
; cbz w1, #0x10
; udiv w4, w0, w1
; msub w0, w4, w1, w0
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz

Expand Down Expand Up @@ -795,3 +781,24 @@ block0(v0: i64):
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf

function %sdiv_i16_const(i16) -> i16 {
block0(v0: i16):
v1 = iconst.i16 -2
v2 = sdiv v0, v1
return v2
}

; VCode:
; block0:
; sxth w2, w0
; movn w4, #1
; sdiv w0, w2, w4
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxth w2, w0
; mov w4, #-2
; sdiv w0, w2, w4
; ret

Loading

0 comments on commit a179f95

Please sign in to comment.