Skip to content

Commit

Permalink
pulley: Implement full 128-bit multiplication
Browse files Browse the repository at this point in the history
While Pulley has lowering rules for widening multiplication, it didn't
have a rule for a full 128-bit multiplication, which can be generated
through CLIF optimizations given wasm input. This commit adds
such a lowering to the Cranelift backend but doesn't add any new
instructions yet, under the assumption that this probably isn't
perf-critical at this time.
  • Loading branch information
alexcrichton committed Jan 21, 2025
1 parent 3ed4a63 commit d929116
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@
)
)

;; Offsets an addressing mode by the constant `amt`, producing an `Amode` of
;; the same variant whose offset is `offset + amt`.
;;
;; This term is partial: the sum is computed with `s32_add_fallible`, and when
;; that helper does not match, no rule here matches either, so callers must
;; use it from an `if-let` (or otherwise handle the failure). Only the
;; `RegOffset` and `SpOffset` variants are handled by the rules below.
(decl pure partial amode_add (Amode i32) Amode)
(rule (amode_add (Amode.RegOffset base offset) amt)
  (if-let new_offset (s32_add_fallible offset amt))
  ;; Use the checked sum, not `amt`: the original offset must be preserved.
  (Amode.RegOffset base new_offset))
(rule (amode_add (Amode.SpOffset offset) amt)
  (if-let new_offset (s32_add_fallible offset amt))
  (Amode.SpOffset new_offset))

;; Kinds of scalar extension.
;; NOTE(review): variant names suggest "no extension" plus sign/zero extension
;; to 32 or 64 bits -- confirm against the Rust-side definition and its users.
(type ExtKind (enum None Sign32 Sign64 Zero32 Zero64))

;; Kinds of vector extension.
;; NOTE(review): variant names suggest signed (`S`) / unsigned (`U`) extension
;; of the given lane shape (e.g. 8 lanes of 8 bits) -- confirm before relying
;; on it.
(type VExtKind (enum None S8x8 U8x8 S16x4 U16x4 S32x2 U32x2))
Expand Down
56 changes: 54 additions & 2 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,36 @@
(pulley_xmul64_s8 a b))

;; 128-bit (or wide) multiplication
;;
;; An `$I128` `imul` whose operands are both `uextend` (resp. both `sextend`)
;; is lowered to a single unsigned (resp. signed) widening multiply of the
;; `zext64`'d (resp. `sext64`'d) inputs. These rules carry priority 4.
;;
;; NOTE(review): each rule header appears twice below, once without a priority
;; and once with `4`. This looks like the before/after lines of a diff with
;; the +/- markers stripped -- confirm which header the real file contains.
(rule (lower (has_type $I128 (imul (uextend a) (uextend b))))
(rule 4 (lower (has_type $I128 (imul (uextend a) (uextend b))))
(pulley_xwidemul64_u (zext64 a) (zext64 b)))
(rule (lower (has_type $I128 (imul (sextend a) (sextend b))))
(rule 4 (lower (has_type $I128 (imul (sextend a) (sextend b))))
(pulley_xwidemul64_s (sext64 a) (sext64 b)))

;; General `$I128` `imul`: both operands are full 128-bit values held as a
;; pair of 64-bit registers (index 0 = low half, index 1 = high half).
(rule (lower (has_type $I128 (imul x y)))
  (let
    (;; Low/high halves of the left-hand operand.
     (lhs ValueRegs x)
     (lhs_lo XReg (value_regs_get lhs 0))
     (lhs_hi XReg (value_regs_get lhs 1))

     ;; Low/high halves of the right-hand operand.
     (rhs ValueRegs y)
     (rhs_lo XReg (value_regs_get rhs 0))
     (rhs_hi XReg (value_regs_get rhs 1))

     ;; Schoolbook multiplication on 64-bit limbs:
     ;;
     ;;   result_lo = lo64(lhs_lo * rhs_lo)
     ;;   result_hi = hi64(lhs_lo * rhs_lo) + lhs_lo * rhs_hi + lhs_hi * rhs_lo
     ;;
     ;; The unsigned widening multiply yields both halves of the low-limb
     ;; product; the two cross products only contribute to the high limb.
     (low_product ValueRegs (pulley_xwidemul64_u lhs_lo rhs_lo))
     (product_lo XReg (value_regs_get low_product 0))
     (product_hi XReg (value_regs_get low_product 1))
     (cross_lo_hi XReg (pulley_xmul64 lhs_lo rhs_hi))
     (cross_hi_lo XReg (pulley_xmul64 lhs_hi rhs_lo))
     (partial_hi XReg (pulley_xadd64 product_hi cross_lo_hi))
     (final_hi XReg (pulley_xadd64 partial_hi cross_hi_lo)))
    (value_regs product_lo final_hi)))

;; Vector multiplication: each lane shape maps directly onto the matching
;; Pulley vector-multiply constructor.
(rule (lower (has_type $I8X16 (imul lhs rhs)))
  (pulley_vmuli8x16 lhs rhs))
(rule (lower (has_type $I16X8 (imul lhs rhs)))
  (pulley_vmuli16x8 lhs rhs))
Expand Down Expand Up @@ -1054,6 +1079,30 @@
;; Stores of values whose type matches `ty_vec128` go through `pulley_vstore`
;; at the addressing mode built from `addr` and `offset`.
(rule 2 (lower (store flags val @ (value_type (ty_vec128 vty)) addr offset))
  (side_effect
    (pulley_vstore (amode addr offset) val vty flags)))

;; i128 stores
;;
;; The 128-bit value lives in a register pair (index 0 = low half, index 1 =
;; high half). Both halves and the destination addressing mode are handed to
;; `emit_store_i128`, which picks the store order based on `flags`.

(rule 3 (lower (store flags src @ (value_type $I128) addr offset))
  (let
    ((halves ValueRegs src)
     (lo XReg (value_regs_get halves 0))
     (hi XReg (value_regs_get halves 1))
     (dst Amode (amode addr offset)))
    (side_effect (emit_store_i128 flags lo hi dst))))

;; Emits the two 64-bit stores that make up an i128 store.
;;
;; `first` is the addressing mode of the lower-addressed half; the second half
;; is stored at `first + 8`, computed with `amode_add`. The endianness in
;; `flags` decides which register goes to which address:
;;
;;   little-endian: `lo` at `first`, `hi` at `first + 8`
;;   big-endian:    `hi` at `first`, `lo` at `first + 8`
;;
;; NOTE(review): this term is not declared `partial`, yet both rules depend on
;; the partial `amode_add` matching -- confirm what happens when it does not.
(decl emit_store_i128 (MemFlags XReg XReg Amode) SideEffectNoResult)
(rule 0 (emit_store_i128 flags lo hi first)
  (if-let (Endianness.Little) (endianness flags))
  (if-let second (amode_add first 8))
  (let ((_ InstOutput (side_effect (pulley_xstore first lo $I64 flags))))
    (pulley_xstore second hi $I64 flags)))
(rule 1 (emit_store_i128 flags lo hi first)
  (if-let (Endianness.Big) (endianness flags))
  (if-let second (amode_add first 8))
  (let ((_ InstOutput (side_effect (pulley_xstore first hi $I64 flags))))
    (pulley_xstore second lo $I64 flags)))

;; Equivalent of `gen_xload` but for stores.
(decl gen_xstore (Value Value Offset32 MemFlags Type) SideEffectNoResult)

Expand Down Expand Up @@ -1092,6 +1141,9 @@
;; Zero-extension to 64 bits is exactly `zext64` of the input.
(rule 1 (lower (has_type $I64 (uextend narrow)))
  (zext64 narrow))

;; Zero-extension to 128 bits: the low half is the 64-bit zero-extension of
;; the input and the high half is the register produced by `pulley_xzero`.
(rule 1 (lower (has_type $I128 (uextend narrow)))
  (value_regs
    (zext64 narrow)
    (pulley_xzero)))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (sextend val)))
Expand Down
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/i128-arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ target riscv64
target riscv64 has_c has_zcb
set enable_multi_ret_implicit_sret
target s390x
target pulley32
target pulley32be
target pulley64
target pulley64be

function %add_i128(i128, i128) -> i128 {
block0(v0: i128,v1: i128):
Expand Down

0 comments on commit d929116

Please sign in to comment.