Skip to content

Commit

Permalink
pulley: Fix regalloc of return-call-indirect
Browse files Browse the repository at this point in the history
This commit fixes an issue in the Pulley backend for the
`return_call_indirect` implementation. This brings Pulley in line with
other backends by using a fixed, caller-saved register for the indirect
call destination instead of possibly using a callee-saved register.
If a callee-saved register is used then the jump destination is
clobbered by the register restores, so the return-call won't jump to
the correct location.

This additionally required updating the Pulley ABI slightly. Previously
all caller-saved registers were considered argument registers, meaning
that there weren't any registers actually available to use for the jump
destination. To handle this I've decreased the number of argument
registers by 1 so that a single caller-saved, non-argument register is
available to hold the return-call-indirect destination.
  • Loading branch information
alexcrichton committed Jan 15, 2025
1 parent bb5e4bb commit 44e5099
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 267 deletions.
8 changes: 7 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,14 @@ where
) -> CodegenResult<(u32, Option<usize>)> {
// NB: make sure this method stays in sync with
// `cranelift_pulley::interp::Vm::call`.
//
// In general we use the first half of all register banks as argument
// passing registers because, well, why not for now. Currently the only
// exception is x15 which is reserved as a single caller-saved register
// not used for arguments. This is used in `ReturnCallIndirect` to hold
// the location of where we're jumping to.

let x_end = 15;
let x_end = 14;
let f_end = 15;
let v_end = 15;

Expand Down
12 changes: 11 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,17 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
}
}
Inst::ReturnIndirectCall { info } => {
collector.reg_use(&mut info.dest);
// Use a fixed location of where to store the value to
// return-call-to. Using a fixed location prevents this register
// from being allocated to a callee-saved register which will get
// clobbered during the register restores just before the
// return-call.
//
// Also note that `x15` is specifically the last caller-saved
// register and, at this time, the only non-argument caller-saved
// register. This register allocation constraint is why it's not an
// argument register.
collector.reg_fixed_use(&mut info.dest, regs::x15());

for CallArgPair { vreg, preg } in &mut info.uses {
collector.reg_fixed_use(vreg, *preg);
Expand Down
216 changes: 107 additions & 109 deletions cranelift/filetests/filetests/isa/pulley32/call.clif
Original file line number Diff line number Diff line change
Expand Up @@ -129,52 +129,52 @@ block0:
}

; VCode:
; push_frame_save 48, {}
; push_frame_save 64, {}
; block0:
; xzero x15
; xstore64 OutgoingArg(0), x15 // flags = notrap aligned
; xstore64 OutgoingArg(8), x15 // flags = notrap aligned
; xstore64 OutgoingArg(16), x15 // flags = notrap aligned
; xstore64 OutgoingArg(24), x15 // flags = notrap aligned
; xstore64 OutgoingArg(32), x15 // flags = notrap aligned
; xstore64 OutgoingArg(40), x15 // flags = notrap aligned
; xmov x4, x15
; xmov x5, x15
; xmov x6, x15
; xmov x7, x15
; xmov x8, x15
; xmov x9, x15
; xmov x10, x15
; xmov x11, x15
; xmov x12, x15
; xmov x13, x15
; xmov x14, x15
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p15i), XReg(p15i), XReg(p15i), XReg(p15i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
; pop_frame_restore 48, {}
; xzero x14
; xstore64 OutgoingArg(0), x14 // flags = notrap aligned
; xstore64 OutgoingArg(8), x14 // flags = notrap aligned
; xstore64 OutgoingArg(16), x14 // flags = notrap aligned
; xstore64 OutgoingArg(24), x14 // flags = notrap aligned
; xstore64 OutgoingArg(32), x14 // flags = notrap aligned
; xstore64 OutgoingArg(40), x14 // flags = notrap aligned
; xstore64 OutgoingArg(48), x14 // flags = notrap aligned
; xmov x4, x14
; xmov x5, x14
; xmov x6, x14
; xmov x7, x14
; xmov x8, x14
; xmov x9, x14
; xmov x10, x14
; xmov x11, x14
; xmov x12, x14
; xmov x13, x14
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
; pop_frame_restore 64, {}
; ret
;
; Disassembled:
; push_frame_save 48,
; xzero x15
; xstore64le_offset8 sp, 0, x15
; xstore64le_offset8 sp, 8, x15
; xstore64le_offset8 sp, 16, x15
; xstore64le_offset8 sp, 24, x15
; xstore64le_offset8 sp, 32, x15
; xstore64le_offset8 sp, 40, x15
; xmov x4, x15
; xmov x5, x15
; xmov x6, x15
; xmov x7, x15
; xmov x8, x15
; xmov x9, x15
; xmov x10, x15
; xmov x11, x15
; xmov x12, x15
; xmov x13, x15
; xmov x14, x15
; call4 x15, x15, x15, x15, 0x0 // target = 0x40
; pop_frame_restore 48,
; push_frame_save 64,
; xzero x14
; xstore64le_offset8 sp, 0, x14
; xstore64le_offset8 sp, 8, x14
; xstore64le_offset8 sp, 16, x14
; xstore64le_offset8 sp, 24, x14
; xstore64le_offset8 sp, 32, x14
; xstore64le_offset8 sp, 40, x14
; xstore64le_offset8 sp, 48, x14
; xmov x4, x14
; xmov x5, x14
; xmov x6, x14
; xmov x7, x14
; xmov x8, x14
; xmov x9, x14
; xmov x10, x14
; xmov x11, x14
; xmov x12, x14
; xmov x13, x14
; call4 x14, x14, x14, x14, 0x0 // target = 0x41
; pop_frame_restore 64,
; ret

function %colocated_stack_rets() -> i64 {
Expand Down Expand Up @@ -214,82 +214,80 @@ block0:
}

; VCode:
; push_frame_save 112, {x17, x18, x20, x21, x22, x23, x29}
; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28}
; block0:
; x12 = load_addr OutgoingArg(0)
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
; xmov x20, x13
; xmov x22, x11
; x29 = xload64 OutgoingArg(0) // flags = notrap aligned
; x11 = xload64 OutgoingArg(8) // flags = notrap aligned
; x13 = xload64 OutgoingArg(16) // flags = notrap aligned
; x21 = xload64 OutgoingArg(24) // flags = notrap aligned
; x23 = xload64 OutgoingArg(32) // flags = notrap aligned
; xadd64 x18, x0, x1
; xadd64 x17, x2, x3
; xadd64 x5, x4, x5
; xadd64 x6, x6, x7
; xadd64 x7, x8, x9
; xmov x0, x22
; xadd64 x4, x10, x0
; xmov x10, x20
; xadd64 x8, x12, x10
; xadd64 x14, x14, x15
; xadd64 x15, x29, x11
; xadd64 x13, x11, x13
; xadd64 x0, x21, x23
; xadd64 x1, x18, x17
; xadd64 x2, x5, x6
; xadd64 x3, x7, x4
; xadd64 x14, x8, x14
; xadd64 x13, x15, x13
; xadd64 x15, x0, x0
; xadd64 x0, x1, x2
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
; xmov x21, x12
; x28 = xload64 OutgoingArg(0) // flags = notrap aligned
; x16 = xload64 OutgoingArg(8) // flags = notrap aligned
; x12 = xload64 OutgoingArg(16) // flags = notrap aligned
; x15 = xload64 OutgoingArg(24) // flags = notrap aligned
; x22 = xload64 OutgoingArg(32) // flags = notrap aligned
; x24 = xload64 OutgoingArg(40) // flags = notrap aligned
; xadd64 x20, x0, x1
; xadd64 x19, x2, x3
; xadd64 x18, x4, x5
; xadd64 x4, x6, x7
; xadd64 x5, x8, x9
; xadd64 x2, x10, x11
; xmov x11, x21
; xadd64 x3, x11, x13
; xadd64 x14, x14, x28
; xadd64 x0, x16, x12
; xadd64 x15, x12, x15
; xadd64 x1, x22, x24
; xadd64 x6, x20, x19
; xadd64 x4, x18, x4
; xadd64 x2, x5, x2
; xadd64 x14, x3, x14
; xadd64 x13, x13, x15
; xadd64 x14, x0, x14
; xadd64 x13, x13, x13
; xadd64 x0, x14, x13
; pop_frame_restore 112, {x17, x18, x20, x21, x22, x23, x29}
; xadd64 x15, x0, x15
; xadd64 x0, x1, x1
; xadd64 x1, x6, x4
; xadd64 x14, x2, x14
; xadd64 x15, x15, x0
; xadd64 x14, x1, x14
; xadd64 x15, x15, x15
; xadd64 x0, x14, x15
; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28}
; ret
;
; Disassembled:
; push_frame_save 112, x17, x18, x20, x21, x22, x23, x29
; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28
; xmov x12, sp
; call1 x12, 0x0 // target = 0x8
; xmov x20, x13
; xmov x22, x11
; xload64le_offset8 x29, sp, 0
; xload64le_offset8 x11, sp, 8
; xload64le_offset8 x13, sp, 16
; xload64le_offset8 x21, sp, 24
; xload64le_offset8 x23, sp, 32
; xadd64 x18, x0, x1
; xadd64 x17, x2, x3
; xadd64 x5, x4, x5
; xadd64 x6, x6, x7
; xadd64 x7, x8, x9
; xmov x0, x22
; xadd64 x4, x10, x0
; xmov x10, x20
; xadd64 x8, x12, x10
; xadd64 x14, x14, x15
; xadd64 x15, x29, x11
; xadd64 x13, x11, x13
; xadd64 x0, x21, x23
; xadd64 x1, x18, x17
; xadd64 x2, x5, x6
; xadd64 x3, x7, x4
; xadd64 x14, x8, x14
; xadd64 x13, x15, x13
; xadd64 x15, x0, x0
; xadd64 x0, x1, x2
; xmov x21, x12
; xload64le_offset8 x28, sp, 0
; xload64le_offset8 x16, sp, 8
; xload64le_offset8 x12, sp, 16
; xload64le_offset8 x15, sp, 24
; xload64le_offset8 x22, sp, 32
; xload64le_offset8 x24, sp, 40
; xadd64 x20, x0, x1
; xadd64 x19, x2, x3
; xadd64 x18, x4, x5
; xadd64 x4, x6, x7
; xadd64 x5, x8, x9
; xadd64 x2, x10, x11
; xmov x11, x21
; xadd64 x3, x11, x13
; xadd64 x14, x14, x28
; xadd64 x0, x16, x12
; xadd64 x15, x12, x15
; xadd64 x1, x22, x24
; xadd64 x6, x20, x19
; xadd64 x4, x18, x4
; xadd64 x2, x5, x2
; xadd64 x14, x3, x14
; xadd64 x13, x13, x15
; xadd64 x14, x0, x14
; xadd64 x13, x13, x13
; xadd64 x0, x14, x13
; pop_frame_restore 112, x17, x18, x20, x21, x22, x23, x29
; xadd64 x15, x0, x15
; xadd64 x0, x1, x1
; xadd64 x1, x6, x4
; xadd64 x14, x2, x14
; xadd64 x15, x15, x0
; xadd64 x14, x1, x14
; xadd64 x15, x15, x15
; xadd64 x0, x14, x15
; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28
; ret

function %call_indirect(i32) -> i64 {
Expand Down
Loading

0 comments on commit 44e5099

Please sign in to comment.