From 7942eab9d0024ce8b4a591edbc5b745f0246ce46 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 7 Jun 2024 12:54:36 +0100 Subject: [PATCH] gh-119726: JIT: re-use trampolines on AArch64 When emitting AArch64 trampolines at the end of every data stencil, re-use existing ones for the same symbol. Fix the disassembly to reflect the "bl" instruction without the relocation. --- Tools/jit/_stencils.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 6e046df3026ae9..211a9784b1a03f 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -4,6 +4,7 @@ import enum import sys import typing +import re import _schema @@ -181,6 +182,7 @@ class Stencil: body: bytearray = dataclasses.field(default_factory=bytearray, init=False) holes: list[Hole] = dataclasses.field(default_factory=list, init=False) disassembly: list[str] = dataclasses.field(default_factory=list, init=False) + aarch64_trampolines: dict = dataclasses.field(default_factory=dict, init=False) def pad(self, alignment: int) -> None: """Pad the stencil to the given alignment.""" @@ -189,14 +191,39 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def emit_aarch64_trampoline(self, hole: Hole) -> None: + def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" - base = len(self.body) + reuse_trampoline = hole.symbol in self.aarch64_trampolines + if reuse_trampoline: + # Re-use the base address of the previously created trampoline + base = self.aarch64_trampolines[hole.symbol] + else: + self.pad(alignment) + base = len(self.body) where = slice(hole.offset, hole.offset + 4) instruction = int.from_bytes(self.body[where], sys.byteorder) instruction &= 0xFC000000 instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF 
self.body[where] = instruction.to_bytes(4, sys.byteorder) + + # Fix the disassembly for the branch to call the trampoline + bl_instruction = f"{hole.offset:x}: {instruction:x} bl {hole.offset:#x} <{hole.symbol}> // trampoline" + self.disassembly = [ + bl_instruction if line.startswith(f"{hole.offset:x}:") else line + for line in self.disassembly + ] + + # Remove the relocation once the bl instruction has been fixed + relocation_regex = re.compile(rf"{hole.offset:016x}:\s+{hole.kind}\s+_?{hole.symbol}") + self.disassembly = [ + line2 for line2 in self.disassembly if not relocation_regex.match(line2) + ] + + if reuse_trampoline: + # There is no need to emit a new trampoline. + return + + # Emit a new trampoline only if is not already present in the Stencil self.disassembly += [ f"{base + 4 * 0:x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", @@ -225,6 +252,7 @@ def emit_aarch64_trampoline(self, hole: Hole) -> None: ] ): self.holes.append(hole.replace(offset=base + 4 * i, kind=kind)) + self.aarch64_trampolines.update({hole.symbol: base}) def remove_jump(self, *, alignment: int = 1) -> None: """Remove a zero-length continuation jump, if it exists.""" @@ -300,8 +328,7 @@ def process_relocations(self, *, alignment: int = 1) -> None: in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26", "ARM64_RELOC_BRANCH26"} and hole.value is HoleValue.ZERO ): - self.code.pad(alignment) - self.code.emit_aarch64_trampoline(hole) + self.code.emit_aarch64_trampoline(hole, alignment) self.code.holes.remove(hole) self.code.remove_jump(alignment=alignment) self.code.pad(alignment)