Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Windows on ARM (AArch64) Support #249

Merged
merged 30 commits into from
Dec 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9554298
Conditionally include ntdll.dll
hmelder Jan 5, 2023
c7902ef
Use text relocation instead of GOT
hmelder Jan 5, 2023
b4330af
Use FlushInstructionCache instead of clear_cache for arm64
hmelder Jan 5, 2023
dbf4d49
Load address in two stages (adrp, add)
hmelder Nov 17, 2023
1d2e52e
objc_msgSend.aarch64.S add comments
hmelder Nov 17, 2023
bac40ba
Add seh directives
hmelder Nov 18, 2023
d6255a3
Move .seh_proc into slow sloop section
hmelder Nov 25, 2023
3c2612e
Comment out cfi directives
hmelder Nov 25, 2023
be4facd
Substitute raw .seh directives with macros
hmelder Nov 25, 2023
1000065
Add documentation of SEH annotations
hmelder Nov 25, 2023
01ffd62
Conditionally include ntdll.dll
hmelder Jan 5, 2023
a594f84
Use text relocation instead of GOT
hmelder Jan 5, 2023
49b8888
Use FlushInstructionCache instead of clear_cache for arm64
hmelder Jan 5, 2023
eef8206
Load address in two stages (adrp, add)
hmelder Nov 17, 2023
0570f6a
objc_msgSend.aarch64.S add comments
hmelder Nov 17, 2023
21b752e
Add seh directives
hmelder Nov 18, 2023
e5d5475
Move .seh_proc into slow sloop section
hmelder Nov 25, 2023
19c0366
Comment out cfi directives
hmelder Nov 25, 2023
0c7cdd0
Substitute raw .seh directives with macros
hmelder Nov 25, 2023
42d7808
Add documentation of SEH annotations
hmelder Nov 25, 2023
f1d1861
Add include in UnexpectedException test
hmelder Nov 25, 2023
19f4fb6
Detect CPU Architecture with preprocessor
hmelder Nov 25, 2023
df39086
Cleanup CMakeLists.txt
hmelder Nov 26, 2023
a41498f
Remove line in objc_msgSend.aarch64.S
hmelder Nov 26, 2023
35b1ec5
Change Test CMakeList to use ARCHITECTURE var
hmelder Nov 26, 2023
bcb90b5
Use existing clear cache macro
hmelder Nov 26, 2023
01a3f43
Change _WIN64 to _WIN32 and reorder labels
hmelder Nov 26, 2023
f79a775
Remove macro and replace _WIN64 with _WIN32
hmelder Nov 27, 2023
d96c298
Remove argument from non-win32 macro
hmelder Nov 27, 2023
59d815f
Merge branch 'master' into woa_support
hmelder Nov 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CMake/detect_arch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// detect_arch.c
//
// Architecture probe for the build system. This file is intentionally
// uncompilable: every preprocessor branch ends in `#error <name>`, and the
// build scripts recover <name> from the compiler diagnostics. The names
// emitted here must stay in sync with the list the build scripts match
// (aarch64, arm, i386, x86_64, unknown).
//
// Each branch tests the GCC/Clang macro and the corresponding MSVC macro so
// that compilers which only define the MSVC spelling are still classified.
#if defined(__aarch64__) || defined(_M_ARM64)
#error aarch64
#elif defined(__arm__) || defined(_M_ARM)
#error arm
#elif defined(__i386__) || defined(_M_IX86)
#error i386
#elif defined(__x86_64__) || defined(_M_X64)
#error x86_64
#else
#error unknown
#endif
21 changes: 19 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ if (MSVC)
set(objc_LINK_FLAGS "/DEBUG /INCREMENTAL:NO ${objc_LINK_FLAGS}")
endif()

# Get Architecture without relying on CMake
hmelder marked this conversation as resolved.
Show resolved Hide resolved
# CMake/detect_arch.c never compiles: each of its preprocessor branches ends in
# `#error <arch>`, so the architecture name appears in the diagnostics captured
# in COMPILE_OUTPUT. The paths are relative to this directory so the probe is
# still found when this project is included through add_subdirectory().
try_compile(
	COMPILE_SUCCESS
	"${CMAKE_CURRENT_BINARY_DIR}"
	"${CMAKE_CURRENT_SOURCE_DIR}/CMake/detect_arch.c"
	OUTPUT_VARIABLE COMPILE_OUTPUT
)

if(NOT COMPILE_SUCCESS)
	# Prefer the token that directly follows the diagnostic's "error" marker
	# ("error: aarch64", "#error aarch64", "#error:  aarch64"). The captured
	# log also contains directory and compiler paths, and a bare search would
	# pick up an "arm" or "x86_64" that merely occurs inside one of them.
	string(REGEX MATCH "error[: ]+(aarch64|arm|i386|x86_64|unknown)"
		arch_diagnostic "${COMPILE_OUTPUT}")
	if(NOT "${arch_diagnostic}" STREQUAL "")
		set(ARCHITECTURE "${CMAKE_MATCH_1}")
	else()
		# Unrecognised diagnostic format: fall back to the first architecture
		# name found anywhere in the output.
		string(REGEX MATCH "(aarch64|arm|i386|x86_64|unknown)"
			ARCHITECTURE "${COMPILE_OUTPUT}")
	endif()
endif()

# Quoted so that an empty result is stored as an empty string rather than
# vanishing from the argument list.
set(ARCHITECTURE "${ARCHITECTURE}" CACHE STRING "Architecture Type")
message(STATUS "Architecture: ${ARCHITECTURE}")

# Build configuration
add_compile_definitions(GNUSTEP __OBJC_RUNTIME_INTERNAL__=1)

Expand Down Expand Up @@ -215,6 +230,10 @@ target_sources(objc PRIVATE ${libobjc_CXX_SRCS})

include(FindThreads)
target_link_libraries(objc Threads::Threads)
# Link against ntdll.dll for RtlRaiseException
if (WIN32)
target_link_libraries(objc ntdll.dll)
endif()


set_target_properties(objc PROPERTIES
Expand Down Expand Up @@ -342,8 +361,6 @@ configure_file(
add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)



if (TESTS)
enable_testing()
add_subdirectory(Test)
Expand Down
6 changes: 3 additions & 3 deletions Test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ if (ENABLE_ALL_OBJC_ARC_TESTS)
endif()

# UnexpectedException test currently fails on ARM and needs to be fixed
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
list(APPEND TESTS UnexpectedException.m)
endif ()
if(NOT ARCHITECTURE MATCHES "^(arm|aarch64)")
list(APPEND TESTS UnexpectedException.m)
endif()

# List of single-file tests that won't work with the legacy ABI and so
# shouldn't be run in legacy mode.
Expand Down
18 changes: 13 additions & 5 deletions block_to_imp.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#else
#include "safewindows.h"
#endif
#include "objc/runtime.h"
#include "objc/blocks_runtime.h"
Expand All @@ -22,11 +24,17 @@
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if __has_builtin(__builtin___clear_cache)
# define clear_cache __builtin___clear_cache

#if defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
/*
 * Windows-on-ARM replacement for the compiler's cache-clearing builtin.
 * Flushes the instruction cache of the current process for the byte range
 * [start, end).
 */
static inline void __clear_cache(void* start, void* end) {
	/* Subtract as char* so the length is a byte count in standard C;
	 * arithmetic directly on void* is only a compiler extension. */
	FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
}
#define clear_cache __clear_cache
#elif __has_builtin(__builtin___clear_cache)
#define clear_cache __builtin___clear_cache
#else
void __clear_cache(void* start, void* end);
# define clear_cache __clear_cache
void __clear_cache(void* start, void* end);
#define clear_cache __clear_cache
#endif


Expand All @@ -36,7 +44,6 @@ void __clear_cache(void* start, void* end);
#endif

#ifdef _WIN32
#include "safewindows.h"
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP && _WIN32_WINNT >= 0x0A00
// Prefer the *FromApp versions when we're being built in a Windows Store App context on
// Windows >= 10. *FromApp require the application to be manifested for "codeGeneration".
Expand Down Expand Up @@ -178,6 +185,7 @@ static struct trampoline_set *alloc_trampolines(char *start, char *end)
metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL;
mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC);
clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]);

return metadata;
}

Expand Down
188 changes: 158 additions & 30 deletions objc_msgSend.aarch64.S
Original file line number Diff line number Diff line change
@@ -1,15 +1,88 @@
#define ARGUMENT_SPILL_SIZE (8*10 + 8*16)
.macro MSGSEND receiver, sel
.cfi_startproc
cbz \receiver, 4f // Skip everything if the receiver is nil

/* Windows ARM64 Exception Handling
*
* Structured Exception Handling (SEH) on Windows ARM64 differs from the x64
* implementation. Functions consist of a single prologue and zero or more
* epilogues. Instead of using offsets for the .seh* directives to manipulate the
* stack frame, each directive corresponds to a single instruction.
*
* This presents a challenge for our objc_msgSend function, which only modifies
* the stack when a slow lookup is needed (see label "5").
*
* To address this, we move the directive marking the start of a function deep
* into the msgSend body to prevent marking every instruction as ".seh_nop."
*
* For Windows:
* - EH_START(x): Start of function (no effect on Windows)
* - EH_END(x): End of function (no effect on Windows)
* - EH_START_AT_OFFSET(x): Mark Start of function (Delayed)
* - EH_END_AT_OFFSET(x): Mark End of function (Delayed)
* - EH_END_PROLOGUE: End of function prologue
* - EH_START_EPILOGUE: Start of function epilogue
* - EH_END_EPILOGUE: End of function epilogue
* - EH_SAVE_FP_LR(x): Save Frame Pointer and Link Register
* - EH_STACK_ALLOC(x): Stack allocation (inside prologue)
* - EH_ADD_FP(x): Add to Frame Pointer
* - EH_NOP: Mark instruction with no unwinding relevance
*
* On non-Windows platforms EH_START and EH_END expand to .cfi_startproc and .cfi_endproc, and every other EH_* macro expands to nothing, so all of them can be used unconditionally.
*/

#ifdef _WIN32
# define EH_START
# define EH_END

# define EH_START_AT_OFFSET .seh_proc objc_msgSend
# define EH_END_AT_OFFSET .seh_endproc objc_msgSend

# define EH_END_PROLOGUE .seh_endprologue
# define EH_START_EPILOGUE .seh_startepilogue
# define EH_END_EPILOGUE .seh_endepilogue

# define EH_SAVE_FP_LR(x) .seh_save_fplr x
# define EH_STACK_ALLOC(x) .seh_stackalloc x
# define EH_ADD_FP(x) .seh_add_fp x

# define EH_NOP .seh_nop
#else
// Marks the real start and end of the function
# define EH_START .cfi_startproc
# define EH_END .cfi_endproc

// The following directives are either not
// needed or not available with CFI
# define EH_START_AT_OFFSET
# define EH_END_AT_OFFSET
# define EH_END_PROLOGUE
# define EH_START_EPILOGUE
# define EH_END_EPILOGUE
# define EH_SAVE_FP_LR(x)
# define EH_STACK_ALLOC(x)
# define EH_ADD_FP(x)
# define EH_NOP
#endif

.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
CDECL(objc_msgSend_stret):
EH_START

cbz x0, 4f // Skip everything if the receiver is nil
// Jump to 6: if this is a small object
ubfx x9, \receiver, #0, #SMALLOBJ_BITS
ubfx x9, x0, #0, #SMALLOBJ_BITS
cbnz x9, 6f

ldr x9, [\receiver] // Load class to x9 if not a small int
ldr x9, [x0] // Load class to x9 if not a small int
1:
ldr x9, [x9, #DTABLE_OFFSET] // Dtable -> x9
ldr w10, [\sel] // selector->index -> x10
ldr w10, [x1] // selector->index -> x10
ldr w11, [x9, #SHIFT_OFFSET] // dtable->shift -> x11

cmp x11, #8 // If this is a small dtable, jump to the
Expand Down Expand Up @@ -41,59 +114,114 @@
mov v0.d[1], x0
br lr
5: // Slow lookup
EH_START_AT_OFFSET

// Save anything that will be clobbered by
// the call
// the call.
// Note that we pre-index (see "!"), meaning
// that we adjust the sp before storing the pair
// of registers.
stp x0, x1, [sp, #-(ARGUMENT_SPILL_SIZE)]!
stp x2, x3, [sp, #16] // The order is arbitrary, except that
stp x4, x5, [sp, #32] // fp and lr must be spilled together and
stp x6, x7, [sp, #48] // it's convenient if \receiver is spilled at sp
EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE))

stp x2, x3, [sp, #16]
EH_NOP // The following instructions can be ignored by SEH
stp x4, x5, [sp, #32]
EH_NOP
stp x6, x7, [sp, #48]
EH_NOP
stp q0, q1, [sp, #64]
EH_NOP
stp q2, q3, [sp, #96]
EH_NOP
stp q4, q5, [sp, #128]
EH_NOP
stp q6, q7, [sp, #160]
stp fp, lr, [sp, #192]
add fp, sp, 192
stp \receiver, x8, [sp, #-16]!
EH_NOP
stp fp, lr, [sp, #192] // The order is arbitrary, except that
EH_SAVE_FP_LR(192) // fp and lr must be spilled together

add fp, sp, 192 // Adjust frame pointer
EH_ADD_FP(192)
stp x0, x8, [sp, #-16]! // it's convenient if x0 is spilled at sp
EH_STACK_ALLOC(16) // stp performed pre-indexing by sp-16

EH_END_PROLOGUE

#ifndef _WIN32
.cfi_def_cfa fp, 16
.cfi_offset fp, -16
.cfi_offset lr, -8
#endif
// We now have all argument registers, the link
// register and the receiver spilled on the
// stack, with sp containing
// the address of the receiver

mov x0, sp // &self, _cmd in arguments
mov x1, \sel
bl CDECL(slowMsgLookup) // This is the only place where the CFI directives
mov x1, x1
bl CDECL(slowMsgLookup) // This is the only place where the EH directives
// have to be accurate...
mov x9, x0 // IMP -> x9

EH_START_EPILOGUE
// Reload the spilled argument registers. Every offset is the prologue's
// store offset plus 16, because the prologue pushed one more 16-byte pair
// (x0, x8) after spilling the arguments.
ldp x0, x1, [sp, #16]
EH_NOP
ldp x2, x3, [sp, #32]
EH_NOP
ldp x4, x5, [sp, #48] // stored at #32 in the prologue; #64 is the x6/x7 slot
EH_NOP
ldp x6, x7, [sp, #64]
EH_NOP
ldp q0, q1, [sp, #80]
EH_NOP
ldp q2, q3, [sp, #112]
EH_NOP
ldp q4, q5, [sp, #144]
EH_NOP
ldp q6, q7, [sp, #176]
EH_NOP
ldp fp, lr, [sp, #208]
ldp \receiver, x8, [sp], #(ARGUMENT_SPILL_SIZE + 16)
EH_SAVE_FP_LR(208)

// Post-increment sp += ARGUMENT_SPILL_SIZE +16
ldp x0, x8, [sp], #(ARGUMENT_SPILL_SIZE + 16)
EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE + 16))

EH_END_EPILOGUE
EH_END_AT_OFFSET

br x9
6:
adrp x10, :got:SmallObjectClasses
ldr x10, [x10, :got_lo12:SmallObjectClasses]
// Load 63:12 of SmallObjectClasses address
// We use the CDECL macro as Windows prefixes
// cdecl conforming symbols with "_".
adrp x10, CDECL(SmallObjectClasses) // The macro handles this transparently.

// Add lower 12-bits of SmallObjectClasses address to x10
add x10, x10, :lo12:CDECL(SmallObjectClasses)
ldr x9, [x10, x9, lsl #3]

b 1b
.cfi_endproc
.endm
EH_END

.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
CDECL(objc_msgSend_stret):
MSGSEND x0, x1
#ifdef _WIN32
.text
.def objc_msgSend;
.scl 2;
.type 32;
.endef
hmelder marked this conversation as resolved.
Show resolved Hide resolved
.def objc_msgSend_fpret;
.scl 2;
.type 32;
.endef
.def objc_msgSend_stret;
.scl 2;
.type 32;
.endef

.section .drectve,"yn"
.ascii " /EXPORT:objc_msgSend"
.ascii " /EXPORT:objc_msgSend_fpret"
.ascii " /EXPORT:objc_msgSend_stret"
#endif