From 8ec7ce857b9488d5f2619ae5c59b3c5ae79be7b1 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 10 Jul 2020 19:17:04 -0400 Subject: [PATCH] Some minimal support for Apple Silicon With this and some hacks to various dependencies, Julia builds for Darwin ARM64. We don't get very far though, and quickly end up jumping into a bit of uninitialized JIT memory, so there's clearly more work to be done here. That said, let's take this one step at a time, so here are the few pieces that at least make it build. --- src/processor_arm.cpp | 2 +- src/signal-handling.c | 4 +- src/signals-mach.c | 86 ++++++++++++++++++++++++++++++++----------- src/signals-unix.c | 12 +++++- 4 files changed, 78 insertions(+), 26 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index b834e32271a78..c83110b9962b4 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -11,7 +11,7 @@ // This nesting is required to allow compilation on musl #define USE_DYN_GETAUXVAL -#if defined(_CPU_AARCH64_) +#if defined(_OS_LINUX_) && defined(_CPU_AARCH64_) # undef USE_DYN_GETAUXVAL # include <sys/auxv.h> #elif defined(__GLIBC_PREREQ) diff --git a/src/signal-handling.c b/src/signal-handling.c index 148b1598c2996..759f143849c76 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -127,8 +127,10 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx) return ((ucontext_t*)_ctx)->uc_mcontext.gregs[REG_EIP]; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_) return ((ucontext_t*)_ctx)->uc_mcontext.mc_eip; -#elif defined(_OS_DARWIN_) +#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_) return ((ucontext64_t*)_ctx)->uc_mcontext64->__ss.__rip; +#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + return ((ucontext64_t*)_ctx)->uc_mcontext64->__ss.__pc; #elif defined(_OS_WINDOWS_) && defined(_CPU_X86_) return ((CONTEXT*)_ctx)->Eip; #elif defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) diff --git a/src/signals-mach.c b/src/signals-mach.c index 280ce00236624..eafe30d590b1f 100644 --- 
a/src/signals-mach.c +++ b/src/signals-mach.c @@ -112,13 +112,30 @@ static kern_return_t profiler_segv_handler mach_msg_type_number_t code_count); #endif +#if defined(_CPU_X86_64_) +typedef x86_thread_state64_t host_thread_state_t; +typedef x86_exception_state64_t host_exception_state_t; +#define THREAD_STATE x86_THREAD_STATE64 +#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT +#define HOST_EXCEPTION_STATE x86_EXCEPTION_STATE64 +#define HOST_EXCEPTION_STATE_COUNT x86_EXCEPTION_STATE64_COUNT + enum x86_trap_flags { USER_MODE = 0x4, WRITE_FAULT = 0x2, PAGE_PRESENT = 0x1 }; -static void jl_call_in_state(jl_ptls_t ptls2, x86_thread_state64_t *state, +#elif defined(_CPU_AARCH64_) +typedef arm_thread_state64_t host_thread_state_t; +typedef arm_exception_state64_t host_exception_state_t; +#define THREAD_STATE ARM_THREAD_STATE64 +#define THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT +#define HOST_EXCEPTION_STATE ARM_EXCEPTION_STATE64 +#define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT +#endif + +static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state, void (*fptr)(void)) { uint64_t rsp = (uint64_t)ptls2->signal_stack + sig_stack_size; @@ -128,15 +145,20 @@ static void jl_call_in_state(jl_ptls_t ptls2, x86_thread_state64_t *state, rsp -= sizeof(void*); *(void**)rsp = NULL; +#ifdef _CPU_X86_64_ state->__rsp = rsp; // set stack pointer state->__rip = (uint64_t)fptr; // "call" the function +#else + state->__sp = rsp; + state->__pc = (uint64_t)fptr; +#endif } static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exception) { - unsigned int count = MACHINE_THREAD_STATE_COUNT; - x86_thread_state64_t state; - kern_return_t ret = thread_get_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, &count); + unsigned int count = THREAD_STATE_COUNT; + host_thread_state_t state; + kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); jl_ptls_t ptls2 = 
jl_all_tls_states[tid]; if (!ptls2->safe_restore) { @@ -146,7 +168,7 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio ptls2->sig_exception = exception; } jl_call_in_state(ptls2, &state, &jl_sig_throw); - ret = thread_set_state(thread, x86_THREAD_STATE64, + ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); } @@ -160,10 +182,10 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, exception_data_t code, mach_msg_type_number_t code_count) { - unsigned int count = MACHINE_THREAD_STATE_COUNT; - unsigned int exc_count = X86_EXCEPTION_STATE64_COUNT; - x86_exception_state64_t exc_state; - x86_thread_state64_t state; + unsigned int count = THREAD_STATE_COUNT; + unsigned int exc_count = HOST_EXCEPTION_STATE_COUNT; + host_exception_state_t exc_state; + host_thread_state_t state; #ifdef LIBOSXUNWIND if (thread == mach_profiler_thread) { return profiler_segv_handler(exception_port, thread, task, exception, code, code_count); @@ -190,9 +212,13 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, return KERN_SUCCESS; } assert(exception == EXC_BAD_ACCESS); - kern_return_t ret = thread_get_state(thread, x86_EXCEPTION_STATE64, (thread_state_t)&exc_state, &exc_count); + kern_return_t ret = thread_get_state(thread, HOST_EXCEPTION_STATE, (thread_state_t)&exc_state, &exc_count); HANDLE_MACH_ERROR("thread_get_state", ret); +#ifdef _CPU_X86_64_ uint64_t fault_addr = exc_state.__faultvaddr; +#else + uint64_t fault_addr = exc_state.__far; +#endif if (jl_addr_is_safepoint(fault_addr)) { if (jl_mach_gc_wait(ptls2, thread, tid)) return KERN_SUCCESS; @@ -236,7 +262,7 @@ kern_return_t catch_exception_raise(mach_port_t exception_port, return KERN_SUCCESS; } else { - kern_return_t ret = thread_get_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, &count); + kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); 
HANDLE_MACH_ERROR("thread_get_state", ret); jl_critical_error(SIGSEGV, (unw_context_t*)&state, ptls2->bt_data, &ptls2->bt_size); @@ -264,12 +290,12 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) HANDLE_MACH_ERROR("thread_suspend", ret); // Do the actual sampling - unsigned int count = MACHINE_THREAD_STATE_COUNT; + unsigned int count = THREAD_STATE_COUNT; static unw_context_t state; memset(&state, 0, sizeof(unw_context_t)); // Get the state of the suspended thread - ret = thread_get_state(tid_port, x86_THREAD_STATE64, (thread_state_t)&state, &count); + ret = thread_get_state(tid_port, THREAD_STATE, (thread_state_t)&state, &count); // Initialize the unwind context with the suspend thread's state *ctx = &state; @@ -325,9 +351,9 @@ static void jl_exit_thread0(int exitstate) ret = thread_abort(thread); HANDLE_MACH_ERROR("thread_abort", ret); - unsigned int count = MACHINE_THREAD_STATE_COUNT; - x86_thread_state64_t state; - ret = thread_get_state(thread, x86_THREAD_STATE64, + unsigned int count = THREAD_STATE_COUNT; + host_thread_state_t state; + ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); void (*exit_func)(int) = &_exit; @@ -338,10 +364,16 @@ static void jl_exit_thread0(int exitstate) exit_func = &exit; } +#ifdef _CPU_X86_64_ // First integer argument. Not portable but good enough =) state.__rdi = exitstate; +#elif defined(_CPU_AARCH64_) + state.__x[0] = exitstate; +#else +#error Fill in first integer argument here +#endif jl_call_in_state(ptls2, &state, (void (*)(void))exit_func); - ret = thread_set_state(thread, x86_THREAD_STATE64, + ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); @@ -368,7 +400,7 @@ static kern_return_t profiler_segv_handler mach_msg_type_number_t code_count) { assert(thread == mach_profiler_thread); - x86_thread_state64_t state; + host_thread_state_t state; // Not currently unwinding. 
Raise regular segfault if (forceDwarf == -2) @@ -379,24 +411,34 @@ static kern_return_t profiler_segv_handler else forceDwarf = -1; - unsigned int count = MACHINE_THREAD_STATE_COUNT; + unsigned int count = THREAD_STATE_COUNT; - thread_get_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, &count); + thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count); +#ifdef _CPU_X86_64_ // don't change cs fs gs rflags uint64_t cs = state.__cs; uint64_t fs = state.__fs; uint64_t gs = state.__gs; uint64_t rflags = state.__rflags; +#elif defined(_CPU_AARCH64_) + uint64_t cpsr = state.__cpsr; +#else +#error Unknown CPU +#endif - memcpy(&state, &profiler_uc, sizeof(x86_thread_state64_t)); + memcpy(&state, &profiler_uc, sizeof(state)); +#ifdef _CPU_X86_64_ state.__cs = cs; state.__fs = fs; state.__gs = gs; state.__rflags = rflags; +#else + state.__cpsr = cpsr; +#endif - kern_return_t ret = thread_set_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, count); + kern_return_t ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); return KERN_SUCCESS; diff --git a/src/signals-unix.c b/src/signals-unix.c index cd86a77023347..c3d93735d1168 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -72,9 +72,12 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void * #elif defined(_OS_LINUX_) && defined(_CPU_ARM_) const ucontext_t *ctx = (const ucontext_t*)_ctx; return ctx->uc_mcontext.arm_sp; -#elif defined(_OS_DARWIN_) +#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_) const ucontext64_t *ctx = (const ucontext64_t*)_ctx; return ctx->uc_mcontext64->__ss.__rsp; +#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + const ucontext64_t *ctx = (const ucontext64_t*)_ctx; + return ctx->uc_mcontext64->__ss.__sp; #else // TODO Add support for FreeBSD and PowerPC(64)? 
return 0; @@ -150,7 +153,7 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c ctx->uc_mcontext.arm_sp = rsp; ctx->uc_mcontext.arm_lr = 0; // Clear link register ctx->uc_mcontext.arm_pc = target; -#elif defined(_OS_DARWIN_) +#elif defined(_OS_DARWIN_) && (defined(_CPU_X86_64_) || defined(_CPU_AARCH64_)) // Only used for SIGFPE. // This doesn't seems to be reliable when the SIGFPE is generated // from a divide-by-zero exception, which is now handled by @@ -159,8 +162,13 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c ucontext64_t *ctx = (ucontext64_t*)_ctx; rsp -= sizeof(void*); *(void**)rsp = NULL; +#if defined(_CPU_X86_64_) ctx->uc_mcontext64->__ss.__rsp = rsp; ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr; +#else + ctx->uc_mcontext64->__ss.__sp = rsp; + ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr; +#endif #else #warning "julia: throw-in-context not supported on this platform" // TODO Add support for PowerPC(64)?