From 03d58b0746954ad11df58f9b71711e860a0078e8 Mon Sep 17 00:00:00 2001 From: Benjamin Oldenburg Date: Sat, 4 Apr 2026 16:29:28 +0200 Subject: [PATCH] arm64-win32 support : runtime --- include/tccdefs.h | 4 +- lib/bt-exe.c | 29 ++++++++ lib/libtcc1.c | 8 +- win32/include/_mingw.h | 9 +++ win32/include/setjmp.h | 49 ++++++++++++ win32/include/winapi/winnt.h | 139 +++++++++++++++++++++++++++++++++-- win32/lib/chkstk.S | 89 +++++++++++++++++++++- 7 files changed, 317 insertions(+), 10 deletions(-) diff --git a/include/tccdefs.h b/include/tccdefs.h index 45aa2c0b..57d1380a 100644 --- a/include/tccdefs.h +++ b/include/tccdefs.h @@ -224,7 +224,9 @@ &~3), *(type *)(ap - ((sizeof(type)+3)&~3))) #elif defined __aarch64__ -#if defined __APPLE__ +#if defined _WIN32 + typedef char *__builtin_va_list; +#elif defined __APPLE__ typedef struct { void *__stack; } __builtin_va_list; diff --git a/lib/bt-exe.c b/lib/bt-exe.c index 71c9c19c..60bf80e4 100644 --- a/lib/bt-exe.c +++ b/lib/bt-exe.c @@ -11,6 +11,21 @@ # define __declspec(n) #endif +#ifdef _WIN64 /* bt_init_pe_prog_base: recomputes p->prog_base from the AllocationBase that VirtualQuery reports for the rt_context itself; returns without changes when prog_base is zero, the query fails, or AllocationBase is null. */ +static void bt_init_pe_prog_base(rt_context *p) +{ + MEMORY_BASIC_INFORMATION mbi; + addr_t imagebase; + + if (!p->prog_base) + return; + if (!VirtualQuery(p, &mbi, sizeof(mbi)) || !mbi.AllocationBase) + return; + imagebase = (addr_t)mbi.AllocationBase - p->prog_base; + p->prog_base = (addr_t)mbi.AllocationBase - (imagebase & 0xffffffffu); +} +#endif + __declspec(dllexport) void __bt_init(rt_context *p, int is_exe) { @@ -24,6 +39,10 @@ void __bt_init(rt_context *p, int is_exe) if (p->bounds_start) __bound_init(p->bounds_start, -1); +#ifdef _WIN64 + bt_init_pe_prog_base(p); +#endif + /* add to chain */ rt_wait_sem(); p->next = g_rc, g_rc = p; @@ -66,3 +85,13 @@ ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s) buf[l] = 0; return buf; } + +#if defined(_WIN64) && defined(__aarch64__) +/* The bt-only Windows ARM64 build should not rely on importing this helper. This fallback performs a separate volatile load followed by a volatile store; it is not an atomic exchange. NOTE(review): confirm that no caller depends on atomicity.
*/ +LONG InterlockedExchange(LONG volatile *Target, LONG Value) +{ + LONG Old = *Target; + *Target = Value; + return Old; +} +#endif diff --git a/lib/libtcc1.c b/lib/libtcc1.c index 85c95df9..4bbd1cc7 100644 --- a/lib/libtcc1.c +++ b/lib/libtcc1.c @@ -630,6 +630,12 @@ long long __fixxfdi (long double a1) /* MSVC x64 intrinsic */ void __faststorefence(void) { - __asm__("lock; orl $0,(%rsp)"); +#if defined(__aarch64__) + /* ARM64: Data Memory Barrier (Inner Shareable) */ + __asm__("dmb ish"); +#else + /* x86-64: lock prefix to flush store buffer */ + __asm__("lock; orl $0,(%%rsp)" ::: "memory"); +#endif } #endif diff --git a/win32/include/_mingw.h b/win32/include/_mingw.h index d10a6b18..77a3bce6 100644 --- a/win32/include/_mingw.h +++ b/win32/include/_mingw.h @@ -70,12 +70,21 @@ #ifdef _WIN64 #define __stdcall +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_ARM64_) +#ifndef _M_ARM64 +#define _M_ARM64 1 +#endif +#ifndef _ARM64_ +#define _ARM64_ 1 +#endif +#else #define _AMD64_ 1 #define __x86_64 1 #define _M_X64 100 /* Visual Studio */ #define _M_AMD64 100 /* Visual Studio */ #define USE_MINGW_SETJMP_TWO_ARGS #define mingw_getsp tinyc_getbp +#endif #else #define __stdcall __attribute__((__stdcall__)) #define _X86_ 1 diff --git a/win32/include/setjmp.h b/win32/include/setjmp.h index e4f142a3..dc971c77 100644 --- a/win32/include/setjmp.h +++ b/win32/include/setjmp.h @@ -124,6 +124,55 @@ extern "C" { SETJMP_FLOAT128 Xmm14; SETJMP_FLOAT128 Xmm15; } _JUMP_BUFFER; +#elif defined(_ARM_) + +#define _JBLEN 28 +#define _JBTYPE int + + typedef struct __JUMP_BUFFER { + unsigned long Frame; + unsigned long R4; + unsigned long R5; + unsigned long R6; + unsigned long R7; + unsigned long R8; + unsigned long R9; + unsigned long R10; + unsigned long R11; + unsigned long Sp; + unsigned long Pc; + unsigned long Fpscr; + unsigned long long D[8]; + } _JUMP_BUFFER; +#elif defined(_ARM64_) + +#define _JBLEN 24 /* 24 slots of the 64-bit _JBTYPE below = 192 bytes; NOTE(review): the _JUMP_BUFFER that follows only sums to 192 bytes if unsigned long is 4 bytes - confirm for the target ABI */ +#define _JBTYPE unsigned __int64 + + typedef struct __JUMP_BUFFER {
unsigned __int64 Frame; + unsigned __int64 Reserved; + unsigned __int64 X19; + unsigned __int64 X20; + unsigned __int64 X21; + unsigned __int64 X22; + unsigned __int64 X23; + unsigned __int64 X24; + unsigned __int64 X25; + unsigned __int64 X26; + unsigned __int64 X27; + unsigned __int64 X28; + unsigned __int64 Fp; + unsigned __int64 Lr; + unsigned __int64 Sp; + unsigned long Fpcr; + unsigned long Fpsr; + double D[8]; + } _JUMP_BUFFER; +#else + +#define _JBLEN 1 +#define _JBTYPE int #endif #ifndef _JMP_BUF_DEFINED typedef _JBTYPE jmp_buf[_JBLEN]; diff --git a/win32/include/winapi/winnt.h b/win32/include/winapi/winnt.h index fb90d216..01a850a8 100644 --- a/win32/include/winapi/winnt.h +++ b/win32/include/winapi/winnt.h @@ -21,7 +21,7 @@ extern "C" { #define __CRT_UNALIGNED #endif -#if defined(__ia64__) || defined(__x86_64) +#if defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define UNALIGNED __CRT_UNALIGNED #ifdef _WIN64 #define UNALIGNED64 __CRT_UNALIGNED @@ -47,6 +47,9 @@ extern "C" { #endif #endif +#if !defined(I_X86_) && !defined(_IA64_) && !defined(_AMD64_) && defined(__aarch64__) && !defined(_ARM64_) +#define _ARM64_ +#endif #ifdef _WIN64 #define MAX_NATURAL_ALIGNMENT sizeof(ULONGLONG) @@ -65,7 +68,7 @@ extern "C" { #ifdef _WIN64 #ifdef _AMD64_ #define PROBE_ALIGNMENT(_s) TYPE_ALIGNMENT(DWORD) -#elif defined(_IA64_) +#elif defined(_IA64_) || defined(_ARM64_) #define PROBE_ALIGNMENT(_s) (TYPE_ALIGNMENT(_s) > TYPE_ALIGNMENT(DWORD) ?
TYPE_ALIGNMENT(_s) : TYPE_ALIGNMENT(DWORD)) #else #error No Target Architecture @@ -79,7 +82,7 @@ extern "C" { #include -#if defined(_X86_) || defined(__ia64__) || defined(__x86_64) +#if defined(_X86_) || defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define DECLSPEC_IMPORT __declspec(dllimport) #else #define DECLSPEC_IMPORT @@ -321,7 +324,7 @@ typedef DWORD LCID; #define Int32x32To64(a,b) (LONGLONG)((LONGLONG)(LONG)(a) *(LONG)(b)) #define UInt32x32To64(a,b) (ULONGLONG)((ULONGLONG)(DWORD)(a) *(DWORD)(b)) #define Int64ShrlMod32(a,b) ((DWORDLONG)(a)>>(b)) -#elif defined(__ia64__) || defined(__x86_64) +#elif defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define Int32x32To64(a,b) ((LONGLONG)((LONG)(a)) *(LONGLONG)((LONG)(b))) #define UInt32x32To64(a,b) ((ULONGLONG)((DWORD)(a)) *(ULONGLONG)((DWORD)(b))) #define Int64ShrlMod32(a,b) ((ULONGLONG)(a) >> (b)) @@ -829,7 +832,7 @@ typedef DWORD LCID; typedef ULONG_PTR KSPIN_LOCK; typedef KSPIN_LOCK *PKSPIN_LOCK; -#ifdef _AMD64_ +#if defined(_AMD64_) || defined(_ARM64_) #if defined(__x86_64) && !defined(RC_INVOKED) @@ -1336,6 +1339,7 @@ typedef DWORD LCID; #define LEGACY_SAVE_AREA_LENGTH sizeof(XMM_SAVE_AREA32) +#if defined(__x86_64) || defined(_AMD64_) typedef struct DECLSPEC_ALIGN(16) _CONTEXT { DWORD64 P1Home; DWORD64 P2Home; @@ -1407,6 +1411,7 @@ typedef DWORD LCID; DWORD64 LastExceptionToRip; DWORD64 LastExceptionFromRip; } CONTEXT,*PCONTEXT; +#endif /* defined(__x86_64) || defined(_AMD64_) */ #define RUNTIME_FUNCTION_INDIRECT 0x1 @@ -1417,6 +1422,123 @@ typedef DWORD LCID; } RUNTIME_FUNCTION,*PRUNTIME_FUNCTION; typedef PRUNTIME_FUNCTION (*PGET_RUNTIME_FUNCTION_CALLBACK)(DWORD64 ControlPc,PVOID Context); + +#if defined(_ARM64_) || defined(__aarch64__) + +/* ARM64 Context Definition. The C_ASSERTs that follow the ARM64_NT_CONTEXT structure pin its total size (0x390) and the offsets of selected members. */ +#define CONTEXT_ARM64 0x00400000 + +#ifndef CONTEXT_CONTROL +#define CONTEXT_CONTROL (CONTEXT_ARM64 | 0x00000001L) +#endif +#ifndef CONTEXT_INTEGER +#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x00000002L)
+#endif +#ifndef CONTEXT_FLOATING_POINT +#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x00000004L) +#endif +#ifndef CONTEXT_DEBUG +#define CONTEXT_DEBUG (CONTEXT_ARM64 | 0x00000008L) +#endif + +#ifndef CONTEXT_FULL +#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#endif +#ifndef CONTEXT_ALL +#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG) +#endif + +#ifndef ARM64_MAX_BREAKPOINTS +#define ARM64_MAX_BREAKPOINTS 8 +#endif +#ifndef ARM64_MAX_WATCHPOINTS +#define ARM64_MAX_WATCHPOINTS 2 +#endif + +#ifndef _ARM64_NT_NEON128_DECLARED +#define _ARM64_NT_NEON128_DECLARED + typedef union _ARM64_NT_NEON128 { + struct { + ULONGLONG Low; + LONGLONG High; + } DUMMYSTRUCTNAME; + double D[2]; + float S[4]; + WORD H[8]; + BYTE B[16]; + } ARM64_NT_NEON128,*PARM64_NT_NEON128; +#endif + +#ifndef _ARM64_CONTEXT_DECLARED +#define _ARM64_CONTEXT_DECLARED + typedef struct DECLSPEC_ALIGN(16) _ARM64_NT_CONTEXT { + ULONG ContextFlags; + ULONG Cpsr; + union { + struct { + DWORD64 X0; + DWORD64 X1; + DWORD64 X2; + DWORD64 X3; + DWORD64 X4; + DWORD64 X5; + DWORD64 X6; + DWORD64 X7; + DWORD64 X8; + DWORD64 X9; + DWORD64 X10; + DWORD64 X11; + DWORD64 X12; + DWORD64 X13; + DWORD64 X14; + DWORD64 X15; + DWORD64 X16; + DWORD64 X17; + DWORD64 X18; + DWORD64 X19; + DWORD64 X20; + DWORD64 X21; + DWORD64 X22; + DWORD64 X23; + DWORD64 X24; + DWORD64 X25; + DWORD64 X26; + DWORD64 X27; + DWORD64 X28; + DWORD64 Fp; + DWORD64 Lr; + } DUMMYSTRUCTNAME; + DWORD64 X[31]; + } DUMMYUNIONNAME; + DWORD64 Sp; + DWORD64 Pc; + ARM64_NT_NEON128 V[32]; + DWORD Fpcr; + DWORD Fpsr; + DWORD Bcr[ARM64_MAX_BREAKPOINTS]; + DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; + DWORD Wcr[ARM64_MAX_WATCHPOINTS]; + DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; + } ARM64_NT_CONTEXT,*PARM64_NT_CONTEXT; + + C_ASSERT(sizeof(ARM64_NT_CONTEXT) == 0x390); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, ContextFlags) == 0x000); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, X) == 0x008); +
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fp) == 0x0f0); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Lr) == 0x0f8); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Sp) == 0x100); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Pc) == 0x108); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, V) == 0x110); + C_ASSERT(sizeof(((ARM64_NT_CONTEXT *)0)->V[0]) == 16); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fpcr) == 0x310); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fpsr) == 0x314); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Bvr) == 0x338); + C_ASSERT(offsetof(ARM64_NT_CONTEXT, Wvr) == 0x380); +#endif + + typedef ARM64_NT_CONTEXT CONTEXT,*PCONTEXT; + +#endif /* _ARM64_ || __aarch64__ */ typedef DWORD (*POUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK)(HANDLE Process,PVOID TableAddress,PDWORD Entries,PRUNTIME_FUNCTION *Functions); #define OUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK_EXPORT_NAME "OutOfProcessFunctionTableCallback" @@ -3701,6 +3823,7 @@ typedef DWORD LCID; #define IMAGE_FILE_MACHINE_CEF 0x0CEF #define IMAGE_FILE_MACHINE_EBC 0x0EBC #define IMAGE_FILE_MACHINE_AMD64 0x8664 +#define IMAGE_FILE_MACHINE_ARM64 0xAA64 #define IMAGE_FILE_MACHINE_M32R 0x9041 #define IMAGE_FILE_MACHINE_CEE 0xC0EE @@ -3857,10 +3980,16 @@ typedef DWORD LCID; #define IMAGE_SUBSYSTEM_EFI_ROM 13 #define IMAGE_SUBSYSTEM_XBOX 14 +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020 +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040 +#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY 0x0080 +#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100 #define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 #define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 #define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800 +#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER 0x1000 #define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 +#define IMAGE_DLLCHARACTERISTICS_GUARD_CF 0x4000 #define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 #define IMAGE_DIRECTORY_ENTRY_EXPORT 0 diff --git a/win32/lib/chkstk.S b/win32/lib/chkstk.S index 6f583a53..43e21634 100644 --- a/win32/lib/chkstk.S +++
b/win32/lib/chkstk.S
@@ -8,7 +8,103 @@
 #endif
 
 /* ---------------------------------------------- */
-#ifndef __x86_64__
+#if defined(__aarch64__)
+/* ---------------------------------------------- */
+
+.globl __chkstk
+__chkstk:
+    /* Windows ARM64 stack probing helper.
+       arm64-gen.c passes the requested frame size in x15, scaled in 16-byte
+       units. Probe one 4 KiB page at a time and leave SP unchanged; the caller
+       subtracts SP after the probe returns.
+       Only x16, x17 and the condition flags are modified: the Windows ARM64
+       __chkstk contract treats every other register as preserved, so no
+       argument register (x0-x7) may be used as scratch here. */
+    mov x16, sp
+    lsl x17, x15, 4
+    cbz x17, L_chkstk_done
+L_chkstk_loop:
+    subs xzr, x17, 4096     /* compare only: sets flags, discards the result */
+    bls L_chkstk_tail
+    sub x16, x16, 4096
+    ldr xzr, [x16]
+    sub x17, x17, 4096
+    b L_chkstk_loop
+L_chkstk_tail:
+    sub x16, x16, x17
+    ldr xzr, [x16]
+L_chkstk_done:
+    ret
+
+/* Returns the current frame pointer (x29) in x0. */
+.globl _(tinyc_getbp)
+_(tinyc_getbp):
+    mov x0, x29
+    ret
+
+/* Returns the current stack pointer in x0. */
+.globl _(mingw_getsp)
+_(mingw_getsp):
+    mov x0, sp
+    ret
+
+.globl _(__mingw_setjmp)
+_(__mingw_setjmp):
+    /* x0 = _JUMP_BUFFER to fill; returns 0 in x0.
+       _JUMP_BUFFER layout matches win32/include/setjmp.h for _ARM64_:
+       0x00 Frame, 0x08 Reserved, 0x10-0x68 X19-X30, 0x70 Sp,
+       0x78 Fpcr/Fpsr, 0x80-0xB8 D8-D15. */
+    str xzr, [x0]           /* Frame = 0 */
+    stp x19, x20, [x0, 16]
+    stp x21, x22, [x0, 32]
+    stp x23, x24, [x0, 48]
+    stp x25, x26, [x0, 64]
+    stp x27, x28, [x0, 80]
+    stp x29, x30, [x0, 96]
+    mov x2, sp
+    str x2, [x0, 112]       /* Sp */
+    mrs x2, FPCR
+    str w2, [x0, 120]       /* Fpcr */
+    mrs x2, FPSR
+    str w2, [x0, 124]       /* Fpsr */
+    stp d8, d9, [x0, 128]
+    stp d10, d11, [x0, 144]
+    stp d12, d13, [x0, 160]
+    stp d14, d15, [x0, 176]
+    mov x0, 0
+    ret
+
+.globl _(__mingw_longjmp)
+_(__mingw_longjmp):
+    /* x0 = _JUMP_BUFFER filled by __mingw_setjmp, w1 = int value to return.
+       Reloads the saved registers and returns through the saved Lr with
+       x0 = value, or 1 when value is 0. */
+    ldp x19, x20, [x0, 16]
+    ldp x21, x22, [x0, 32]
+    ldp x23, x24, [x0, 48]
+    ldp x25, x26, [x0, 64]
+    ldp x27, x28, [x0, 80]
+    ldp x29, x30, [x0, 96]
+    ldr x2, [x0, 112]       /* Sp */
+    mov sp, x2
+    ldr w2, [x0, 120]       /* Fpcr */
+    msr FPCR, x2
+    ldr w2, [x0, 124]       /* Fpsr */
+    msr FPSR, x2
+    ldp d8, d9, [x0, 128]
+    ldp d10, d11, [x0, 144]
+    ldp d12, d13, [x0, 160]
+    ldp d14, d15, [x0, 176]
+    /* value is a 32-bit int: bits 63:32 of x1 are unspecified, so copy it
+       through w0 (which zero-extends into x0) before testing for zero. */
+    mov w0, w1
+    cbnz x0, L_longjmp_done
+    mov x0, 1
+L_longjmp_done:
+    ret
+
+/* ---------------------------------------------- */
+#elif !defined(__x86_64__)
 /* ---------------------------------------------- */
 
 .globl _(__chkstk)
@@ -69,5 +165,3 @@ _(tinyc_getbp):
 /* ---------------------------------------------- */
 #endif
 /* ---------------------------------------------- */
-
-