arm64-win32 support : runtime

This commit is contained in:
Benjamin Oldenburg 2026-04-04 16:29:28 +02:00 committed by grischka
parent ff5d3b4874
commit 03d58b0746
7 changed files with 317 additions and 10 deletions

View File

@ -224,7 +224,9 @@
&~3), *(type *)(ap - ((sizeof(type)+3)&~3)))
#elif defined __aarch64__
#if defined __APPLE__
#if defined _WIN32
typedef char *__builtin_va_list;
#elif defined __APPLE__
typedef struct {
void *__stack;
} __builtin_va_list;

View File

@ -11,6 +11,21 @@
# define __declspec(n)
#endif
#ifdef _WIN64
/* Recompute p->prog_base from the allocation base of the memory region
   that contains the rt_context itself (as reported by VirtualQuery).
   Nothing is changed when prog_base is zero, when VirtualQuery fails,
   or when it reports no allocation base. */
static void bt_init_pe_prog_base(rt_context *p)
{
    MEMORY_BASIC_INFORMATION region;
    addr_t load_base, delta;

    if (p->prog_base) {
        if (VirtualQuery(p, &region, sizeof(region)) && region.AllocationBase) {
            load_base = (addr_t)region.AllocationBase;
            /* distance between the allocation base and the stored value */
            delta = load_base - p->prog_base;
            /* only the low 32 bits of that distance are kept */
            p->prog_base = load_base - (delta & 0xffffffffu);
        }
    }
}
#endif
__declspec(dllexport)
void __bt_init(rt_context *p, int is_exe)
{
@ -24,6 +39,10 @@ void __bt_init(rt_context *p, int is_exe)
if (p->bounds_start)
__bound_init(p->bounds_start, -1);
#ifdef _WIN64
bt_init_pe_prog_base(p);
#endif
/* add to chain */
rt_wait_sem();
p->next = g_rc, g_rc = p;
@ -66,3 +85,13 @@ ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s)
buf[l] = 0;
return buf;
}
#if defined(_WIN64) && defined(__aarch64__)
/* The bt-only Windows ARM64 build should not rely on importing this helper.
   Local replacement: stores Value into *Target and returns the value that
   *Target held before the store.
   NOTE(review): the load and the store below are two separate volatile
   accesses, not a single atomic read-modify-write.  Another thread can
   modify *Target between them, so this does not give the guarantee of the
   real InterlockedExchange - confirm that callers do not depend on it. */
LONG InterlockedExchange(LONG volatile *Target, LONG Value)
{
/* read the previous contents first ... */
LONG Old = *Target;
/* ... then overwrite them */
*Target = Value;
return Old;
}
#endif

View File

@ -630,6 +630,12 @@ long long __fixxfdi (long double a1)
/* MSVC x64 intrinsic: full store fence.
   Exactly one barrier is emitted, chosen by target architecture.  The
   x86 instruction must not appear outside the #else branch, otherwise an
   aarch64 build is handed an x86 opcode. */
void __faststorefence(void)
{
#if defined(__aarch64__)
    /* ARM64: Data Memory Barrier (Inner Shareable) */
    __asm__("dmb ish");
#else
    /* x86-64: locked read-modify-write on the stack top drains the store
       buffer; "%%" is the literal '%' required in extended asm */
    __asm__("lock; orl $0,(%%rsp)" ::: "memory");
#endif
}
#endif

View File

@ -70,12 +70,21 @@
#ifdef _WIN64
#define __stdcall
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_ARM64_)
#ifndef _M_ARM64
#define _M_ARM64 1
#endif
#ifndef _ARM64_
#define _ARM64_ 1
#endif
#else
#define _AMD64_ 1
#define __x86_64 1
#define _M_X64 100 /* Visual Studio */
#define _M_AMD64 100 /* Visual Studio */
#define USE_MINGW_SETJMP_TWO_ARGS
#define mingw_getsp tinyc_getbp
#endif
#else
#define __stdcall __attribute__((__stdcall__))
#define _X86_ 1

View File

@ -124,6 +124,55 @@ extern "C" {
SETJMP_FLOAT128 Xmm14;
SETJMP_FLOAT128 Xmm15;
} _JUMP_BUFFER;
#elif defined(_ARM_)
/* 32-bit ARM: jmp_buf is _JBLEN elements of _JBTYPE.
   NOTE(review): 28 ints = 112 bytes, which equals the struct below only if
   unsigned long is 4 bytes wide (as on Windows) - confirm for other ABIs. */
#define _JBLEN 28
#define _JBTYPE int
/* Register save area viewed as a struct: Frame, R4-R11, Sp, Pc, Fpscr and
   eight 64-bit D slots. */
typedef struct __JUMP_BUFFER {
unsigned long Frame;
unsigned long R4;
unsigned long R5;
unsigned long R6;
unsigned long R7;
unsigned long R8;
unsigned long R9;
unsigned long R10;
unsigned long R11;
unsigned long Sp;
unsigned long Pc;
unsigned long Fpscr;
unsigned long long D[8];
} _JUMP_BUFFER;
#elif defined(_ARM64_)
/* ARM64: jmp_buf is 24 elements of unsigned __int64 (192 bytes). */
#define _JBLEN 24
#define _JBTYPE unsigned __int64
/* Register save area viewed as a struct.  The byte offsets noted below are
   the ones the ARM64 __mingw_setjmp/__mingw_longjmp assembly uses; they
   assume unsigned long is 4 bytes wide (Windows LLP64). */
typedef struct __JUMP_BUFFER {
unsigned __int64 Frame;     /* 0x00 */
unsigned __int64 Reserved;  /* 0x08 */
unsigned __int64 X19;       /* 0x10 */
unsigned __int64 X20;
unsigned __int64 X21;
unsigned __int64 X22;
unsigned __int64 X23;
unsigned __int64 X24;
unsigned __int64 X25;
unsigned __int64 X26;
unsigned __int64 X27;
unsigned __int64 X28;
unsigned __int64 Fp;        /* 0x60 (x29) */
unsigned __int64 Lr;        /* 0x68 (x30) */
unsigned __int64 Sp;        /* 0x70 */
unsigned long Fpcr;         /* 0x78 */
unsigned long Fpsr;         /* 0x7c */
double D[8];                /* 0x80-0xb8 (d8-d15) */
} _JUMP_BUFFER;
#else
#define _JBLEN 1
#define _JBTYPE int
#endif
#ifndef _JMP_BUF_DEFINED
typedef _JBTYPE jmp_buf[_JBLEN];

View File

@ -21,7 +21,7 @@ extern "C" {
#define __CRT_UNALIGNED
#endif
#if defined(__ia64__) || defined(__x86_64)
#if defined(__ia64__) || defined(__x86_64) || defined(__aarch64__)
#define UNALIGNED __CRT_UNALIGNED
#ifdef _WIN64
#define UNALIGNED64 __CRT_UNALIGNED
@ -47,6 +47,9 @@ extern "C" {
#endif
#endif
#if !defined(I_X86_) && !defined(_IA64_) && !defined(_AMD64_) && defined(__aarch64__) && !defined(_ARM64_)
#define _ARM64_
#endif
#ifdef _WIN64
#define MAX_NATURAL_ALIGNMENT sizeof(ULONGLONG)
@ -65,7 +68,7 @@ extern "C" {
#ifdef _WIN64
#ifdef _AMD64_
#define PROBE_ALIGNMENT(_s) TYPE_ALIGNMENT(DWORD)
#elif defined(_IA64_)
#elif defined(_IA64_) || defined(_ARM64_)
#define PROBE_ALIGNMENT(_s) (TYPE_ALIGNMENT(_s) > TYPE_ALIGNMENT(DWORD) ? TYPE_ALIGNMENT(_s) : TYPE_ALIGNMENT(DWORD))
#else
#error No Target Architecture
@ -79,7 +82,7 @@ extern "C" {
#include <basetsd.h>
#if defined(_X86_) || defined(__ia64__) || defined(__x86_64)
#if defined(_X86_) || defined(__ia64__) || defined(__x86_64) || defined(__aarch64__)
#define DECLSPEC_IMPORT __declspec(dllimport)
#else
#define DECLSPEC_IMPORT
@ -321,7 +324,7 @@ typedef DWORD LCID;
#define Int32x32To64(a,b) (LONGLONG)((LONGLONG)(LONG)(a) *(LONG)(b))
#define UInt32x32To64(a,b) (ULONGLONG)((ULONGLONG)(DWORD)(a) *(DWORD)(b))
#define Int64ShrlMod32(a,b) ((DWORDLONG)(a)>>(b))
#elif defined(__ia64__) || defined(__x86_64)
#elif defined(__ia64__) || defined(__x86_64) || defined(__aarch64__)
#define Int32x32To64(a,b) ((LONGLONG)((LONG)(a)) *(LONGLONG)((LONG)(b)))
#define UInt32x32To64(a,b) ((ULONGLONG)((DWORD)(a)) *(ULONGLONG)((DWORD)(b)))
#define Int64ShrlMod32(a,b) ((ULONGLONG)(a) >> (b))
@ -829,7 +832,7 @@ typedef DWORD LCID;
typedef ULONG_PTR KSPIN_LOCK;
typedef KSPIN_LOCK *PKSPIN_LOCK;
#ifdef _AMD64_
#if defined(_AMD64_) || defined(_ARM64_)
#if defined(__x86_64) && !defined(RC_INVOKED)
@ -1336,6 +1339,7 @@ typedef DWORD LCID;
#define LEGACY_SAVE_AREA_LENGTH sizeof(XMM_SAVE_AREA32)
#if defined(__x86_64) || defined(_AMD64_)
typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
DWORD64 P1Home;
DWORD64 P2Home;
@ -1407,6 +1411,7 @@ typedef DWORD LCID;
DWORD64 LastExceptionToRip;
DWORD64 LastExceptionFromRip;
} CONTEXT,*PCONTEXT;
#endif /* defined(__x86_64) || defined(_AMD64_) */
#define RUNTIME_FUNCTION_INDIRECT 0x1
@ -1417,6 +1422,123 @@ typedef DWORD LCID;
} RUNTIME_FUNCTION,*PRUNTIME_FUNCTION;
typedef PRUNTIME_FUNCTION (*PGET_RUNTIME_FUNCTION_CALLBACK)(DWORD64 ControlPc,PVOID Context);
#if defined(_ARM64_) || defined(__aarch64__)
/* ARM64 Context Definition */
/* Architecture tag that is or-ed into every CONTEXT_* flag below. */
#define CONTEXT_ARM64 0x00400000
/* Each flag is only defined when no earlier definition exists, so a
   previously included header keeps precedence. */
#ifndef CONTEXT_CONTROL
#define CONTEXT_CONTROL (CONTEXT_ARM64 | 0x00000001L)
#endif
#ifndef CONTEXT_INTEGER
#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x00000002L)
#endif
#ifndef CONTEXT_FLOATING_POINT
#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x00000004L)
#endif
#ifndef CONTEXT_DEBUG
#define CONTEXT_DEBUG (CONTEXT_ARM64 | 0x00000008L)
#endif
/* CONTEXT_FULL combines control, integer and floating point; CONTEXT_ALL
   additionally includes the debug flag. */
#ifndef CONTEXT_FULL
#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
#endif
#ifndef CONTEXT_ALL
#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG)
#endif
/* Array lengths of the Bcr/Bvr and Wcr/Wvr members of ARM64_NT_CONTEXT. */
#ifndef ARM64_MAX_BREAKPOINTS
#define ARM64_MAX_BREAKPOINTS 8
#endif
#ifndef ARM64_MAX_WATCHPOINTS
#define ARM64_MAX_WATCHPOINTS 2
#endif
#ifndef _ARM64_NT_NEON128_DECLARED
#define _ARM64_NT_NEON128_DECLARED
/* One 128-bit vector register slot (element type of ARM64_NT_CONTEXT.V),
   accessible as a Low/High 64-bit pair or as arrays of doubles, floats,
   16-bit words or bytes.  All views overlay the same 16 bytes. */
typedef union _ARM64_NT_NEON128 {
struct {
ULONGLONG Low;
LONGLONG High;
} DUMMYSTRUCTNAME;
double D[2];
float S[4];
WORD H[8];
BYTE B[16];
} ARM64_NT_NEON128,*PARM64_NT_NEON128;
#endif
#ifndef _ARM64_CONTEXT_DECLARED
#define _ARM64_CONTEXT_DECLARED
/* ARM64 thread context record.  The byte offsets noted on members are the
   ones enforced by the C_ASSERT checks that follow this definition; the
   total size is asserted to be 0x390 bytes. */
typedef struct DECLSPEC_ALIGN(16) _ARM64_NT_CONTEXT {
ULONG ContextFlags;   /* 0x000 */
ULONG Cpsr;
/* General registers, addressable by name (X0..X28, Fp, Lr) or by index
   through X[31]; both views overlay the same storage. */
union {
struct {
DWORD64 X0;
DWORD64 X1;
DWORD64 X2;
DWORD64 X3;
DWORD64 X4;
DWORD64 X5;
DWORD64 X6;
DWORD64 X7;
DWORD64 X8;
DWORD64 X9;
DWORD64 X10;
DWORD64 X11;
DWORD64 X12;
DWORD64 X13;
DWORD64 X14;
DWORD64 X15;
DWORD64 X16;
DWORD64 X17;
DWORD64 X18;
DWORD64 X19;
DWORD64 X20;
DWORD64 X21;
DWORD64 X22;
DWORD64 X23;
DWORD64 X24;
DWORD64 X25;
DWORD64 X26;
DWORD64 X27;
DWORD64 X28;
DWORD64 Fp;           /* 0x0f0 */
DWORD64 Lr;           /* 0x0f8 */
} DUMMYSTRUCTNAME;
DWORD64 X[31];        /* 0x008 */
} DUMMYUNIONNAME;
DWORD64 Sp;           /* 0x100 */
DWORD64 Pc;           /* 0x108 */
ARM64_NT_NEON128 V[32]; /* 0x110, 16 bytes per element */
DWORD Fpcr;           /* 0x310 */
DWORD Fpsr;           /* 0x314 */
/* Debug registers: breakpoint control/value, watchpoint control/value. */
DWORD Bcr[ARM64_MAX_BREAKPOINTS];
DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; /* 0x338 */
DWORD Wcr[ARM64_MAX_WATCHPOINTS];
DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; /* 0x380 */
} ARM64_NT_CONTEXT,*PARM64_NT_CONTEXT;
C_ASSERT(sizeof(ARM64_NT_CONTEXT) == 0x390);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, ContextFlags) == 0x000);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, X) == 0x008);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fp) == 0x0f0);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Lr) == 0x0f8);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Sp) == 0x100);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Pc) == 0x108);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, V) == 0x110);
C_ASSERT(sizeof(((ARM64_NT_CONTEXT *)0)->V[0]) == 16);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fpcr) == 0x310);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Fpsr) == 0x314);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Bvr) == 0x338);
C_ASSERT(offsetof(ARM64_NT_CONTEXT, Wvr) == 0x380);
#endif
typedef ARM64_NT_CONTEXT CONTEXT,*PCONTEXT;
#endif /* _ARM64_ || __aarch64__ */
typedef DWORD (*POUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK)(HANDLE Process,PVOID TableAddress,PDWORD Entries,PRUNTIME_FUNCTION *Functions);
#define OUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK_EXPORT_NAME "OutOfProcessFunctionTableCallback"
@ -3701,6 +3823,7 @@ typedef DWORD LCID;
#define IMAGE_FILE_MACHINE_CEF 0x0CEF
#define IMAGE_FILE_MACHINE_EBC 0x0EBC
#define IMAGE_FILE_MACHINE_AMD64 0x8664
#define IMAGE_FILE_MACHINE_ARM64 0xAA64
#define IMAGE_FILE_MACHINE_M32R 0x9041
#define IMAGE_FILE_MACHINE_CEE 0xC0EE
@ -3857,10 +3980,16 @@ typedef DWORD LCID;
#define IMAGE_SUBSYSTEM_EFI_ROM 13
#define IMAGE_SUBSYSTEM_XBOX 14
#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020
#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040
#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY 0x0080
#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100
#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200
#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400
#define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800
#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER 0x1000
#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000
#define IMAGE_DLLCHARACTERISTICS_GUARD_CF 0x4000
#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000
#define IMAGE_DIRECTORY_ENTRY_EXPORT 0

View File

@ -8,7 +8,92 @@
#endif
/* ---------------------------------------------- */
#ifndef __x86_64__
#if defined(__aarch64__)
/* ---------------------------------------------- */
.globl __chkstk
__chkstk:
    /* Windows ARM64 stack probing helper.
       arm64-gen.c passes the requested frame size in x15, scaled in 16-byte
       units. Probe one 4 KiB page at a time and leave SP unchanged; the caller
       subtracts SP after the probe returns.
       Only x16, x17 and the condition flags are modified.  The helper runs
       while x0-x7 may still hold the incoming arguments of the calling
       function, so the size comparison must not write to x0. */
    mov x16, sp                 /* x16 = probe cursor, starts at SP */
    lsl x17, x15, 4             /* x17 = remaining bytes (x15 * 16) */
    cbz x17, L_chkstk_done
L_chkstk_loop:
    subs xzr, x17, 4096         /* compare only: flags for x17 - 4096 */
    bls L_chkstk_tail           /* at most one page left */
    sub x16, x16, 4096
    ldr xzr, [x16]              /* touch the page, result discarded */
    sub x17, x17, 4096
    b L_chkstk_loop
L_chkstk_tail:
    sub x16, x16, x17
    ldr xzr, [x16]              /* touch the lowest address of the frame */
L_chkstk_done:
    ret
/* Returns the current value of x29 (the frame pointer register) in x0. */
.globl _(tinyc_getbp)
_(tinyc_getbp):
mov x0, x29
ret
/* Returns the current stack pointer in x0. */
.globl _(mingw_getsp)
_(mingw_getsp):
mov x0, sp
ret
/* Saves the calling environment into the jump buffer addressed by x0 and
   returns 0 in x0.  x2 is used as a scratch register. */
.globl _(__mingw_setjmp)
_(__mingw_setjmp):
/* _JUMP_BUFFER layout matches win32/include/setjmp.h for _ARM64_:
0x00 Frame, 0x08 Reserved, 0x10-0x68 X19-X30, 0x70 Sp,
0x78 Fpcr/Fpsr, 0x80-0xB8 D8-D15.
The Reserved slot at 0x08 is not written here. */
str xzr, [x0] /* Frame = 0 */
stp x19, x20, [x0, 16]
stp x21, x22, [x0, 32]
stp x23, x24, [x0, 48]
stp x25, x26, [x0, 64]
stp x27, x28, [x0, 80]
stp x29, x30, [x0, 96] /* Fp, Lr */
mov x2, sp /* sp cannot be stored directly, copy it through x2 */
str x2, [x0, 112] /* Sp */
mrs x2, FPCR
str w2, [x0, 120] /* Fpcr */
mrs x2, FPSR
str w2, [x0, 124] /* Fpsr */
stp d8, d9, [x0, 128]
stp d10, d11, [x0, 144]
stp d12, d13, [x0, 160]
stp d14, d15, [x0, 176]
mov x0, 0 /* direct return from setjmp yields 0 */
ret
.globl _(__mingw_longjmp)
_(__mingw_longjmp):
    /* Restores the environment saved in the jump buffer addressed by x0
       and returns to the saved Lr with the int value from w1 as the result
       (1 is substituted when that value is 0).
       The value is an int, so only w1 is defined; the upper 32 bits of x1
       are unspecified and must not take part in the zero test. */
    ldp x19, x20, [x0, 16]
    ldp x21, x22, [x0, 32]
    ldp x23, x24, [x0, 48]
    ldp x25, x26, [x0, 64]
    ldp x27, x28, [x0, 80]
    ldp x29, x30, [x0, 96]      /* Fp, Lr: ret below jumps to saved Lr */
    ldr x2, [x0, 112]           /* Sp */
    mov sp, x2
    ldr w2, [x0, 120]           /* Fpcr */
    msr FPCR, x2
    ldr w2, [x0, 124]           /* Fpsr */
    msr FPSR, x2
    ldp d8, d9, [x0, 128]
    ldp d10, d11, [x0, 144]
    ldp d12, d13, [x0, 160]
    ldp d14, d15, [x0, 176]
    mov w0, w1                  /* 32-bit move also clears bits 63:32 of x0 */
    cbnz w0, L_longjmp_done
    mov w0, 1                   /* setjmp must not appear to return 0 */
L_longjmp_done:
    ret
/* ---------------------------------------------- */
#elif !defined(__x86_64__)
/* ---------------------------------------------- */
.globl _(__chkstk)
@ -69,5 +154,3 @@ _(tinyc_getbp):
/* ---------------------------------------------- */
#endif
/* ---------------------------------------------- */