diff --git a/lib/bt-exe.c b/lib/bt-exe.c index 60bf80e4..17fdc501 100644 --- a/lib/bt-exe.c +++ b/lib/bt-exe.c @@ -85,13 +85,3 @@ ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s) buf[l] = 0; return buf; } - -#if defined(_WIN64) && defined(__aarch64__) -/* The bt-only Windows ARM64 build should not rely on importing this helper. */ -LONG InterlockedExchange(LONG volatile *Target, LONG Value) -{ - LONG Old = *Target; - *Target = Value; - return Old; -} -#endif diff --git a/tcc.h b/tcc.h index 083e9d30..c74fae47 100644 --- a/tcc.h +++ b/tcc.h @@ -1930,46 +1930,12 @@ dwarf_read_sleb128(unsigned char **ln, unsigned char *end) #if CONFIG_TCC_SEMLOCK #if defined _WIN32 typedef struct { volatile LONG init; CRITICAL_SECTION cs; } TCCSem; -#if defined __TINYC__ && (defined __aarch64__ || defined __arm64__) -/* Windows/arm64 Interlocked* names are compiler intrinsics, not - kernel32 exports, so tcc -run must not emit calls to them. */ -# define TCC_SEM_USE_ATOMICS 1 -enum { TCC_SEM_ATOMIC_SEQ_CST = 5 }; -#endif -static inline LONG tcc_sem_cmpxchg(volatile LONG *ptr, LONG val, LONG cmp) { -#ifdef TCC_SEM_USE_ATOMICS - LONG old = cmp; - __atomic_compare_exchange((LONG *)ptr, &old, &val, 0, - TCC_SEM_ATOMIC_SEQ_CST, - TCC_SEM_ATOMIC_SEQ_CST); - return old; -#else - return InterlockedCompareExchange(ptr, val, cmp); -#endif -} -static inline void tcc_sem_store(volatile LONG *ptr, LONG val) { -#ifdef TCC_SEM_USE_ATOMICS - __atomic_store((LONG *)ptr, &val, TCC_SEM_ATOMIC_SEQ_CST); -#else - InterlockedExchange(ptr, val); -#endif -} -static inline LONG tcc_sem_load(volatile LONG *ptr) { -#ifdef TCC_SEM_USE_ATOMICS - LONG val; - __atomic_load((LONG *)ptr, &val, TCC_SEM_ATOMIC_SEQ_CST); - return val; -#else - return InterlockedCompareExchange(ptr, 0, 0); -#endif -} static inline void wait_sem(TCCSem *p) { - if (tcc_sem_cmpxchg(&p->init, 1, 0) == 0) { + if (InterlockedCompareExchange(&p->init, 1, 0) == 0) { InitializeCriticalSection(&p->cs); - tcc_sem_store(&p->init, 2); 
+ InterlockedExchange(&p->init, 2); } else { - /* On tcc/arm64, __atomic_load maps to the acquire helper path. */ - while (tcc_sem_load(&p->init) != 2) + while (InterlockedCompareExchange(&p->init, 2, 2) != 2) Sleep(0); } EnterCriticalSection(&p->cs); diff --git a/tests/tests2/145_winarm64_interlocked.c b/tests/tests2/145_winarm64_interlocked.c new file mode 100644 index 00000000..cee08562 --- /dev/null +++ b/tests/tests2/145_winarm64_interlocked.c @@ -0,0 +1,35 @@ +#include <stdio.h> +#include <windows.h> + +#define PTR(x) ((PVOID)(ULONG_PTR)(x)) +#define CHECK(name, expr) printf("%s: %s\n", name, (expr) ? "yes" : "no") + +int main(void) +{ + PVOID volatile slot = PTR(0x1111222233334444ULL); + PVOID old; + + old = InterlockedExchangePointer(&slot, PTR(0x5555666677778888ULL)); + CHECK("exchange old", old == PTR(0x1111222233334444ULL)); + CHECK("exchange stored", slot == PTR(0x5555666677778888ULL)); + + old = InterlockedCompareExchangePointer(&slot, + PTR(0x9999aaaabbbbccccULL), + PTR(0x5555666677778888ULL)); + CHECK("compare old", old == PTR(0x5555666677778888ULL)); + CHECK("compare stored", slot == PTR(0x9999aaaabbbbccccULL)); + + old = InterlockedCompareExchangePointerAcquire(&slot, + PTR(0xdddd111122223333ULL), + PTR(0x0123456789abcdefULL)); + CHECK("acquire old", old == PTR(0x9999aaaabbbbccccULL)); + CHECK("acquire stored", slot == PTR(0x9999aaaabbbbccccULL)); + + old = InterlockedCompareExchangePointerRelease(&slot, + PTR(0xdddd111122223333ULL), + PTR(0x9999aaaabbbbccccULL)); + CHECK("release old", old == PTR(0x9999aaaabbbbccccULL)); + CHECK("release stored", slot == PTR(0xdddd111122223333ULL)); + + return 0; +} diff --git a/tests/tests2/145_winarm64_interlocked.expect b/tests/tests2/145_winarm64_interlocked.expect new file mode 100644 index 00000000..134c5f2f --- /dev/null +++ b/tests/tests2/145_winarm64_interlocked.expect @@ -0,0 +1,8 @@ +exchange old: yes +exchange stored: yes +compare old: yes +compare stored: yes +acquire old: yes +acquire stored: yes +release old: yes 
+release stored: yes diff --git a/tests/tests2/Makefile b/tests/tests2/Makefile index 4f19eb7d..c502b7c8 100644 --- a/tests/tests2/Makefile +++ b/tests/tests2/Makefile @@ -65,6 +65,11 @@ ifeq (,$(filter arm64 aarch64,$(ARCH))) SKIP += 139_arm64_errors.test SKIP += 140_arm64_extasm.test endif +ifneq ($(CONFIG_WIN32),yes) + SKIP += 145_winarm64_interlocked.test +else ifeq (,$(filter arm64 aarch64,$(ARCH))) + SKIP += 145_winarm64_interlocked.test +endif ifeq (,$(filter riscv64,$(ARCH))) SKIP += 141_riscv_asm.test # riscv64 asm endif diff --git a/win32/include/winapi/winbase.h b/win32/include/winapi/winbase.h index da38579c..da3e17e8 100644 --- a/win32/include/winapi/winbase.h +++ b/win32/include/winapi/winbase.h @@ -972,7 +972,9 @@ extern "C" { LONG WINAPI InterlockedDecrement(LONG volatile *lpAddend); LONG WINAPI InterlockedExchange(LONG volatile *Target,LONG Value); +#ifndef InterlockedExchangePointer #define InterlockedExchangePointer(Target,Value) (PVOID)InterlockedExchange((PLONG)(Target),(LONG)(Value)) +#endif LONG WINAPI InterlockedExchangeAdd(LONG volatile *Addend,LONG Value); LONG WINAPI InterlockedCompareExchange(LONG volatile *Destination,LONG Exchange,LONG Comperand); @@ -1035,6 +1037,7 @@ extern "C" { return Old; } +#ifndef InterlockedCompareExchangePointer #ifdef __cplusplus __CRT_INLINE PVOID __cdecl __InlineInterlockedCompareExchangePointer(PVOID volatile *Destination,PVOID ExChange,PVOID Comperand) { return((PVOID)(LONG_PTR)InterlockedCompareExchange((LONG volatile *)Destination,(LONG)(LONG_PTR)ExChange,(LONG)(LONG_PTR)Comperand)); @@ -1043,6 +1046,7 @@ extern "C" { #else #define InterlockedCompareExchangePointer(Destination,ExChange,Comperand)(PVOID)(LONG_PTR)InterlockedCompareExchange((LONG volatile *)(Destination),(LONG)(LONG_PTR)(ExChange),(LONG)(LONG_PTR)(Comperand)) #endif +#endif #define InterlockedIncrementAcquire InterlockedIncrement #define InterlockedIncrementRelease InterlockedIncrement @@ -1054,9 +1058,13 @@ extern "C" { #define 
InterlockedCompareExchangeRelease InterlockedCompareExchange #define InterlockedCompareExchangeAcquire64 InterlockedCompareExchange64 #define InterlockedCompareExchangeRelease64 InterlockedCompareExchange64 +#ifndef InterlockedCompareExchangePointerAcquire #define InterlockedCompareExchangePointerAcquire InterlockedCompareExchangePointer +#endif +#ifndef InterlockedCompareExchangePointerRelease #define InterlockedCompareExchangePointerRelease InterlockedCompareExchangePointer #endif +#endif #if defined(_SLIST_HEADER_) && !defined(_NTOSP_) WINBASEAPI VOID WINAPI InitializeSListHead(PSLIST_HEADER ListHead); diff --git a/win32/include/winapi/winnt.h b/win32/include/winapi/winnt.h index 405413fb..baace123 100644 --- a/win32/include/winapi/winnt.h +++ b/win32/include/winapi/winnt.h @@ -1419,6 +1419,69 @@ typedef DWORD LCID; #define OUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK_EXPORT_NAME "OutOfProcessFunctionTableCallback" #endif /* defined(__x86_64) && !defined(RC_INVOKED) */ +#if defined(__TINYC__) && (defined(__aarch64__) || defined(__arm64__)) && !defined(RC_INVOKED) + +#define __TCC_WINNT_ATOMIC_SEQ_CST 5 +/* TCC lowers __atomic_compare_exchange's fourth argument as weak, but the + ARM64 helper reads it as success_memorder. Strong CAS passes weak == 0, + so keep Interlocked's barriers explicit here. */ +#define __TCC_WINNT_MEMORY_BARRIER() __asm__ __volatile__("dmb ish" : : : "memory") + +/* This covers the LONG operations needed by TCC's semaphore code and the + pointer helpers that winbase.h exposes through Interlocked pointer macros. + Add local helpers before relying on increment/decrement, add/exchange-add, + bitwise, or 64-bit Interlocked forms on Windows ARM64 with TCC. 
*/ + + __CRT_INLINE LONG InterlockedExchange(LONG volatile *Target,LONG Value) { /* Swaps Value into *Target and returns Old. Emulated with an atomic load followed by a compare-exchange retry loop, with a "dmb ish" barrier issued after the load and again after the loop. NOTE(review): returning the replaced value relies on the builtin refreshing Old when the comparison fails; confirm against TCC's __atomic_compare_exchange. */ + LONG Old; + __atomic_load(Target,&Old,__TCC_WINNT_ATOMIC_SEQ_CST); /* first guess at the current value, used as the expected operand below */ + __TCC_WINNT_MEMORY_BARRIER(); + while (!__atomic_compare_exchange(Target,&Old,&Value,0, /* fourth argument 0 is the weak flag, so this is a strong compare-exchange */ + __TCC_WINNT_ATOMIC_SEQ_CST,__TCC_WINNT_ATOMIC_SEQ_CST)) + ; /* empty body: simply retry until the builtin reports success */ + __TCC_WINNT_MEMORY_BARRIER(); + return Old; + } + __CRT_INLINE LONG InterlockedCompareExchange(LONG volatile *Destination,LONG ExChange,LONG Comperand) { /* Compare-and-swap on a LONG: makes one strong compare-exchange attempt to store ExChange into *Destination with Comperand as the expected value, bracketed by "dmb ish" barriers, and returns Old afterwards. The builtin's success flag is deliberately ignored; callers compare the returned value with Comperand. */ + LONG Old = Comperand; /* Old doubles as the expected-value slot handed to the builtin */ + __TCC_WINNT_MEMORY_BARRIER(); + __atomic_compare_exchange(Destination,&Old,&ExChange,0, /* 0 is the weak flag, i.e. a strong compare-exchange */ + __TCC_WINNT_ATOMIC_SEQ_CST,__TCC_WINNT_ATOMIC_SEQ_CST); + __TCC_WINNT_MEMORY_BARRIER(); + return Old; + } + __CRT_INLINE PVOID __TCC_WINNT_InterlockedExchangePointer(PVOID volatile *Target,PVOID Value) { /* PVOID counterpart of InterlockedExchange with the same load, barrier, retry-loop, barrier sequence. Published as InterlockedExchangePointer by the #define further down. */ + PVOID Old; + __atomic_load(Target,&Old,__TCC_WINNT_ATOMIC_SEQ_CST); /* first guess at the current pointer value */ + __TCC_WINNT_MEMORY_BARRIER(); + while (!__atomic_compare_exchange(Target,&Old,&Value,0, /* strong compare-exchange (weak flag is 0) */ + __TCC_WINNT_ATOMIC_SEQ_CST,__TCC_WINNT_ATOMIC_SEQ_CST)) + ; /* empty body: retry until the builtin reports success */ + __TCC_WINNT_MEMORY_BARRIER(); + return Old; + } + __CRT_INLINE PVOID __TCC_WINNT_InterlockedCompareExchangePointer(PVOID volatile *Destination,PVOID ExChange,PVOID Comperand) { /* PVOID counterpart of InterlockedCompareExchange: one strong compare-exchange attempt between two barriers, returning Old. Published as InterlockedCompareExchangePointer, and through it as the Acquire and Release spellings, by the #defines further down. */ + PVOID Old = Comperand; /* expected-value slot handed to the builtin */ + __TCC_WINNT_MEMORY_BARRIER(); + __atomic_compare_exchange(Destination,&Old,&ExChange,0, /* strong compare-exchange (weak flag is 0) */ + __TCC_WINNT_ATOMIC_SEQ_CST,__TCC_WINNT_ATOMIC_SEQ_CST); + __TCC_WINNT_MEMORY_BARRIER(); + return Old; + } + +#define InterlockedExchangePointer __TCC_WINNT_InterlockedExchangePointer +#define InterlockedCompareExchangePointer __TCC_WINNT_InterlockedCompareExchangePointer +#define InterlockedCompareExchangePointerAcquire InterlockedCompareExchangePointer +#define InterlockedCompareExchangePointerRelease InterlockedCompareExchangePointer + +#define InterlockedCompareExchangeAcquire InterlockedCompareExchange +#define InterlockedCompareExchangeRelease InterlockedCompareExchange + +#undef __TCC_WINNT_MEMORY_BARRIER +#undef __TCC_WINNT_ATOMIC_SEQ_CST + +#endif /* defined(__TINYC__) && 
(defined(__aarch64__) || defined(__arm64__)) && !defined(RC_INVOKED) */ + #if defined(_ARM64_) /* ARM64 Context Definition */