tinycc/win32/lib/chkstk.S
grischka 30afb50e64 arm64-win32 review: fix problems and pass tests
tccpe.c:
- fix arm64 unwind codes (to make native set/longjmp() work)
  sizeof(RUNTIME_FUNCTION) is 8 on arm64 in the first place
  no need to note stack slots if we don't save any registers anyway

arm64-gen.c:
- fix long double reg-move
- fix arm64_hfa() for structs with float arrays
- gfunc_prolog(): set up stackframe earlier (simplifies unwind codes)
- new function gv_addr(RC);

win32/include/setjmp.h:
- provide correct definition for setjmp() (frameoffset = 224)

tccasm.c:
- support ".quad" with symbol & relocation
- support ".size"
- fix ". - symbol" arithmetic

win32/lib/crt1.c and win32/include/stdlib.h:
- do not write to __argc/__argv which reside in msvcrt.dll
  (msvcrt.dll on arm64 does not like that, crashes on unload)

tcc.c,libtcc.c:
- new functions tcc_fopen/fclose to avoid different stdio instances
  in tcc.exe & libtcc.dll

tests & github workflow:
- add test-win32.bat to run tests with a tcc compiled by build-tcc.bat
- add msvcrt_start.c for gcc/clang to use the same runtime as tcc

  the problem is that newer gcc as well as clang and cl are
  linking to newer runtimes (such as UCRT) that have partially
  different printf format behavior which makes tcctest fail.

  the solution here is to force these compilers to link with
  msvcrt.dll just like tcc.

  Also, there is no gcc on arm64-win32 currently at all.

  Anyway, this approach to running the github CI tests
  does not require msys2.  But it does rely on gnumake
  as well as on some 'sh' shell though which seems to be
  installed somewhere (maybe it is the one from git).
2026-05-04 12:51:10 +02:00

87 lines
2.1 KiB
ArmAsm

/* ---------------------------------------------- */
/* chkstk.S - __chkstk stack probing helpers (arm64, i386, x86_64) */
/* _(s) spells the symbol name s, with a '_' pasted in front when the
   preprocessor symbol __leading_underscore is defined. */
#ifndef __leading_underscore
# define _(s) s
#else
# define _(s) _##s
#endif
/* ---------------------------------------------- */
#if defined(__aarch64__)
.globl __chkstk
__chkstk:
/* Windows ARM64 stack probing helper.

   In:       x15 = requested frame size in 16-byte units (as passed by
                   arm64-gen.c).
   Out:      nothing; SP is left unchanged, the caller subtracts the frame
             size from SP after the probe returns.
   Clobbers: x16, x17 and the condition flags only.  No argument register
             is used as scratch, so x0-x7 keep their values.

   Reads from the stack one 4 KiB page at a time going down from SP, then
   once more at the final address SP - size. */
    mov x16, sp                 /* x16 = probe address, starts at SP */
    lsl x17, x15, 4             /* x17 = bytes still to cover */
    cbz x17, L_chkstk_done      /* zero-sized frame: nothing to probe */
L_chkstk_loop:
    /* Compare against one page and decrement in a single step, with x17
       itself as the destination so that no other register is written. */
    subs x17, x17, 4096
    bls L_chkstk_tail           /* at most one page was left */
    sub x16, x16, 4096
    ldr xzr, [x16]              /* touch the page, loaded value is discarded */
    b L_chkstk_loop
L_chkstk_tail:
    /* Here x17 = remaining - 4096 (zero or wrapped negative), hence
       x16 - 4096 - x17 = x16 - remaining. */
    sub x16, x16, 4096
    sub x16, x16, x17
    ldr xzr, [x16]              /* touch the lowest address of the new frame */
L_chkstk_done:
    ret
/* ---------------------------------------------- */
#elif defined(__i386__)
.globl _(__chkstk)
_(__chkstk):
/* i386 stack probe that also builds the caller's frame.
   On entry: %eax = byte count to reserve, (%esp) = return address.
   On exit:  %ebp = new frame pointer (the slot it points at holds the
             previous %ebp), %esp = %ebp - byte count, %ecx unchanged,
             %eax overwritten. */
    xchg (%esp),%ebp        /* old ebp goes into the return-address slot, ebp = return address */
    push %ebp               /* keep the return address one slot lower */
    lea 4(%esp),%ebp        /* ebp -> slot that now holds the old ebp */
    push %ecx               /* ecx serves as the probe pointer, preserve it */
    mov %ebp,%ecx           /* probing starts at the frame pointer */
L_chkstk_page:
    sub $4096,%ecx          /* step down one 4096-byte page ... */
    test %eax,(%ecx)        /* ... and read from it; only the memory access matters */
    sub $4096,%eax          /* one page less to go */
    cmp $4096,%eax
    jge L_chkstk_page       /* signed compare: loop while >= 4096 bytes remain */
    sub %eax,%ecx           /* cover the remainder: ecx = ebp - requested size */
    test %eax,(%ecx)        /* read at the final stack address */
    mov %esp,%eax           /* eax -> saved ecx, with the return address just above */
    mov %ecx,%esp           /* switch to the new stack pointer */
    mov (%eax),%ecx         /* reload the saved ecx */
    jmp *4(%eax)            /* return through the saved address (stack top has moved) */
/* ---------------------------------------------- */
#else /* __x86_64__ */
.globl _(__chkstk)
_(__chkstk):
/* x86_64 stack probe that also builds the caller's frame.
   On entry: %eax = byte count to reserve (32-bit, sign-extended below),
             (%rsp) = return address.
   On exit:  %rbp = new frame pointer (the slot it points at holds the
             previous %rbp), %rsp = %rbp - byte count, %rcx unchanged,
             %rax overwritten. */
    xchg (%rsp),%rbp        /* old rbp goes into the return-address slot, rbp = return address */
    push %rbp               /* keep the return address one slot lower */
    lea 8(%rsp),%rbp        /* rbp -> slot that now holds the old rbp */
    push %rcx               /* rcx serves as the probe pointer, preserve it */
    mov %rbp,%rcx           /* probing starts at the frame pointer */
    movslq %eax,%rax        /* widen the 32-bit byte count to 64 bits */
L_chkstk_page:
    sub $4096,%rcx          /* step down one 4096-byte page ... */
    test %rax,(%rcx)        /* ... and read from it; only the memory access matters */
    sub $4096,%rax          /* one page less to go */
    cmp $4096,%rax
    jge L_chkstk_page       /* signed compare: loop while >= 4096 bytes remain */
    sub %rax,%rcx           /* cover the remainder: rcx = rbp - requested size */
    test %rax,(%rcx)        /* read at the final stack address */
    mov %rsp,%rax           /* rax -> saved rcx, with the return address just above */
    mov %rcx,%rsp           /* switch to the new stack pointer */
    mov (%rax),%rcx         /* reload the saved rcx */
    jmp *8(%rax)            /* return through the saved address (stack top has moved) */
/* ---------------------------------------------- */
#endif
/* ---------------------------------------------- */