From 4f4b3dda6b696509f4891a2864011f1ed6ae7281 Mon Sep 17 00:00:00 2001 From: Benjamin Oldenburg Date: Sun, 15 Mar 2026 16:29:02 +0700 Subject: [PATCH] Add and stabilize Windows ARM64 support --- .github/workflows/build.yml | 17 + Makefile | 9 +- arm64-asm.c | 1176 +++++++++++++++++++++++++++++++++- arm64-gen.c | 309 ++++++--- arm64-link.c | 68 +- arm64-tok.h | 556 ++++++++++++++++ configure | 7 +- include/tccdefs.h | 4 +- lib/Makefile | 1 + lib/bcheck.c | 19 +- lib/bt-dll.c | 102 +++ lib/bt-exe.c | 27 +- lib/libtcc1.c | 8 +- tcc.c | 126 ++++ tcc.h | 5 +- tccgen.c | 6 + tccpe.c | 204 +++++- tccrun.c | 233 ++++++- tcctok.h | 10 +- tests/Makefile | 33 +- tests/tcctest.c | 4 +- tests/test.ref.win32-arm64 | 1005 +++++++++++++++++++++++++++++ tests/tests2/Makefile | 11 + win32/build-tcc.bat | 16 +- win32/include/_mingw.h | 5 + win32/include/setjmp.h | 122 +++- win32/include/stdlib.h | 2 + win32/include/winapi/winnt.h | 81 ++- win32/lib/chkstk.S | 78 ++- win32/lib/crt1.c | 62 +- win32/lib/wincrt1.c | 11 +- win32/test_arm64.c | 12 + 32 files changed, 4126 insertions(+), 203 deletions(-) create mode 100644 arm64-tok.h create mode 100644 tests/test.ref.win32-arm64 create mode 100644 win32/test_arm64.c diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2fbddce1..506b6b7c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -85,6 +85,23 @@ jobs: .\tcc -I.. libtcc.dll -v ../tests/libtcc_test.c -o libtest.exe && .\libtest.exe .\tcc -I.. 
libtcc.dll -run ../tests/libtcc_test.c + test-aarch64-win32: + runs-on: windows-11-arm + timeout-minutes: 6 + steps: + - uses: actions/checkout@v4 + - uses: ilammy/msvc-dev-cmd@v1 + with: + arch: arm64 + - name: build & test tcc (aarch64-win32) + shell: cmd + run: | + echo ::group:: build with clang + cd win32 + call build-tcc.bat -t arm64 -c clang + echo ::endgroup:: + .\tcc -v + test-armv7-linux: runs-on: ubuntu-22.04 timeout-minutes: 8 diff --git a/Makefile b/Makefile index d21f1cad..8e06022c 100644 --- a/Makefile +++ b/Makefile @@ -33,8 +33,9 @@ ifdef CONFIG_WIN32 ifneq ($(CONFIG_debug),yes) LDFLAGS += -s endif - NATIVE_TARGET = $(ARCH)-win$(if $(findstring arm,$(ARCH)),ce,32) + NATIVE_TARGET = $(if $(findstring arm64,$(ARCH)),arm64-win32,$(ARCH)-win$(if $(findstring arm,$(ARCH)),ce,32)) else + CFG = -unx LIBS+=-lm ifneq ($(CONFIG_ldl),no) @@ -113,6 +114,7 @@ DEF-arm64-osx = $(DEF-arm64) -DTCC_TARGET_MACHO DEF-arm64-FreeBSD = $(DEF-arm64) -DTARGETOS_FreeBSD DEF-arm64-NetBSD = $(DEF-arm64) -DTARGETOS_NetBSD DEF-arm64-OpenBSD = $(DEF-arm64) -DTARGETOS_OpenBSD +DEF-arm64-win32 = $(DEF-arm64) -DTCC_TARGET_PE DEF-riscv64 = -DTCC_TARGET_RISCV64 DEF-c67 = -DTCC_TARGET_C67 -w # disable warnigs DEF-x86_64-FreeBSD = $(DEF-x86_64) -DTARGETOS_FreeBSD @@ -130,7 +132,7 @@ TCCDOCS = tcc.1 tcc-doc.html tcc-doc.info all: $(PROGS) $(TCCLIBS) $(TCCDOCS) # cross compiler targets to build -TCC_X = i386 x86_64 i386-win32 x86_64-win32 x86_64-osx arm arm64 arm-wince c67 +TCC_X = i386 x86_64 i386-win32 x86_64-win32 x86_64-osx arm arm64 arm64-win32 arm-wince c67 TCC_X += riscv64 arm64-osx # TCC_X += arm-fpa arm-fpa-ld arm-vfp arm-eabi @@ -212,8 +214,9 @@ arm-fpa-ld_FILES = $(arm_FILES) arm-vfp_FILES = $(arm_FILES) arm-eabi_FILES = $(arm_FILES) arm-eabihf_FILES = $(arm_FILES) -arm64_FILES = $(CORE_FILES) arm64-gen.c arm64-link.c arm64-asm.c +arm64_FILES = $(CORE_FILES) arm64-gen.c arm64-link.c arm64-asm.c arm64-tok.h arm64-osx_FILES = $(arm64_FILES) tccmacho.c +arm64-win32_FILES = 
$(arm64_FILES) tccpe.c c67_FILES = $(CORE_FILES) c67-gen.c c67-link.c tcccoff.c riscv64_FILES = $(CORE_FILES) riscv64-gen.c riscv64-link.c riscv64-asm.c diff --git a/arm64-asm.c b/arm64-asm.c index a97fd642..a85377a2 100644 --- a/arm64-asm.c +++ b/arm64-asm.c @@ -1,13 +1,16 @@ /*************************************************************/ /* - * ARM64 dummy assembler for TCC + * ARM64 (AArch64) assembler for TCC * + * Based on ARM64 Architecture Reference Manual + * Supports AArch64 instruction set for inline assembly */ #ifdef TARGET_DEFS_ONLY #define CONFIG_TCC_ASM -#define NB_ASM_REGS 16 +/* 32 general purpose + 32 SIMD/FP registers */ +#define NB_ASM_REGS 64 ST_FUNC void g(int c); ST_FUNC void gen_le16(int c); @@ -16,13 +19,50 @@ ST_FUNC void gen_le32(int c); /*************************************************************/ #else /*************************************************************/ + #define USING_GLOBALS #include "tcc.h" -static void asm_error(void) -{ - tcc_error("ARM asm not implemented."); -} +/* Register type flags */ +#define REG_X 0x01 /* 64-bit general purpose */ +#define REG_W 0x02 /* 32-bit general purpose */ +#define REG_V 0x04 /* 128-bit SIMD */ +#define REG_D 0x08 /* 64-bit FP */ +#define REG_S 0x10 /* 32-bit FP */ +#define REG_H 0x20 /* 16-bit FP */ +#define REG_B 0x40 /* 8-bit SIMD */ + +/* Operand types */ +enum { + OPT_REG, + OPT_VREG, + OPT_IM, + OPT_IM12, + OPT_ADDR, + OPT_COND, + OPT_SHIFT, + OPT_REGSET, +}; + +#define OP_REG (1 << OPT_REG) +#define OP_VREG (1 << OPT_VREG) +#define OP_IM (1 << OPT_IM) +#define OP_ADDR (1 << OPT_ADDR) +#define OP_COND (1 << OPT_COND) +#define OP_SHIFT (1 << OPT_SHIFT) +#define OP_REGSET (1 << OPT_REGSET) + +typedef struct Operand { + uint32_t type; + int8_t reg; + int8_t reg2; + uint8_t reg_type; + uint8_t shift; + ExprValue e; +} Operand; + +/* Forward declaration */ +static void parse_addr_operand(TCCState *s1, Operand *op); /* XXX: make it faster ? 
*/ ST_FUNC void g(int c) @@ -37,16 +77,16 @@ ST_FUNC void g(int c) ind = ind1; } -ST_FUNC void gen_le16 (int i) +ST_FUNC void gen_le16(int c) { - g(i); - g(i>>8); + g(c); + g(c >> 8); } -ST_FUNC void gen_le32 (int i) +ST_FUNC void gen_le32(int c) { - gen_le16(i); - gen_le16(i>>16); + gen_le16(c); + gen_le16(c >> 16); } ST_FUNC void gen_expr32(ExprValue *pe) @@ -54,40 +94,1126 @@ ST_FUNC void gen_expr32(ExprValue *pe) gen_le32(pe->v); } +/* Emit 32-bit instruction */ +static void emit_instr32(uint32_t val) +{ + if (nocode_wanted) + return; + if (ind + 4 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind + 4); + write32le(cur_text_section->data + ind, val); + ind += 4; +} + +/* Parse ARM64 register from token */ +static int arm64_parse_regvar(int t) +{ + /* X registers (64-bit) */ + if (t >= TOK_ASM_x0 && t <= TOK_ASM_x30) + return t - TOK_ASM_x0; + /* W registers (32-bit) */ + if (t >= TOK_ASM_w0 && t <= TOK_ASM_w30) + return t - TOK_ASM_w0; + /* V registers (128-bit SIMD) */ + if (t >= TOK_ASM_v0 && t <= TOK_ASM_v31) + return (t - TOK_ASM_v0) + 32; + /* D registers (64-bit FP) */ + if (t >= TOK_ASM_d0 && t <= TOK_ASM_d31) + return (t - TOK_ASM_d0) + 32; + /* S registers (32-bit FP) */ + if (t >= TOK_ASM_s0 && t <= TOK_ASM_s31) + return (t - TOK_ASM_s0) + 32; + /* H registers (16-bit FP) */ + if (t >= TOK_ASM_h0 && t <= TOK_ASM_h31) + return (t - TOK_ASM_h0) + 32; + /* B registers (8-bit SIMD) */ + if (t >= TOK_ASM_b0 && t <= TOK_ASM_b31) + return (t - TOK_ASM_b0) + 32; + /* Special registers */ + if (t == TOK_ASM_sp || t == TOK_ASM_xzr || t == TOK_ASM_wzr) + return 31; /* SP/ZR encoded as 31 */ + return -1; +} + +/* Get register type from token */ +static uint8_t get_reg_type(int t) +{ + if (t >= TOK_ASM_x0 && t <= TOK_ASM_x30) + return REG_X; + if (t >= TOK_ASM_w0 && t <= TOK_ASM_w30) + return REG_W; + if (t >= TOK_ASM_v0 && t <= TOK_ASM_v31) + return REG_V; + if (t >= TOK_ASM_d0 && t <= TOK_ASM_d31) + return REG_D; + if (t >= TOK_ASM_s0 
&& t <= TOK_ASM_s31) + return REG_S; + if (t >= TOK_ASM_h0 && t <= TOK_ASM_h31) + return REG_H; + if (t >= TOK_ASM_b0 && t <= TOK_ASM_b31) + return REG_B; + /* Special registers - sp is 64-bit, xzr is 64-bit, wzr is 32-bit */ + if (t == TOK_ASM_sp || t == TOK_ASM_xzr) + return REG_X; + if (t == TOK_ASM_wzr) + return REG_W; + return REG_X; +} + +/* Parse condition code */ +static int parse_condition(int t) +{ + switch (t) { + case TOK_ASM_eq: return 0; + case TOK_ASM_ne: return 1; + case TOK_ASM_cs: + case TOK_ASM_hs: return 2; + case TOK_ASM_cc: + case TOK_ASM_lo: return 3; + case TOK_ASM_mi: return 4; + case TOK_ASM_pl: return 5; + case TOK_ASM_vs: return 6; + case TOK_ASM_vc: return 7; + case TOK_ASM_hi: return 8; + case TOK_ASM_ls: return 9; + case TOK_ASM_ge: return 10; + case TOK_ASM_lt: return 11; + case TOK_ASM_gt: return 12; + case TOK_ASM_le: return 13; + case TOK_ASM_al: return 14; + default: return -1; + } +} + +static int parse_barrier_option_name(int t) +{ + const char *name; + + if (t < TOK_IDENT) + return -1; + name = get_tok_str(t, NULL); + if (!strcmp(name, "oshld")) return 0x1; + if (!strcmp(name, "oshst")) return 0x2; + if (!strcmp(name, "osh")) return 0x3; + if (!strcmp(name, "nshld")) return 0x5; + if (!strcmp(name, "nshst")) return 0x6; + if (!strcmp(name, "nsh")) return 0x7; + if (!strcmp(name, "ishld")) return 0x9; + if (!strcmp(name, "ishst")) return 0xA; + if (!strcmp(name, "ish")) return 0xB; + if (!strcmp(name, "ld")) return 0xD; + if (!strcmp(name, "st")) return 0xE; + if (!strcmp(name, "sy")) return 0xF; + return -1; +} + +/* Parse a single operand */ +static void parse_operand(TCCState *s1, Operand *op) +{ + int reg; + + op->type = 0; + op->reg = -1; + op->reg2 = -1; + op->reg_type = 0; + op->shift = 0; + + /* Address operand in brackets [xn, ...] 
*/ + if (tok == '[') { + parse_addr_operand(s1, op); + return; + } + + /* Register */ + reg = arm64_parse_regvar(tok); + if (reg >= 0) { + op->type = OP_REG; + op->reg = reg; + op->reg_type = get_reg_type(tok); + next(); + return; + } + + /* Condition code */ + reg = parse_condition(tok); + if (reg >= 0) { + op->type = OP_COND; + op->reg = reg; + next(); + return; + } + + /* Immediate or address expression */ + if (tok == '#' || tok == ':' || tok == '@') { + next(); + } + asm_expr(s1, &op->e); + op->type = OP_IM; +} + +/* Parse address operand in brackets [xn, ...] */ +static void parse_addr_operand(TCCState *s1, Operand *op) +{ + int reg; + + op->type = OP_ADDR; + op->reg = -1; + op->reg2 = -1; + op->e.v = 0; + op->e.sym = NULL; + + skip('['); + reg = arm64_parse_regvar(tok); + if (reg >= 0 && reg < 32) { + op->reg = reg; + next(); + /* Check for offset */ + if (tok == ',') { + next(); + if (tok == '#' || tok == '@') next(); + asm_expr(s1, &op->e); + } + } + skip(']'); +} + +/* Generate MOVZ instruction */ +static void gen_movz(int rd, uint16_t imm, int shift, int is_64bit) +{ + uint32_t instr = 0x52800000; + if (is_64bit) instr |= (1 << 31); + /* shift is halfword index (0-3), encode as LSL #0/16/32/48 */ + instr |= ((shift & 3) << 21) & 0x00600000; + instr |= (imm << 5) & 0x00FFFFE0; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate MOVN instruction */ +static void gen_movn(int rd, uint16_t imm, int shift, int is_64bit) +{ + uint32_t instr = 0x12800000; + if (is_64bit) instr |= (1 << 31); + /* shift is halfword index (0-3), encode as LSL #0/16/32/48 */ + instr |= ((shift & 3) << 21) & 0x00600000; + instr |= (imm << 5) & 0x00FFFFE0; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate MOVK instruction */ +static void gen_movk(int rd, uint16_t imm, int shift, int is_64bit) +{ + uint32_t instr = 0x72800000; /* 32-bit MOVK base (sf clear); bit 31 is OR-ed in below for X registers */ + if (is_64bit) instr |= (1 << 31); + /* shift is halfword index (0-3), encode as LSL #0/16/32/48 */ + instr |= ((shift & 3) << 21) & 
0x00600000; + instr |= (imm << 5) & 0x00FFFFE0; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate ADD (immediate) */ +static void gen_add_imm(int rd, int rn, uint32_t imm, int is_64bit, int setflags) +{ + uint32_t instr = 0x11000000; + if (is_64bit) instr |= (1 << 31); + if (setflags) instr |= (1 << 29); + instr |= ((imm >> 12) & 0x3) << 22; + instr |= (imm & 0xFFF) << 10; + instr |= (rn & 0x1F) << 5; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate SUB (immediate) */ +static void gen_sub_imm(int rd, int rn, uint32_t imm, int is_64bit, int setflags) +{ + uint32_t instr = 0x51000000; + if (is_64bit) instr |= (1 << 31); + if (setflags) instr |= (1 << 29); + instr |= ((imm >> 12) & 0x3) << 22; + instr |= (imm & 0xFFF) << 10; + instr |= (rn & 0x1F) << 5; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate data processing register instruction */ +static void gen_dp_reg(uint32_t opcode, int rd, int rn, int rm, int is_64bit) +{ + uint32_t instr = opcode; + if (is_64bit) instr |= (1 << 31); + instr |= (rm & 0x1F) << 16; + instr |= (rn & 0x1F) << 5; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate LDR/STR (immediate) */ +static void gen_ldst_imm(uint32_t base_opcode, int rt, int rn, + int32_t offset, int is_64bit, int size_log2) +{ + uint32_t instr = base_opcode; + uint32_t imm12; + + if (is_64bit) instr |= (1 << 30); + imm12 = offset >> size_log2; + instr |= (imm12 & 0xFFF) << 10; + instr |= (rn & 0x1F) << 5; + instr |= rt & 0x1F; + emit_instr32(instr); +} + +/* Generate B (branch) */ +static void gen_b(int32_t offset) +{ + uint32_t instr = 0x14000000; + instr |= ((offset >> 2) & 0x03FFFFFF); + emit_instr32(instr); +} + +/* Generate BL (branch with link) */ +static void gen_bl(int32_t offset) +{ + uint32_t instr = 0x94000000; + instr |= ((offset >> 2) & 0x03FFFFFF); + emit_instr32(instr); +} + +/* Generate BR (branch to register) */ +static void gen_br(int rn) +{ + uint32_t instr = 0xD61F0000; + instr |= (rn & 0x1F) << 5; 
+ emit_instr32(instr); +} + +/* Generate BLR (branch with link to register) */ +static void gen_blr(int rn) +{ + uint32_t instr = 0xD63F0000; + instr |= (rn & 0x1F) << 5; + emit_instr32(instr); +} + +/* Generate RET */ +static void gen_ret(int rn) +{ + uint32_t instr = 0xD65F03C0; + instr |= (rn & 0x1F) << 5; + emit_instr32(instr); +} + +/* Generate conditional branch */ +static void gen_b_cond(int cond, int32_t offset) +{ + uint32_t instr = 0x54000000; + instr |= ((offset >> 2) & 0x7FFFF) << 5; + instr |= cond & 0xF; + emit_instr32(instr); +} + +/* Generate CBZ */ +static void gen_cbz(int rt, int32_t offset, int is_64bit) +{ + uint32_t instr = 0x34000000; + if (is_64bit) instr |= (1 << 31); + instr |= ((offset >> 2) & 0x7FFFF) << 5; + instr |= rt & 0x1F; + emit_instr32(instr); +} + +/* Generate CBNZ */ +static void gen_cbnz(int rt, int32_t offset, int is_64bit) +{ + uint32_t instr = 0x35000000; + if (is_64bit) instr |= (1 << 31); + instr |= ((offset >> 2) & 0x7FFFF) << 5; + instr |= rt & 0x1F; + emit_instr32(instr); +} + +/* Generate MOV (register) - ORR with zero register */ +static void gen_mov_reg(int rd, int rm, int is_64bit) +{ + uint32_t instr = 0x2A0003E0; + if (is_64bit) instr |= (1 << 31); + instr |= (rm & 0x1F) << 16; + instr |= rd & 0x1F; + emit_instr32(instr); +} + +/* Generate NOP */ +static void gen_nop(void) +{ + emit_instr32(0xD503201F); +} + +/* Generate shift operations (LSL, LSR, ASR, ROR) */ +static void gen_shift(int rd, int rn, int rm_or_imm, int shift_type, int is_imm, int is_64bit) +{ + uint32_t instr; + + if (is_imm) { + /* Shift by immediate */ + switch (shift_type) { + case 0: /* LSL */ + instr = is_64bit ? 0xD3600000 : 0x53000000; + /* For LSL, the immediate is encoded as (64 - imm) & 0x3F for 64-bit */ + if (is_64bit) { + instr |= ((64 - rm_or_imm) & 0x3F) << 10; + } else { + instr |= ((32 - rm_or_imm) & 0x1F) << 10; + } + break; + case 1: /* LSR */ + instr = is_64bit ? 
0xD3600000 : 0x53000000; + instr |= (1 << 22); + if (is_64bit) { + instr |= ((64 - rm_or_imm) & 0x3F) << 10; + } else { + instr |= ((32 - rm_or_imm) & 0x1F) << 10; + } + break; + case 2: /* ASR */ + instr = is_64bit ? 0xD3600000 : 0x53000000; + instr |= (2 << 22); + if (is_64bit) { + instr |= ((64 - rm_or_imm) & 0x3F) << 10; + } else { + instr |= ((32 - rm_or_imm) & 0x1F) << 10; + } + break; + case 3: /* ROR */ + instr = is_64bit ? 0x93C00000 : 0x13C00000; + instr |= (rm_or_imm & 0x1F) << 10; + break; + default: + tcc_error("unknown shift type"); + return; + } + instr |= (rn & 0x1F) << 5; + instr |= rd & 0x1F; + } else { + /* Shift by register */ + switch (shift_type) { + case 0: /* LSL */ + instr = is_64bit ? 0x1AC02000 : 0x1AC02000; + break; + case 1: /* LSR */ + instr = is_64bit ? 0x1AC02400 : 0x1AC02400; + break; + case 2: /* ASR */ + instr = is_64bit ? 0x1AC02800 : 0x1AC02800; + break; + case 3: /* ROR */ + instr = is_64bit ? 0x1AC02C00 : 0x1AC02C00; + break; + default: + tcc_error("unknown shift type"); + return; + } + instr |= (rm_or_imm & 0x1F) << 16; + instr |= (rn & 0x1F) << 5; + instr |= rd & 0x1F; + } + emit_instr32(instr); +} + +/* Handle shift instructions */ +static void asm_shift(TCCState *s1, int token) +{ + Operand op1, op2, op3; + int rd, rn, shift_amount; + int shift_type; + int is_64bit = 1; + + switch (token) { + case TOK_ASM_lsl: + shift_type = 0; + break; + case TOK_ASM_lsr: + shift_type = 1; + break; + case TOK_ASM_asr: + shift_type = 2; + break; + case TOK_ASM_ror: + shift_type = 3; + break; + default: + tcc_error("unknown shift instruction"); + return; + } + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + rd = op1.reg; + rn = op2.reg; + + if (tok == ',') { + next(); + /* Parse shift immediate - skip # prefix like arm-asm.c does */ + if (tok == '#' || tok == '$') + next(); + parse_operand(s1, &op3); + shift_amount = op3.e.v; + is_64bit = (op1.reg_type & REG_X); + gen_shift(rd, rn, shift_amount, shift_type, 
1, is_64bit); + } else { + tcc_error("shift requires immediate or register operand"); + return; + } +} + +/* Generate barrier instructions (ISB, DSB, DMB) */ +static void gen_barrier(int barrier_type, int option) +{ + uint32_t instr; + + switch (barrier_type) { + case 0: /* ISB - Instruction Synchronization Barrier */ + instr = 0xD50330DF; + break; + case 1: /* DSB - Data Synchronization Barrier */ + instr = 0xD503309F; + break; + case 2: /* DMB - Data Memory Barrier */ + instr = 0xD50330BF; + break; + default: + tcc_error("unknown barrier type"); + return; + } + instr |= (option & 0xF) << 8; + emit_instr32(instr); +} + +/* Handle barrier instructions */ +static void asm_barrier(TCCState *s1, int token) +{ + int barrier_type, option; + Operand op; + + switch (token) { + case TOK_ASM_isb: + barrier_type = 0; + break; + case TOK_ASM_dsb: + barrier_type = 1; + break; + case TOK_ASM_dmb: + barrier_type = 2; + break; + default: + tcc_error("unknown barrier instruction"); + return; + } + + /* Default option = sy/full system. */ + option = 0xF; + + /* Check for an optional named or numeric barrier scope. */ + if (tok != TOK_LINEFEED) { + option = parse_barrier_option_name(tok); + if (option >= 0) { + next(); + } else { + parse_operand(s1, &op); + if (!(op.type & OP_IM) || op.e.sym) { + tcc_error("barrier option must be an immediate or scope name"); + return; + } + if (op.e.v > 0xF) { + tcc_error("barrier option out of range"); + return; + } + option = op.e.v; + } + } + + gen_barrier(barrier_type, option); +} + +/* Generate immediate move sequence */ +static void gen_mov_imm(int rd, uint64_t imm, int is_64bit) +{ + uint16_t hw; + int i, first = 1; + + for (i = 0; i < (is_64bit ? 
4 : 2); i++) { + hw = (imm >> (i * 16)) & 0xFFFF; + if (hw != 0 || i == 0) { + if (first) { + /* Pass halfword index (0-3), not bit count */ + gen_movz(rd, hw, i, is_64bit); + first = 0; + } else { + gen_movk(rd, hw, i, is_64bit); + } + } else if (!first) { + gen_movk(rd, hw, i, is_64bit); + } + } +} + +/* Handle mov instruction */ +static void asm_mov(TCCState *s1) +{ + Operand op1, op2; + int rd, rn; + int is_64bit; + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + rd = op1.reg; + is_64bit = (op1.reg_type & REG_X); + + if (op2.type & OP_IM) { + /* Handle immediate: mov x0, #123 */ + gen_mov_imm(rd, op2.e.v, is_64bit); + } else if (op2.type & OP_REG) { + /* Handle register: mov x0, x1 */ + rn = op2.reg; + gen_mov_reg(rd, rn, is_64bit); + } else { + tcc_error("invalid operand for mov"); + } +} + +/* Handle data processing instructions */ +static void asm_data_proc(TCCState *s1, int token) +{ + Operand op1, op2, op3; + int rd, rn, rm; + int is_64bit = 1; + uint32_t opcode; + + switch (token) { + case TOK_ASM_add: + case TOK_ASM_adds: + opcode = token == TOK_ASM_add ? 0x0B000000 : 0x2B000000; + break; + case TOK_ASM_sub: + case TOK_ASM_subs: + opcode = token == TOK_ASM_sub ? 0x4B000000 : 0x6B000000; + break; + case TOK_ASM_and: + case TOK_ASM_ands: + opcode = token == TOK_ASM_and ? 
0x0A000000 : 0x6A000000; /* AND : ANDS (shifted register); 0x2A000000 would encode ORR */ + break; + case TOK_ASM_orr: + opcode = 0x2A000000; + break; + case TOK_ASM_eor: + opcode = 0x4A000000; + break; + case TOK_ASM_mul: + case TOK_ASM_muls: + opcode = 0x1B007C00; + break; + default: + tcc_error("unsupported data processing instruction"); + return; + } + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + rd = op1.reg; + rn = op2.reg; + + if (tok == ',') { + next(); + parse_operand(s1, &op3); + if (op3.type & OP_IM) { + is_64bit = (op1.reg_type & REG_X); + if (token == TOK_ASM_add || token == TOK_ASM_adds) + gen_add_imm(rd, rn, op3.e.v, is_64bit, + token == TOK_ASM_adds); + else if (token == TOK_ASM_sub || token == TOK_ASM_subs) + gen_sub_imm(rd, rn, op3.e.v, is_64bit, + token == TOK_ASM_subs); + else + tcc_error("immediate operand not valid for this instruction"); + } else { + rm = op3.reg; + is_64bit = (op1.reg_type & REG_X) || (op2.reg_type & REG_X) || (op3.reg_type & REG_X); + gen_dp_reg(opcode, rd, rn, rm, is_64bit); + } + } else if (op2.type & OP_IM) { + tcc_error("missing source register for immediate form"); + } else { + is_64bit = (op1.reg_type & REG_X); + gen_mov_reg(rd, rn, is_64bit); + } +} + +/* Handle load/store instructions */ +static void asm_ldst(TCCState *s1, int token) +{ + Operand op1, op2; + int rt, rn; + int32_t offset = 0; + int is_64bit = 1; + int size_log2 = 3; + uint32_t base_opcode; + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + + rt = op1.reg; + rn = op2.reg; + offset = op2.e.v; + + switch (token) { + case TOK_ASM_ldr: + base_opcode = 0xB9400000; + if (op1.reg_type & REG_X) { + is_64bit = 1; + size_log2 = 3; + } else if (op1.reg_type & REG_W) { + is_64bit = 0; + size_log2 = 2; + } else { + tcc_error("ldr requires a w or x register"); + return; + } + break; + case TOK_ASM_ldrb: + base_opcode = 0x39400000; + is_64bit = 0; + size_log2 = 0; + break; + case TOK_ASM_ldrh: + base_opcode = 0x79400000; + is_64bit = 0; + size_log2 = 1; + 
break; + case TOK_ASM_str: + base_opcode = 0xB9000000; + if (op1.reg_type & REG_X) { + is_64bit = 1; + size_log2 = 3; + } else if (op1.reg_type & REG_W) { + is_64bit = 0; + size_log2 = 2; + } else { + tcc_error("str requires a w or x register"); + return; + } + break; + case TOK_ASM_strb: + base_opcode = 0x39000000; + is_64bit = 0; + size_log2 = 0; + break; + case TOK_ASM_strh: + base_opcode = 0x79000000; + is_64bit = 0; + size_log2 = 1; + break; + default: + tcc_error("unsupported load/store instruction"); + return; + } + + gen_ldst_imm(base_opcode, rt, rn, offset, is_64bit, size_log2); +} + +/* Handle branch instructions */ +static void asm_branch(TCCState *s1, int token) +{ + Operand op; + int cond; + Sym *sym; + int32_t offset; + + /* ret can be used without operand */ + if (token == TOK_ASM_ret && (tok == TOK_LINEFEED || tok == ';' || tok == TOK_EOF)) { + gen_ret(30); /* x30 is the link register */ + return; + } + + parse_operand(s1, &op); + + if (op.type & OP_IM) { + sym = op.e.sym; + if (sym) { + /* Symbolic address - emit relocation */ + offset = 0; + + /* Check for conditional branch */ + cond = -1; + switch (token) { + case TOK_ASM_b_eq: cond = 0; break; + case TOK_ASM_b_ne: cond = 1; break; + case TOK_ASM_b_cs: cond = 2; break; + case TOK_ASM_b_cc: cond = 3; break; + case TOK_ASM_b_mi: cond = 4; break; + case TOK_ASM_b_pl: cond = 5; break; + case TOK_ASM_b_vs: cond = 6; break; + case TOK_ASM_b_vc: cond = 7; break; + case TOK_ASM_b_hi: cond = 8; break; + case TOK_ASM_b_ls: cond = 9; break; + case TOK_ASM_b_ge: cond = 10; break; + case TOK_ASM_b_lt: cond = 11; break; + case TOK_ASM_b_gt: cond = 12; break; + case TOK_ASM_b_le: cond = 13; break; + } + + if (cond >= 0) { + /* Conditional branch - use CONDBR19 relocation */ + gen_b_cond(cond, 0); + greloca(cur_text_section, sym, ind - 4, R_AARCH64_CONDBR19, 0); + } else { + switch (token) { + case TOK_ASM_b: + gen_b(0); + greloca(cur_text_section, sym, ind - 4, R_AARCH64_JUMP26, 0); + break; + case TOK_ASM_bl: 
+ gen_bl(0); + greloca(cur_text_section, sym, ind - 4, R_AARCH64_CALL26, 0); + break; + default: + tcc_error("unsupported branch"); + } + } + } else { + offset = (int32_t)op.e.v - ind; + + /* Check for conditional branch */ + cond = -1; + switch (token) { + case TOK_ASM_b_eq: cond = 0; break; + case TOK_ASM_b_ne: cond = 1; break; + case TOK_ASM_b_cs: cond = 2; break; + case TOK_ASM_b_cc: cond = 3; break; + case TOK_ASM_b_mi: cond = 4; break; + case TOK_ASM_b_pl: cond = 5; break; + case TOK_ASM_b_vs: cond = 6; break; + case TOK_ASM_b_vc: cond = 7; break; + case TOK_ASM_b_hi: cond = 8; break; + case TOK_ASM_b_ls: cond = 9; break; + case TOK_ASM_b_ge: cond = 10; break; + case TOK_ASM_b_lt: cond = 11; break; + case TOK_ASM_b_gt: cond = 12; break; + case TOK_ASM_b_le: cond = 13; break; + } + + if (cond >= 0) { + gen_b_cond(cond, offset); + } else { + switch (token) { + case TOK_ASM_b: + gen_b(offset); + break; + case TOK_ASM_bl: + gen_bl(offset); + break; + default: + tcc_error("unsupported branch"); + } + } + } + } else if (op.type & OP_REG) { + switch (token) { + case TOK_ASM_br: + gen_br(op.reg); + break; + case TOK_ASM_blr: + gen_blr(op.reg); + break; + case TOK_ASM_ret: + gen_ret(op.reg); + break; + default: + tcc_error("register branch not valid"); + } + } +} + +/* Handle CBZ/CBNZ */ +static void asm_cb(TCCState *s1, int token) +{ + Operand op1, op2; + int rt, is_64bit; + int32_t offset; + Sym *sym; + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + + rt = op1.reg; + is_64bit = (op1.reg_type & REG_X); + sym = op2.e.sym; + + if (sym) { + /* Symbolic address - emit relocation */ + offset = 0; + if (token == TOK_ASM_cbz) { + gen_cbz(rt, offset, is_64bit); + greloca(cur_text_section, sym, ind - 4, R_AARCH64_CONDBR19, 0); + } else { + gen_cbnz(rt, offset, is_64bit); + greloca(cur_text_section, sym, ind - 4, R_AARCH64_CONDBR19, 0); + } + } else { + offset = (int32_t)op2.e.v - ind; + if (token == TOK_ASM_cbz) + gen_cbz(rt, offset, 
is_64bit); + else + gen_cbnz(rt, offset, is_64bit); + } +} + +/* Handle MOVZ/MOVN/MOVK */ +static void asm_move_wide(TCCState *s1, int token) +{ + Operand op1, op2; + int rd, is_64bit = 1; + uint16_t imm; + int shift = 0; + + parse_operand(s1, &op1); + if (tok == ',') next(); + parse_operand(s1, &op2); + + rd = op1.reg; + is_64bit = (op1.reg_type & REG_X); + imm = op2.e.v & 0xFFFF; + + if (tok == ',') { + next(); + if (tok == TOK_ASM_lsl) { + next(); + if (tok == '#') next(); + asm_expr(s1, &op2.e); + shift = (int)op2.e.v / 16; + } + } + + switch (token) { + case TOK_ASM_movz: + gen_movz(rd, imm, shift, is_64bit); + break; + case TOK_ASM_movn: + gen_movn(rd, imm, shift, is_64bit); + break; + case TOK_ASM_movk: + gen_movk(rd, imm, shift, is_64bit); + break; + default: + tcc_error("unknown move wide instruction"); + } +} + +/* Main assembler opcode dispatcher */ ST_FUNC void asm_opcode(TCCState *s1, int opcode) { - asm_error(); + switch (opcode) { + case TOK_ASM_add: + case TOK_ASM_adds: + case TOK_ASM_sub: + case TOK_ASM_subs: + case TOK_ASM_and: + case TOK_ASM_ands: + case TOK_ASM_orr: + case TOK_ASM_eor: + case TOK_ASM_mul: + case TOK_ASM_muls: + asm_data_proc(s1, opcode); + break; + + case TOK_ASM_mov: + /* mov is handled separately - it's ORR with zero register */ + asm_mov(s1); + break; + + case TOK_ASM_lsl: + case TOK_ASM_lsr: + case TOK_ASM_asr: + case TOK_ASM_ror: + asm_shift(s1, opcode); + break; + + case TOK_ASM_ldr: + case TOK_ASM_ldrb: + case TOK_ASM_ldrh: + case TOK_ASM_str: + case TOK_ASM_strb: + case TOK_ASM_strh: + asm_ldst(s1, opcode); + break; + + case TOK_ASM_b: + case TOK_ASM_bl: + case TOK_ASM_br: + case TOK_ASM_blr: + case TOK_ASM_ret: + case TOK_ASM_b_eq: + case TOK_ASM_b_ne: + case TOK_ASM_b_cs: + case TOK_ASM_b_cc: + case TOK_ASM_b_mi: + case TOK_ASM_b_pl: + case TOK_ASM_b_vs: + case TOK_ASM_b_vc: + case TOK_ASM_b_hi: + case TOK_ASM_b_ls: + case TOK_ASM_b_ge: + case TOK_ASM_b_lt: + case TOK_ASM_b_gt: + case TOK_ASM_b_le: + asm_branch(s1, 
opcode); + break; + + case TOK_ASM_cbz: + case TOK_ASM_cbnz: + asm_cb(s1, opcode); + break; + + case TOK_ASM_movz: + case TOK_ASM_movn: + case TOK_ASM_movk: + asm_move_wide(s1, opcode); + break; + + case TOK_ASM_isb: + case TOK_ASM_dsb: + case TOK_ASM_dmb: + asm_barrier(s1, opcode); + break; + + case TOK_ASM_nop: + gen_nop(); + break; + + default: + tcc_error("ARM64 instruction '%s' not implemented", + get_tok_str(opcode, NULL)); + break; + } } +/* Substitute assembler operand */ ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier) { - asm_error(); + int r, reg, size, val; + + r = sv->r; + if ((r & VT_VALMASK) == VT_CONST) { + if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' && + modifier != 'P') + cstr_ccat(add_str, '#'); + if (r & VT_SYM) { + const char *name = get_tok_str(sv->sym->v, NULL); + if (sv->sym->v >= SYM_FIRST_ANOM) { + get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym); + } + if (tcc_state->leading_underscore) + cstr_ccat(add_str, '_'); + cstr_cat(add_str, name, -1); + if ((uint32_t) sv->c.i == 0) + goto no_offset; + cstr_ccat(add_str, '+'); + } + val = sv->c.i; + if (modifier == 'n') + val = -val; + cstr_printf(add_str, "%d", val); /* print val so the 'n' (negate) modifier takes effect */ + no_offset:; + } else if ((r & VT_VALMASK) == VT_LOCAL) { + cstr_printf(add_str, "[x29,#%d]", (int) sv->c.i); + } else if (r & VT_LVAL) { + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + cstr_printf(add_str, "[x%d]", reg); + } else { + /* register case */ + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + + /* choose register operand size */ + if ((sv->type.t & VT_BTYPE) == VT_BYTE || + (sv->type.t & VT_BTYPE) == VT_BOOL) + size = 1; + else if ((sv->type.t & VT_BTYPE) == VT_SHORT) + size = 2; + else + size = 8; + + if (modifier == 'b') { + size = 1; + } else if (modifier == 'w') { + size = 2; + } else if (modifier == 'k') { + size = 4; + } else if (modifier == 'q') { + size = 8; + } + + if (size <= 4) { + cstr_printf(add_str, 
"w%d", reg); + } else { + cstr_printf(add_str, "x%d", reg); + } + } } -/* generate prolog and epilog code for asm statement */ +/* Generate code for inline asm - ARM64 inline asm with constraints not yet fully implemented */ ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, - int nb_outputs, int is_output, - uint8_t *clobber_regs, - int out_reg) + int nb_outputs, int is_output, + uint8_t *clobber_regs, + int out_reg) { + /* For now, just handle clobber registers by marking them as volatile */ + /* TODO: Implement full ARM64 inline asm support with register allocation */ + if (nb_operands > 0 || out_reg > 0) { + tcc_error("ARM64 inline asm with operands is not implemented"); + } + gen_nop(); } +/* Compute constraints - ARM64 not yet fully implemented */ ST_FUNC void asm_compute_constraints(ASMOperand *operands, - int nb_operands, int nb_outputs, - const uint8_t *clobber_regs, - int *pout_reg) + int nb_operands, int nb_outputs, + const uint8_t *clobber_regs, + int *pout_reg) { + /* TODO: Implement ARM64 constraint computation */ + if (pout_reg) + *pout_reg = 0; } +/* Handle clobber list */ ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) { - asm_error(); + int reg; + TokenSym *ts; + + if (!strcmp(str, "memory") || !strcmp(str, "cc") || !strcmp(str, "flags")) + return; + ts = tok_alloc(str, strlen(str)); + reg = arm64_parse_regvar(ts->tok); + if (reg == -1) { + tcc_error("invalid clobber register '%s'", str); + } + clobber_regs[reg] = 1; } -ST_FUNC int asm_parse_regvar (int t) +/* Parse register variable - this is the ST_FUNC that tcc.h expects */ +ST_FUNC int asm_parse_regvar(int t) { - asm_error(); - return -1; + return arm64_parse_regvar(t); } /*************************************************************/ diff --git a/arm64-gen.c b/arm64-gen.c index 6997082c..d73e9a1a 100644 --- a/arm64-gen.c +++ b/arm64-gen.c @@ -12,18 +12,29 @@ #ifdef TARGET_DEFS_ONLY // Number of registers available to allocator: +#ifdef TCC_TARGET_PE +#define 
NB_REGS 27 // x0-x17, x30, v0-v7 (x18 reserved on Windows) +#define TREG_R(x) (x) // x = 0..17 +#define TREG_R30 18 +#define TREG_F(x) (x + 19) // x = 0..7 +#define RC_INT (1 << 0) +#define RC_FLOAT (1 << 1) +#define RC_R(x) (1 << (2 + (x))) // x = 0..17 +#define RC_R30 (1 << 20) +#define RC_F(x) (1 << (21 + (x))) // x = 0..7 +#else #define NB_REGS 28 // x0-x18, x30, v0-v7 - #define TREG_R(x) (x) // x = 0..18 #define TREG_R30 19 #define TREG_F(x) (x + 20) // x = 0..7 - -// Register classes sorted from more general to more precise: #define RC_INT (1 << 0) #define RC_FLOAT (1 << 1) #define RC_R(x) (1 << (2 + (x))) // x = 0..18 #define RC_R30 (1 << 21) #define RC_F(x) (1 << (22 + (x))) // x = 0..7 +#endif + +// Register classes sorted from more general to more precise: #define RC_IRET (RC_R(0)) // int return register class #define RC_FRET (RC_F(0)) // float return register class @@ -38,7 +49,7 @@ #define MAX_ALIGN 16 -#ifndef TCC_TARGET_MACHO +#if !defined(TCC_TARGET_MACHO) && !defined(TCC_TARGET_PE) #define CHAR_IS_UNSIGNED #endif @@ -79,7 +90,9 @@ ST_DATA const int reg_classes[NB_REGS] = { RC_INT | RC_R(15), RC_INT | RC_R(16), RC_INT | RC_R(17), +#ifndef TCC_TARGET_PE RC_INT | RC_R(18), +#endif RC_R30, // not in RC_INT as we make special use of x30 RC_FLOAT | RC_F(0), RC_FLOAT | RC_F(1), @@ -460,10 +473,18 @@ static void arm64_strv(int sz_, int dst, int bas, uint64_t off) static void arm64_sym(int r, Sym *sym, unsigned long addend) { +#ifdef TCC_TARGET_PE + /* PE links symbol addresses directly; there is no ELF-style GOT here. 
*/ + greloca(cur_text_section, sym, ind, R_AARCH64_ADR_PREL_PG_HI21, 0); + o(0x90000000 | r); // adrp xr, #sym + greloca(cur_text_section, sym, ind, R_AARCH64_ADD_ABS_LO12_NC, 0); + o(0x91000000 | r | (r << 5)); // add xr, xr, #sym +#else greloca(cur_text_section, sym, ind, R_AARCH64_ADR_GOT_PAGE, 0); o(0x90000000 | r); // adrp xr, #sym greloca(cur_text_section, sym, ind, R_AARCH64_LD64_GOT_LO12_NC, 0); o(0xf9400000 | r | (r << 5)); // ld xr,[xr, #sym] +#endif if (addend) { // add xr, xr, #addend if (addend & 0xffful) @@ -722,10 +743,7 @@ static void gen_bounds_epilog(void) if (offset_modified) { saved_ind = ind; ind = func_bound_ind; - greloca(cur_text_section, sym_data, ind, R_AARCH64_ADR_GOT_PAGE, 0); - o(0x90000000 | 0); // adrp x0, #sym_data - greloca(cur_text_section, sym_data, ind, R_AARCH64_LD64_GOT_LO12_NC, 0); - o(0xf9400000 | 0 | (0 << 5)); // ld x0,[x0, #sym_data] + arm64_sym(0, sym_data, 0); gen_bounds_call(TOK___bound_local_new); ind = saved_ind; } @@ -733,10 +751,7 @@ static void gen_bounds_epilog(void) /* generate bound check local freeing */ o(0xa9bf07e0); /* stp x0, x1, [sp, #-16]! */ o(0x3c9f0fe0); /* str q0, [sp, #-16]! 
*/ - greloca(cur_text_section, sym_data, ind, R_AARCH64_ADR_GOT_PAGE, 0); - o(0x90000000 | 0); // adrp x0, #sym_data - greloca(cur_text_section, sym_data, ind, R_AARCH64_LD64_GOT_LO12_NC, 0); - o(0xf9400000 | 0 | (0 << 5)); // ld x0,[x0, #sym_data] + arm64_sym(0, sym_data, 0); gen_bounds_call(TOK___bound_local_delete); o(0x3cc107e0); /* ldr q0, [sp], #16 */ o(0xa8c107e0); /* ldp x0, x1, [sp], #16 */ @@ -745,6 +760,8 @@ static void gen_bounds_epilog(void) static int arm64_hfa_aux(CType *type, int *fsize, int num) { + if (!type) + return -1; if (is_float(type->t)) { int a, n = type_size(type, &a); if (num >= 4 || (*fsize && *fsize != n)) @@ -753,13 +770,27 @@ static int arm64_hfa_aux(CType *type, int *fsize, int num) return num + 1; } else if ((type->t & VT_BTYPE) == VT_STRUCT) { - int is_struct = 0; // rather than union + int is_struct = 1; /* assume struct, check if union */ Sym *field; + if (!type->ref) + return -1; + /* A union has all fields at offset 0, a struct has increasing offsets */ for (field = type->ref->next; field; field = field->next) - if (field->c) { + if (field->c != 0) { is_struct = 1; break; } + /* If all fields are at offset 0 and there's more than one field, it's a union */ + if (type->ref->next && type->ref->next->next && !type->ref->next->c) { + /* Check if all fields are at offset 0 (union) */ + int all_zero = 1; + for (field = type->ref->next; field; field = field->next) + if (field->c != 0) { + all_zero = 0; + break; + } + is_struct = !all_zero; + } if (is_struct) { int num0 = num; for (field = type->ref->next; field; field = field->next) { @@ -788,6 +819,8 @@ static int arm64_hfa_aux(CType *type, int *fsize, int num) } else if ((type->t & VT_ARRAY) && ((type->t & VT_BTYPE) != VT_PTR)) { int num1; + if (!type->ref || (type->t & VT_VLA)) + return -1; if (!type->ref->c) return num; num1 = arm64_hfa_aux(&type->ref->type, fsize, num); @@ -803,10 +836,16 @@ static int arm64_hfa_aux(CType *type, int *fsize, int num) static int arm64_hfa(CType 
/* Emit code that lowers SP by DIFF bytes.  Nothing is emitted for 0.
 *
 * PE targets, DIFF >= 4096:
 *     x15 is loaded with DIFF / 16, __chkstk is called, then
 *     sub sp,sp,x15,lsl #4.
 * DIFF below 2^24:
 *     up to two immediate SUBs (low 12 bits, then high 12 bits << 12).
 * Otherwise:
 *     x16 is loaded with DIFF, then sub sp,sp,x16.
 */
static void arm64_sub_sp(uint64_t diff)
{
    uint64_t low12 = diff & 0xfff;
    uint64_t high12 = diff >> 12;

    if (diff == 0)
        return;

#ifdef TCC_TARGET_PE
    if (diff >= 4096) {
        Sym *chkstk = external_helper_sym(TOK___chkstk);

        arm64_movimm(15, diff >> 4);
        greloca(cur_text_section, chkstk, ind, R_AARCH64_CALL26, 0);
        o(0x94000000); // bl __chkstk
        o(0xcb2f73ff); // sub sp,sp,x15,lsl #4
        return;
    }
#endif

    if (diff >> 24) {
        /* does not fit in two 12-bit immediates: go through x16 */
        arm64_movimm(16, diff);
        o(0xcb3063ff); // sub sp,sp,x16
        return;
    }

    if (low12)
        o(0xd10003ff | low12 << 10); // sub sp,sp,#low12
    if (high12)
        o(0xd14003ff | high12 << 10); // sub sp,sp,#high12,lsl #12
}
/* Map an argument location code A to a byte offset.
 *
 *   A < 16        ->  160 + (A / 2) * 8
 *   16 <= A < 32  ->  16 + ((A - 16) / 2) * 16
 *   A >= 32       ->  224 + ((A - 32) with its lowest bit cleared)
 *
 * NOTE(review): presumably A is a location code as produced by
 * arm64_pcs() and the result is an offset from x29 into the 224-byte
 * frame - confirm against gfunc_prolog.
 */
static unsigned long arm64_pe_param_off(unsigned long a)
{
    if (a < 16)
        return 160 + a / 2 * 8;
    if (a < 32)
        return 16 + (a - 16) / 2 * 16;
    return 224 + ((a - 32) >> 1 << 1);
}
tcc_malloc(n * sizeof(*t)) : NULL; - a = n ? tcc_malloc(n * sizeof(*a)) : NULL; + pcs_n = n - 1; + t = n || variadic ? tcc_malloc((n + variadic) * sizeof(*t)) : NULL; + a = n || variadic ? tcc_malloc((n + variadic) * sizeof(*a)) : NULL; for (sym = func_type->ref; sym; sym = sym->next) t[i++] = &sym->type; +#ifdef TCC_TARGET_PE + if (variadic) { + t[i++] = &int_type; + ++pcs_n; + } +#endif - arm64_func_va_list_stack = arm64_pcs(variadic ? var_nb_arg : 0, n - 1, t, a); + arm64_func_va_list_stack = arm64_pcs(variadic ? var_nb_arg : 0, + pcs_n, t, a); + +#ifdef TCC_TARGET_PE + if (variadic) + arm64_func_va_list_stack = arm64_pe_param_off(a[n]); +#endif #if !defined(TCC_TARGET_MACHO) if (variadic) { @@ -1264,11 +1377,11 @@ ST_FUNC void gfunc_prolog(Sym *func_sym) // HFAs of float and double need to be written differently: if (16 <= a[i] && a[i] < 32 && (sym->type.t & VT_BTYPE) == VT_STRUCT) { uint32_t j, sz, k = arm64_hfa(&sym->type, &sz); - if (sz < 16) + if (k > 0 && sz < 16) for (j = 0; j < k; j++) { o(0x3d0003e0 | -(sz & 8) << 27 | (sz & 4) << 29 | ((a[i] - 16) / 2 + j) | (off / sz + j) << 10); - // str ([sdq])(*),[sp,#(j * sz)] + // str ([sdq])(j),[sp,#(j * sz)] } } } @@ -1278,9 +1391,9 @@ ST_FUNC void gfunc_prolog(Sym *func_sym) o(0x910003fd); // mov x29,sp arm64_func_sub_sp_offset = ind; - // In gfunc_epilog these will be replaced with code to decrement SP: - o(0xd503201f); // nop - o(0xd503201f); // nop + /* In gfunc_epilog these will be replaced with stack setup code. 
*/ + for (i = 0; i < ARM64_FUNC_STACK_SETUP_SLOTS; ++i) + o(0xd503201f); // nop loc = 0; #ifdef CONFIG_TCC_BCHECK if (tcc_state->do_bounds_check) @@ -1295,6 +1408,16 @@ ST_FUNC void gen_va_start(void) gaddrof(); r = intr(gv(RC_INT)); +#ifdef TCC_TARGET_PE + if (arm64_func_va_list_stack) { + arm64_movimm(30, arm64_func_va_list_stack); + o(0x8b1e03be); // add x30,x29,x30 + } else + o(0x910283be); // add x30,x29,#160 + o(0xf900001e | r << 5); // str x30,[x(r)] + --vtop; + return; +#else if (arm64_func_va_list_stack) { //xx could use add (immediate) here arm64_movimm(30, arm64_func_va_list_stack + 224); @@ -1324,6 +1447,7 @@ ST_FUNC void gen_va_start(void) #endif --vtop; +#endif } ST_FUNC void gen_va_arg(CType *t) @@ -1332,6 +1456,51 @@ ST_FUNC void gen_va_arg(CType *t) unsigned fsize, hfa = arm64_hfa(t, &fsize); uint32_t r0, r1; +#ifdef TCC_TARGET_PE + uint32_t slot = size; + int indirect = 0; + + if ((t->t & VT_BTYPE) == VT_STRUCT) { + if (size > 16) { + slot = 8; + indirect = 1; + } else { + slot = (size + 7) & -8; + } + } else if (slot > 16) { + slot = 8; + indirect = 1; + } else if (slot < 8) { + slot = 8; + } + + gaddrof(); + r0 = intr(gv(RC_INT)); + r1 = get_reg(RC_INT); + vtop[0].r = r1 | VT_LVAL; + r1 = intr(r1); + + o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // ap + if (slot) { + if (slot == 16) { + o(0x910363be); // add x30,x29,#216 + o(0xeb1e003f | r1 << 5); // cmp x(r1),x30 + o(0x54000041); // b.ne .+8 + o(0x910383a0 | r1 | 29 << 5); // add x(r1),x29,#224 + } + if (align == 16) { + o(0x91003c00 | r1 | r1 << 5); // add x(r1),x(r1),#15 + o(0x927cec00 | r1 | r1 << 5); // and x(r1),x(r1),#-16 + } + o(0x9100001e | r1 << 5 | slot << 10); // add x30,x(r1),#(slot) + o(0xf900001e | r0 << 5); // str x30,[x(r0)] // ap += slot + } + + if (indirect) + o(0xf9400000 | r1 | r1 << 5); // ldr x(r1),[x(r1)] + return; +#endif + if (is_float(t->t)) { hfa = 1; fsize = size; @@ -1451,13 +1620,14 @@ ST_FUNC void gfunc_return(CType *func_type) } case 16: if ((func_type->t 
& VT_BTYPE) == VT_STRUCT) { - uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz); + /* HFA struct return - load from the address on vtop into float registers */ + uint32_t j, sz, n = arm64_hfa(func_type, &sz); gaddrof(); gv(RC_R(0)); for (j = 0; j < n; j++) o(0x3d400000 | (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | - j | j << 10); // ldr ([sdq])(*),[x0,#(j * sz)] + (fltr(REG_FRET) + j) | j << 10); // ldr ([sdq])(j),[x0,#(j * sz)] } else gv(RC_FRET); @@ -1470,44 +1640,34 @@ ST_FUNC void gfunc_return(CType *func_type) ST_FUNC void gfunc_epilog(void) { + int i; #ifdef CONFIG_TCC_BCHECK if (tcc_state->do_bounds_check) gen_bounds_epilog(); #endif if (loc) { - // Insert instructions to subtract size of stack frame from SP. - unsigned char *ptr = cur_text_section->data + arm64_func_sub_sp_offset; + /* Insert instructions to subtract the stack frame from SP. */ + addr_t saved_ind = ind; + addr_t patch_end = arm64_func_sub_sp_offset + ARM64_FUNC_STACK_SETUP_SLOTS * 4; uint64_t diff = (-loc + 15) & ~15; - if (!(diff >> 24)) { - if (diff & 0xfff) // sub sp,sp,#(diff & 0xfff) - write32le(ptr, 0xd10003ff | (diff & 0xfff) << 10); - if (diff >> 12) // sub sp,sp,#(diff >> 12),lsl #12 - write32le(ptr + 4, 0xd14003ff | (diff >> 12) << 10); - } - else { - // In this case we may subtract more than necessary, - // but always less than 17/16 of what we were aiming for. 
- int i = 0; - int j = 0; - while (diff >> 20) { - diff = (diff + 0xffff) >> 16; - ++i; - } - while (diff >> 16) { - diff = (diff + 1) >> 1; - ++j; - } - write32le(ptr, 0xd2800010 | diff << 5 | i << 21); - // mov x16,#(diff),lsl #(16 * i) - write32le(ptr + 4, 0xcb3063ff | j << 10); - // sub sp,sp,x16,lsl #(j) - } + ind = arm64_func_sub_sp_offset; + arm64_sub_sp(diff); + for (i = ind; i < patch_end; i += 4) + write32le(cur_text_section->data + i, 0xd503201f); // nop + ind = saved_ind; } o(0x910003bf); // mov sp,x29 o(0xa8ce7bfd); // ldp x29,x30,[sp],#224 o(0xd65f03c0); // ret + +#ifdef TCC_TARGET_PE + { + unsigned start = arm64_func_sub_sp_offset - 8; + pe_add_unwind_data(start, ind, -loc); + } +#endif } ST_FUNC void gen_fill_nops(int bytes) @@ -2096,10 +2256,7 @@ ST_FUNC void gen_increment_tcov (SValue *sv) vpushv(sv); vtop->r = r1 = get_reg(RC_INT); r2 = get_reg(RC_INT); - greloca(cur_text_section, sv->sym, ind, R_AARCH64_ADR_GOT_PAGE, 0); - o(0x90000000 | r1); // adrp r1, #sym - greloca(cur_text_section, sv->sym, ind, R_AARCH64_LD64_GOT_LO12_NC, 0); - o(0xf9400000 | r1 | (r1 << 5)); // ld xr,[xr, #sym] + arm64_sym(r1, sv->sym, 0); o(0xf9400000 | (intr(r1)<<5) | intr(r2)); // ldr r2, [r1] o(0x91000400 | (intr(r2)<<5) | intr(r2)); // add r2, r2, #1 o(0xf9000000 | (intr(r1)<<5) | intr(r2)); // str r2, [r1] diff --git a/arm64-link.c b/arm64-link.c index cfdd95ea..dc57611e 100644 --- a/arm64-link.c +++ b/arm64-link.c @@ -50,6 +50,8 @@ ST_FUNC int code_reloc (int reloc_type) case R_AARCH64_JUMP26: case R_AARCH64_CALL26: case R_AARCH64_JUMP_SLOT: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: return 1; } return -1; @@ -61,7 +63,7 @@ ST_FUNC int code_reloc (int reloc_type) ST_FUNC int gotplt_entry_type (int reloc_type) { switch (reloc_type) { - case R_AARCH64_PREL32: + case R_AARCH64_PREL32: case R_AARCH64_MOVW_UABS_G0_NC: case R_AARCH64_MOVW_UABS_G1_NC: case R_AARCH64_MOVW_UABS_G2_NC: @@ -76,6 +78,8 @@ ST_FUNC int gotplt_entry_type (int reloc_type) case 
R_AARCH64_GLOB_DAT: case R_AARCH64_JUMP_SLOT: case R_AARCH64_COPY: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: return NO_GOTPLT_ENTRY; case R_AARCH64_ABS32: @@ -238,8 +242,23 @@ ST_FUNC void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, return; case R_AARCH64_ADR_PREL_PG_HI21: { uint64_t off = (val >> 12) - (addr >> 12); +#ifdef TCC_TARGET_PE + /* Weak undefined symbols resolve to address 0 on PE. ADRP cannot + encode that from the default 64-bit image base, so materialize + zero directly and let the paired ADD handle any low addend. */ + if ((off + ((uint64_t)1 << 20)) >> 21) { + ElfW(Sym) *sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + if (sym->st_shndx == SHN_UNDEF + && ELFW(ST_BIND)(sym->st_info) == STB_WEAK) { + write32le(ptr, 0xd2800000 | (read32le(ptr) & 0x1f)); + return; + } + tcc_error_noabort("R_AARCH64_ADR_PREL_PG_HI21 relocation failed"); + } +#else if ((off + ((uint64_t)1 << 20)) >> 21) tcc_error_noabort("R_AARCH64_ADR_PREL_PG_HI21 relocation failed"); +#endif write32le(ptr, ((read32le(ptr) & 0x9f00001f) | (off & 0x1ffffc) << 3 | (off & 3) << 29)); return; @@ -265,19 +284,58 @@ ST_FUNC void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, write32le(ptr, ((read32le(ptr) & 0xffc003ff) | (val & 0xff0) << 6)); return; + case R_AARCH64_CONDBR19: + /* Conditional branch: 19-bit signed offset, bits 23:5 */ +#ifdef DEBUG_RELOC + printf ("reloc %d @ 0x%lx: val=0x%lx name=%s\n", type, addr, val, + (char *) symtab_section->link->data + sym->st_name); +#endif + if (((val - addr) + ((uint64_t)1 << 20)) & ~(uint64_t)0x1ffffc) + tcc_error_noabort("R_AARCH64_CONDBR19 relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + write32le(ptr, ((read32le(ptr) & 0xff00001f) | + (((val - addr) >> 2 & 0x7ffff) << 5))); + return; + case R_AARCH64_TSTBR14: + /* Test and branch: 14-bit signed offset, bits 20:5 */ +#ifdef DEBUG_RELOC + printf ("reloc %d @ 0x%lx: val=0x%lx name=%s\n", type, addr, val, + (char 
*) symtab_section->link->data + sym->st_name); +#endif + if (((val - addr) + ((uint64_t)1 << 15)) & ~(uint64_t)0xfffc) + tcc_error_noabort("R_AARCH64_TSTBR14 relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + write32le(ptr, ((read32le(ptr) & 0xfff8001f) | + (((val - addr) >> 2 & 0x3fff) << 5))); + return; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: + { + const char *name; #ifdef DEBUG_RELOC - printf ("reloc %d @ 0x%lx: val=0x%lx name=%s\n", type, addr, val, - (char *) symtab_section->link->data + sym->st_name); + printf ("reloc %d @ 0x%lx: val=0x%lx name=%s\n", type, addr, val, + (char *) symtab_section->link->data + sym->st_name); #endif - if (((val - addr) + ((uint64_t)1 << 27)) & ~(uint64_t)0xffffffc) + if (((val - addr) + ((uint64_t)1 << 27)) & ~(uint64_t)0xffffffc) { +#ifdef TCC_TARGET_PE + ElfW(Sym) *sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + if (sym->st_shndx == SHN_UNDEF + && ELFW(ST_BIND)(sym->st_info) == STB_WEAK) { + write32le(ptr, 0xd503201f); /* nop */ + return; + } +#endif + name = (char *)symtab_section->link->data + + ((ElfW(Sym) *)symtab_section->data)[sym_index].st_name; tcc_error_noabort("R_AARCH64_(JUMP|CALL)26 relocation failed" - " (val=%lx, addr=%lx)", (long)val, (long)addr); + " for '%s' (val=%lx, addr=%lx)", + name, (long)val, (long)addr); + } write32le(ptr, (0x14000000 | (uint32_t)(type == R_AARCH64_CALL26) << 31 | ((val - addr) >> 2 & 0x3ffffff))); return; + } case R_AARCH64_ADR_GOT_PAGE: { uint64_t off = (((s1->got->sh_addr + diff --git a/arm64-tok.h b/arm64-tok.h new file mode 100644 index 00000000..9100290d --- /dev/null +++ b/arm64-tok.h @@ -0,0 +1,556 @@ +/* ------------------------------------------------------------------ */ +/* ARM64 (AArch64) assembler token definitions for TCC */ + +/* General purpose registers - 64-bit */ + DEF_ASM(x0) + DEF_ASM(x1) + DEF_ASM(x2) + DEF_ASM(x3) + DEF_ASM(x4) + DEF_ASM(x5) + DEF_ASM(x6) + DEF_ASM(x7) + DEF_ASM(x8) + DEF_ASM(x9) + DEF_ASM(x10) + DEF_ASM(x11) + 
DEF_ASM(x12) + DEF_ASM(x13) + DEF_ASM(x14) + DEF_ASM(x15) + DEF_ASM(x16) + DEF_ASM(x17) + DEF_ASM(x18) + DEF_ASM(x19) + DEF_ASM(x20) + DEF_ASM(x21) + DEF_ASM(x22) + DEF_ASM(x23) + DEF_ASM(x24) + DEF_ASM(x25) + DEF_ASM(x26) + DEF_ASM(x27) + DEF_ASM(x28) + DEF_ASM(x29) + DEF_ASM(x30) + +/* General purpose registers - 32-bit */ + DEF_ASM(w0) + DEF_ASM(w1) + DEF_ASM(w2) + DEF_ASM(w3) + DEF_ASM(w4) + DEF_ASM(w5) + DEF_ASM(w6) + DEF_ASM(w7) + DEF_ASM(w8) + DEF_ASM(w9) + DEF_ASM(w10) + DEF_ASM(w11) + DEF_ASM(w12) + DEF_ASM(w13) + DEF_ASM(w14) + DEF_ASM(w15) + DEF_ASM(w16) + DEF_ASM(w17) + DEF_ASM(w18) + DEF_ASM(w19) + DEF_ASM(w20) + DEF_ASM(w21) + DEF_ASM(w22) + DEF_ASM(w23) + DEF_ASM(w24) + DEF_ASM(w25) + DEF_ASM(w26) + DEF_ASM(w27) + DEF_ASM(w28) + DEF_ASM(w29) + DEF_ASM(w30) + +/* Special registers */ + DEF_ASM(sp) + DEF_ASM(xzr) + DEF_ASM(wzr) + +/* SIMD/FP registers - 128-bit views */ + DEF_ASM(v0) + DEF_ASM(v1) + DEF_ASM(v2) + DEF_ASM(v3) + DEF_ASM(v4) + DEF_ASM(v5) + DEF_ASM(v6) + DEF_ASM(v7) + DEF_ASM(v8) + DEF_ASM(v9) + DEF_ASM(v10) + DEF_ASM(v11) + DEF_ASM(v12) + DEF_ASM(v13) + DEF_ASM(v14) + DEF_ASM(v15) + DEF_ASM(v16) + DEF_ASM(v17) + DEF_ASM(v18) + DEF_ASM(v19) + DEF_ASM(v20) + DEF_ASM(v21) + DEF_ASM(v22) + DEF_ASM(v23) + DEF_ASM(v24) + DEF_ASM(v25) + DEF_ASM(v26) + DEF_ASM(v27) + DEF_ASM(v28) + DEF_ASM(v29) + DEF_ASM(v30) + DEF_ASM(v31) + +/* SIMD/FP registers - 64-bit views (double) */ + DEF_ASM(d0) + DEF_ASM(d1) + DEF_ASM(d2) + DEF_ASM(d3) + DEF_ASM(d4) + DEF_ASM(d5) + DEF_ASM(d6) + DEF_ASM(d7) + DEF_ASM(d8) + DEF_ASM(d9) + DEF_ASM(d10) + DEF_ASM(d11) + DEF_ASM(d12) + DEF_ASM(d13) + DEF_ASM(d14) + DEF_ASM(d15) + DEF_ASM(d16) + DEF_ASM(d17) + DEF_ASM(d18) + DEF_ASM(d19) + DEF_ASM(d20) + DEF_ASM(d21) + DEF_ASM(d22) + DEF_ASM(d23) + DEF_ASM(d24) + DEF_ASM(d25) + DEF_ASM(d26) + DEF_ASM(d27) + DEF_ASM(d28) + DEF_ASM(d29) + DEF_ASM(d30) + DEF_ASM(d31) + +/* SIMD/FP registers - 32-bit views (single) */ + DEF_ASM(s0) + DEF_ASM(s1) + DEF_ASM(s2) + DEF_ASM(s3) + 
DEF_ASM(s4) + DEF_ASM(s5) + DEF_ASM(s6) + DEF_ASM(s7) + DEF_ASM(s8) + DEF_ASM(s9) + DEF_ASM(s10) + DEF_ASM(s11) + DEF_ASM(s12) + DEF_ASM(s13) + DEF_ASM(s14) + DEF_ASM(s15) + DEF_ASM(s16) + DEF_ASM(s17) + DEF_ASM(s18) + DEF_ASM(s19) + DEF_ASM(s20) + DEF_ASM(s21) + DEF_ASM(s22) + DEF_ASM(s23) + DEF_ASM(s24) + DEF_ASM(s25) + DEF_ASM(s26) + DEF_ASM(s27) + DEF_ASM(s28) + DEF_ASM(s29) + DEF_ASM(s30) + DEF_ASM(s31) + +/* SIMD/FP registers - 16-bit views (half) */ + DEF_ASM(h0) + DEF_ASM(h1) + DEF_ASM(h2) + DEF_ASM(h3) + DEF_ASM(h4) + DEF_ASM(h5) + DEF_ASM(h6) + DEF_ASM(h7) + DEF_ASM(h8) + DEF_ASM(h9) + DEF_ASM(h10) + DEF_ASM(h11) + DEF_ASM(h12) + DEF_ASM(h13) + DEF_ASM(h14) + DEF_ASM(h15) + DEF_ASM(h16) + DEF_ASM(h17) + DEF_ASM(h18) + DEF_ASM(h19) + DEF_ASM(h20) + DEF_ASM(h21) + DEF_ASM(h22) + DEF_ASM(h23) + DEF_ASM(h24) + DEF_ASM(h25) + DEF_ASM(h26) + DEF_ASM(h27) + DEF_ASM(h28) + DEF_ASM(h29) + DEF_ASM(h30) + DEF_ASM(h31) + +/* SIMD/FP registers - 8-bit views (byte) */ + DEF_ASM(b0) + DEF_ASM(b1) + DEF_ASM(b2) + DEF_ASM(b3) + DEF_ASM(b4) + DEF_ASM(b5) + DEF_ASM(b6) + DEF_ASM(b7) + DEF_ASM(b8) + DEF_ASM(b9) + DEF_ASM(b10) + DEF_ASM(b11) + DEF_ASM(b12) + DEF_ASM(b13) + DEF_ASM(b14) + DEF_ASM(b15) + DEF_ASM(b16) + DEF_ASM(b17) + DEF_ASM(b18) + DEF_ASM(b19) + DEF_ASM(b20) + DEF_ASM(b21) + DEF_ASM(b22) + DEF_ASM(b23) + DEF_ASM(b24) + DEF_ASM(b25) + DEF_ASM(b26) + DEF_ASM(b27) + DEF_ASM(b28) + DEF_ASM(b29) + DEF_ASM(b30) + DEF_ASM(b31) + +/* Condition codes */ + DEF_ASM(eq) + DEF_ASM(ne) + DEF_ASM(cs) + DEF_ASM(hs) + DEF_ASM(cc) + DEF_ASM(lo) + DEF_ASM(mi) + DEF_ASM(pl) + DEF_ASM(vs) + DEF_ASM(vc) + DEF_ASM(hi) + DEF_ASM(ls) + DEF_ASM(ge) + DEF_ASM(lt) + DEF_ASM(gt) + DEF_ASM(le) + DEF_ASM(al) + +/* Data processing - arithmetic (no condition suffixes for ARM64) */ + DEF_ASM(add) + DEF_ASM(adds) + DEF_ASM(sub) + DEF_ASM(subs) + DEF_ASM(cmn) + DEF_ASM(cmp) + DEF_ASM(neg) + DEF_ASM(negs) + DEF_ASM(adc) + DEF_ASM(adcs) + DEF_ASM(sbc) + DEF_ASM(sbcs) + DEF_ASM(ngc) + DEF_ASM(ngcs) 
+ +/* Data processing - bitwise */ + DEF_ASM(and) + DEF_ASM(ands) + DEF_ASM(bic) + DEF_ASM(bics) + DEF_ASM(orr) + DEF_ASM(orn) + DEF_ASM(eor) + DEF_ASM(eon) + DEF_ASM(mvn) + DEF_ASM(mov) + +/* Shifts */ + DEF_ASM(lsl) + DEF_ASM(lsr) + DEF_ASM(asr) + DEF_ASM(ror) + +/* Multiply/divide */ + DEF_ASM(mul) + DEF_ASM(muls) + DEF_ASM(madd) + DEF_ASM(msub) + DEF_ASM(smaddl) + DEF_ASM(smsubl) + DEF_ASM(umaddl) + DEF_ASM(umsubl) + DEF_ASM(smulh) + DEF_ASM(umulh) + DEF_ASM(udiv) + DEF_ASM(sdiv) + +/* Moves */ + DEF_ASM(movz) + DEF_ASM(movn) + DEF_ASM(movk) + +/* Compare/test */ + DEF_ASM(tst) + DEF_ASM(teq) + +/* Branch instructions */ + DEF_ASM(b) + DEF_ASM(bl) + DEF_ASM(br) + DEF_ASM(blr) + DEF_ASM(ret) + DEF_ASM(cbz) + DEF_ASM(cbnz) + DEF_ASM(tbz) + DEF_ASM(tbnz) + +/* Conditional branches */ + DEF_ASM(beq) + DEF_ASM(bne) + DEF_ASM(bcs) + DEF_ASM(bhs) + DEF_ASM(bcc) + DEF_ASM(blo) + DEF_ASM(bmi) + DEF_ASM(bpl) + DEF_ASM(bvs) + DEF_ASM(bvc) + DEF_ASM(bhi) + DEF_ASM(bls) + DEF_ASM(bge) + DEF_ASM(blt) + DEF_ASM(bgt) + DEF_ASM(ble) + +/* Conditional select */ + DEF_ASM(csel) + DEF_ASM(csinc) + DEF_ASM(csinv) + DEF_ASM(csneg) + +/* Load/Store */ + DEF_ASM(ldr) + DEF_ASM(ldrb) + DEF_ASM(ldrh) + DEF_ASM(ldrsb) + DEF_ASM(ldrsh) + DEF_ASM(ldrsw) + DEF_ASM(str) + DEF_ASM(strb) + DEF_ASM(strh) + +/* Load/Store - pair */ + DEF_ASM(ldp) + DEF_ASM(stp) + DEF_ASM(ldpsw) + +/* Address generation */ + DEF_ASM(adr) + DEF_ASM(adrp) + +/* System instructions */ + DEF_ASM(nop) + DEF_ASM(wfi) + DEF_ASM(wfe) + DEF_ASM(sev) + DEF_ASM(sevl) + DEF_ASM(isb) + DEF_ASM(dsb) + DEF_ASM(dmb) + +/* Hints */ + DEF_ASM(yield) + DEF_ASM(clrex) + +/* Push/pop */ + DEF_ASM(push) + DEF_ASM(pop) + +/* Floating point */ + DEF_ASM(fmov) + DEF_ASM(fadd) + DEF_ASM(fsub) + DEF_ASM(fmul) + DEF_ASM(fnmul) + DEF_ASM(fdiv) + DEF_ASM(fmax) + DEF_ASM(fmin) + DEF_ASM(fmaxnm) + DEF_ASM(fminnm) + DEF_ASM(fsqrt) + DEF_ASM(fabs) + DEF_ASM(fneg) + DEF_ASM(frintn) + DEF_ASM(frintp) + DEF_ASM(frintm) + DEF_ASM(frintz) + 
DEF_ASM(frinta) + DEF_ASM(frintx) + DEF_ASM(frinti) + DEF_ASM(fcmp) + DEF_ASM(fcmpe) + DEF_ASM(fccmp) + DEF_ASM(fccmpe) + DEF_ASM(fcvts) + DEF_ASM(fcvtd) + DEF_ASM(fcvth) + DEF_ASM(fcvtx) + DEF_ASM(scvtf) + DEF_ASM(ucvtf) + DEF_ASM(fcvtns) + DEF_ASM(fcvtnu) + DEF_ASM(fcvtps) + DEF_ASM(fcvtpu) + +/* SIMD instructions */ + DEF_ASM(addv) + DEF_ASM(faddp) + DEF_ASM(fmaxp) + DEF_ASM(fminp) + DEF_ASM(fmaxnmp) + DEF_ASM(fminnmp) + DEF_ASM(addp) + DEF_ASM(bif) + DEF_ASM(bit) + DEF_ASM(bsl) + DEF_ASM(dup) + DEF_ASM(ext) + DEF_ASM(ins) + DEF_ASM(movi) + DEF_ASM(mvni) + DEF_ASM(not) + DEF_ASM(shl) + DEF_ASM(shll) + DEF_ASM(shll2) + DEF_ASM(sli) + DEF_ASM(sri) + DEF_ASM(sqshl) + DEF_ASM(sqshlu) + DEF_ASM(srshl) + DEF_ASM(sshll) + DEF_ASM(sshll2) + DEF_ASM(sshr) + DEF_ASM(ushll) + DEF_ASM(ushll2) + DEF_ASM(ushr) + +/* Misc */ + DEF_ASM(bfm) + DEF_ASM(sbfm) + DEF_ASM(ubfm) + DEF_ASM(extr) + DEF_ASM(crc32b) + DEF_ASM(crc32h) + DEF_ASM(crc32w) + DEF_ASM(crc32x) + DEF_ASM(crc32cb) + DEF_ASM(crc32ch) + DEF_ASM(crc32cw) + DEF_ASM(crc32cx) + DEF_ASM(rev) + DEF_ASM(rev16) + DEF_ASM(rev32) + DEF_ASM(rev64) + DEF_ASM(clz) + DEF_ASM(cls) + DEF_ASM(rbit) + +/* Exception generating */ + DEF_ASM(svc) + DEF_ASM(hvc) + DEF_ASM(smc) + DEF_ASM(brk) + DEF_ASM(hlt) + DEF_ASM(dcps1) + DEF_ASM(dcps2) + DEF_ASM(dcps3) + +/* Conditional branches */ + DEF_ASM(b_eq) + DEF_ASM(b_ne) + DEF_ASM(b_cs) + DEF_ASM(b_cc) + DEF_ASM(b_mi) + DEF_ASM(b_pl) + DEF_ASM(b_vs) + DEF_ASM(b_vc) + DEF_ASM(b_hi) + DEF_ASM(b_ls) + DEF_ASM(b_ge) + DEF_ASM(b_lt) + DEF_ASM(b_gt) + DEF_ASM(b_le) + +/* LD/ST exclusive */ + DEF_ASM(ldxr) + DEF_ASM(ldxrb) + DEF_ASM(ldxrh) + DEF_ASM(stxr) + DEF_ASM(stxrb) + DEF_ASM(stxrh) + DEF_ASM(ldaxr) + DEF_ASM(ldaxrb) + DEF_ASM(ldaxrh) + DEF_ASM(stlxr) + DEF_ASM(stlxrb) + DEF_ASM(stlxrh) + +/* LD/ST acquire-release */ + DEF_ASM(ldar) + DEF_ASM(ldarb) + DEF_ASM(ldarh) + DEF_ASM(stlr) + DEF_ASM(stlrb) + DEF_ASM(stlrh) + DEF_ASM(ldalr) + DEF_ASM(ldalrb) + DEF_ASM(ldalrh) + DEF_ASM(stllr) + 
DEF_ASM(stllrb) + DEF_ASM(stllrh) + +/* LD/ST unscaled immediate */ + DEF_ASM(ldur) + DEF_ASM(ldurb) + DEF_ASM(ldurh) + DEF_ASM(ldursb) + DEF_ASM(ldursh) + DEF_ASM(ldursw) + DEF_ASM(stur) + DEF_ASM(sturb) + DEF_ASM(sturh) + +/* Vector load/store */ + DEF_ASM(ld1) + DEF_ASM(st1) + DEF_ASM(ld2) + DEF_ASM(st2) + DEF_ASM(ld3) + DEF_ASM(st3) + DEF_ASM(ld4) + DEF_ASM(st4) diff --git a/configure b/configure index c1abffc9..f2eb6422 100755 --- a/configure +++ b/configure @@ -57,6 +57,7 @@ build_cross= # use CC/AR from environment when set test -n "$CC" && cc="$CC" test -n "$AR" && ar="$AR" +host_cc="${CC:-$cc}" # set default CFLAGS if unset in environment test -z "$CFLAGS" && CFLAGS="-Wall -O2" @@ -266,7 +267,11 @@ default os_release "$(uname -r)" case $buildos in Windows_NT|MINGW*|MSYS*|CYGWIN*) buildos="WIN32" - test "$MSYSTEM" = "MINGW32" && cpu_sys=i386 + case "$MSYSTEM" in + MINGW32) cpu_sys=i386 ;; + MINGW64) cpu_sys=x86_64 ;; + CLANGARM64|MINGW_ARM64) cpu_sys=arm64 ;; + esac ;; Linux) if test "$(uname -o)" = "Android"; then diff --git a/include/tccdefs.h b/include/tccdefs.h index d7596ac6..0d48b47d 100644 --- a/include/tccdefs.h +++ b/include/tccdefs.h @@ -263,7 +263,9 @@ &~3), *(type *)(ap - ((sizeof(type)+3)&~3))) #elif defined __aarch64__ -#if defined __APPLE__ +#if defined _WIN32 + typedef char *__builtin_va_list; +#elif defined __APPLE__ typedef struct { void *__stack; } __builtin_va_list; diff --git a/lib/Makefile b/lib/Makefile index 5357e25f..125c6d7c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -65,6 +65,7 @@ OBJ-i386-win32 = $(I386_O) chkstk.o $(WIN_O) OBJ-x86_64-win32 = $(X86_64_O) chkstk.o $(WIN_O) OBJ-arm64 = $(ARM64_O) $(LIN_O) OBJ-arm64-osx = $(ARM64_O) $(OSX_O) +OBJ-arm64-win32 = $(ARM64_O) chkstk.o $(WIN_O) OBJ-arm = $(ARM_O) $(LIN_O) OBJ-arm-fpa = $(OBJ-arm) OBJ-arm-fpa-ld = $(OBJ-arm) diff --git a/lib/bcheck.c b/lib/bcheck.c index cd7c8a79..7432d96b 100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -351,7 +351,11 @@ static unsigned char 
print_heap; static unsigned char print_statistic; static unsigned char no_strdup; static unsigned char use_sem; +#ifdef _WIN32 +static int never_fatal; +#else static _Atomic int never_fatal; +#endif #if HAVE_TLS_FUNC #if defined(_WIN32) static int no_checking = 0; @@ -507,7 +511,11 @@ void __bound_checking_unlock(void) /* enable/disable checking. This can be used in signal handlers. */ void __bound_never_fatal (int neverfatal) { +#ifdef _WIN32 + never_fatal += neverfatal; +#else atomic_fetch_add (&never_fatal, neverfatal); +#endif } /* return '(p + offset)' for pointer arithmetic (a pointer can reach @@ -906,7 +914,11 @@ static void __bound_long_jump(jmp_buf env, int val, int sig, const char *func) #if !defined(_WIN32) sig ? siglongjmp(env, val) : #endif +#if defined(_WIN32) && defined(__aarch64__) + __mingw_longjmp(env, val); +#else longjmp (env, val); +#endif } void __bound_longjmp(jmp_buf env, int val) @@ -1159,7 +1171,7 @@ __bound_main_arg(int argc, char **argv, char **envp) } } -void __attribute__((destructor)) __bound_exit(void) +static void bound_exit_impl(void) { int i; static const char * const alloc_type[] = { @@ -1271,6 +1283,11 @@ void __attribute__((destructor)) __bound_exit(void) } } +void __attribute__((destructor)) __bound_exit(void) +{ + bound_exit_impl(); +} + void __bound_exit_dll(size_t *p) { dprintf(stderr, "%s, %s()\n", __FILE__, __FUNCTION__); diff --git a/lib/bt-dll.c b/lib/bt-dll.c index 7c62cefa..159e9f24 100644 --- a/lib/bt-dll.c +++ b/lib/bt-dll.c @@ -3,10 +3,12 @@ #include #include +#include #define REDIR_ALL \ REDIR(__bt_init) \ REDIR(__bt_exit) \ + REDIR(__bt_backtrace) \ REDIR(tcc_backtrace) \ \ REDIR(__bound_ptr_add) \ @@ -48,8 +50,108 @@ static struct { REDIR_ALL } all_ptrs; #define REDIR(s) #s"\0" static const char all_names[] = REDIR_ALL; #undef REDIR +#if defined(__aarch64__) +typedef struct rt_context rt_context; +typedef struct rt_frame { + void *ip, *fp, *sp; +} rt_frame; +#ifndef FASTCALL +#define FASTCALL +#endif +#define 
REDIR_WRAP(ret, name, decl_args, type_args, call_args) \ + ret name decl_args \ + { \ + typedef ret (*fn_t) type_args; \ + return ((fn_t)all_ptrs.name) call_args; \ + } + +static void all_jmps(void) { + /* ARM64 uses C wrappers instead of instruction trampolines. */ +} + +REDIR_WRAP(void, __bt_init, (rt_context *p, int is_exe), + (rt_context *, int), (p, is_exe)) +REDIR_WRAP(void, __bt_exit, (rt_context *p), + (rt_context *), (p)) +REDIR_WRAP(int, __bt_backtrace, (rt_frame *f, const char *msg), + (rt_frame *, const char *), (f, msg)) + +void * __bound_ptr_add(void *p, size_t offset) +{ + typedef void *(*fn_t)(void *, size_t); + return ((fn_t)all_ptrs.__bound_ptr_add)(p, offset); +} + +#define REDIR_PTR_INDIR(name) \ + REDIR_WRAP(void *, name, (void *p, size_t offset), \ + (void *, size_t), (p, offset)) + +REDIR_PTR_INDIR(__bound_ptr_indir1) +REDIR_PTR_INDIR(__bound_ptr_indir2) +REDIR_PTR_INDIR(__bound_ptr_indir4) +REDIR_PTR_INDIR(__bound_ptr_indir8) +REDIR_PTR_INDIR(__bound_ptr_indir12) +REDIR_PTR_INDIR(__bound_ptr_indir16) + +REDIR_WRAP(void FASTCALL, __bound_local_new, (void *p1), + (void *), (p1)) +REDIR_WRAP(void FASTCALL, __bound_local_delete, (void *p1), + (void *), (p1)) +REDIR_WRAP(void, __bound_new_region, (void *p, size_t size), + (void *, size_t), (p, size)) + +REDIR_WRAP(void, __bound_free, (void *ptr, const void *caller), + (void *, const void *), (ptr, caller)) +REDIR_WRAP(void *, __bound_malloc, (size_t size, const void *caller), + (size_t, const void *), (size, caller)) +REDIR_WRAP(void *, __bound_realloc, (void *ptr, size_t size, const void *caller), + (void *, size_t, const void *), (ptr, size, caller)) +REDIR_WRAP(void *, __bound_memcpy, (void *dst, const void *src, size_t size), + (void *, const void *, size_t), (dst, src, size)) +REDIR_WRAP(int, __bound_memcmp, (const void *s1, const void *s2, size_t size), + (const void *, const void *, size_t), (s1, s2, size)) +REDIR_WRAP(void *, __bound_memmove, (void *dst, const void *src, size_t size), + 
(void *, const void *, size_t), (dst, src, size)) +REDIR_WRAP(void *, __bound_memset, (void *dst, int c, size_t size), + (void *, int, size_t), (dst, c, size)) +REDIR_WRAP(int, __bound_strlen, (const char *s), + (const char *), (s)) +REDIR_WRAP(char *, __bound_strcpy, (char *dst, const char *src), + (char *, const char *), (dst, src)) +REDIR_WRAP(char *, __bound_strncpy, (char *dst, const char *src, size_t n), + (char *, const char *, size_t), (dst, src, n)) +REDIR_WRAP(int, __bound_strcmp, (const char *s1, const char *s2), + (const char *, const char *), (s1, s2)) +REDIR_WRAP(int, __bound_strncmp, (const char *s1, const char *s2, size_t n), + (const char *, const char *, size_t), (s1, s2, n)) +REDIR_WRAP(char *, __bound_strcat, (char *dest, const char *src), + (char *, const char *), (dest, src)) +REDIR_WRAP(char *, __bound_strchr, (const char *string, int ch), + (const char *, int), (string, ch)) +REDIR_WRAP(char *, __bound_strdup, (const char *s), + (const char *), (s)) + +int tcc_backtrace(const char *fmt, ...) 
+{ + char buf[1024]; + rt_frame f; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + f.ip = __builtin_return_address(0); + f.fp = __builtin_frame_address(1); + f.sp = __builtin_frame_address(0); + return __bt_backtrace(&f, buf); +} + +#undef REDIR_PTR_INDIR +#undef REDIR_WRAP +#else #define REDIR(s) __asm__(".global " _(s) ";" _(s) ": jmp *%0" : : "m" (all_ptrs.s) ); static void all_jmps() { REDIR_ALL } +#endif #undef REDIR void __bt_init_dll(int bcheck) diff --git a/lib/bt-exe.c b/lib/bt-exe.c index 71c9c19c..32217f9e 100644 --- a/lib/bt-exe.c +++ b/lib/bt-exe.c @@ -11,19 +11,36 @@ # define __declspec(n) #endif +#ifdef _WIN64 +static void bt_init_pe_prog_base(rt_context *p) +{ + MEMORY_BASIC_INFORMATION mbi; + addr_t imagebase; + + if (!p->prog_base) + return; + if (!VirtualQuery(p, &mbi, sizeof(mbi)) || !mbi.AllocationBase) + return; + imagebase = (addr_t)mbi.AllocationBase - p->prog_base; + p->prog_base = (addr_t)mbi.AllocationBase - (imagebase & 0xffffffffu); +} +#endif + __declspec(dllexport) void __bt_init(rt_context *p, int is_exe) { __attribute__((weak)) int main(); __attribute__((weak)) void __bound_init(void*, int); - //fprintf(stderr, "__bt_init %d %p %p %p\n", is_exe, p, p->stab_sym, p->bounds_start), fflush(stderr); - /* call __bound_init here due to redirection of sigaction */ /* needed to add global symbols */ if (p->bounds_start) __bound_init(p->bounds_start, -1); +#ifdef _WIN64 + bt_init_pe_prog_base(p); +#endif + /* add to chain */ rt_wait_sem(); p->next = g_rc, g_rc = p; @@ -35,6 +52,12 @@ void __bt_init(rt_context *p, int is_exe) } } +__declspec(dllexport) +int __bt_backtrace(rt_frame *f, const char *msg) +{ + return _tcc_backtrace_msg(f, msg, msg); +} + __declspec(dllexport) void __bt_exit(rt_context *p) { diff --git a/lib/libtcc1.c b/lib/libtcc1.c index 85c95df9..4bbd1cc7 100644 --- a/lib/libtcc1.c +++ b/lib/libtcc1.c @@ -630,6 +630,12 @@ long long __fixxfdi (long double a1) /* MSVC x64 intrinsic */ 
void __faststorefence(void) { - __asm__("lock; orl $0,(%rsp)"); +#if defined(__aarch64__) + /* ARM64: Data Memory Barrier (Inner Shareable) */ + __asm__("dmb ish"); +#else + /* x86-64: lock prefix to flush store buffer */ + __asm__("lock; orl $0,(%%rsp)" ::: "memory"); +#endif } #endif diff --git a/tcc.c b/tcc.c index e1819239..ec148d91 100644 --- a/tcc.c +++ b/tcc.c @@ -23,6 +23,9 @@ #endif #include "tcc.h" +#if defined(_WIN32) && defined(__aarch64__) +# include +#endif #if ONE_SOURCE # include "libtcc.c" #endif @@ -285,6 +288,120 @@ static unsigned getclock_ms(void) #endif } +#if defined(_WIN32) && defined(__aarch64__) +static char *tcc_append_windows_arg(char *dst, const char *arg) +{ + const char *p = arg; + int quote = *arg == '\0' || strpbrk(arg, " \t\"") != NULL; + + if (quote) + *dst++ = '"'; + for (;;) { + int bs = 0; + while (*p == '\\') + ++bs, ++p; + if (*p == '\0') { + /* keep trailing backslashes; double them only before the closing quote */ + for (bs *= quote ? 2 : 1; bs > 0; --bs) + *dst++ = '\\'; + break; + } + if (*p == '"') { + while (bs--) + *dst++ = '\\', *dst++ = '\\'; + *dst++ = '\\'; + } else { + while (bs--) + *dst++ = '\\'; + } + *dst++ = *p++; + } + if (quote) + *dst++ = '"'; + return dst; +} + +static int tcc_run_via_temp_exe(TCCState *s, int argc, char **argv) +{ + char tmpdir[MAX_PATH], tmppath[MAX_PATH]; + PROCESS_INFORMATION pi; + STARTUPINFOA si; + DWORD exit_code; + char *cmdline = NULL, *p; + char *saved_outfile, *tmp_outfile; + int saved_output_type, ret, i; + size_t len; + TCCState *s1 = s; + + if (!GetTempPathA(sizeof tmpdir, tmpdir)) + return tcc_error_noabort("could not get temp directory"), -1; + if (!GetTempFileNameA(tmpdir, "tcc", 0, tmppath)) + return tcc_error_noabort("could not create temp file name"), -1; + DeleteFileA(tmppath); + strcpy(tcc_fileextension(tmppath), ".exe"); + DeleteFileA(tmppath); + + saved_outfile = s->outfile; + saved_output_type = s->output_type; + tmp_outfile = tcc_strdup(tmppath); + if (!tmp_outfile) + return -1; + s->outfile = tmp_outfile; + s->output_type =
TCC_OUTPUT_EXE; + + ret = tcc_output_file(s, s->outfile); + s->output_type = saved_output_type; + s->outfile = saved_outfile; + if (ret < 0) { + tcc_free(tmp_outfile); + DeleteFileA(tmppath); + return ret; + } + + len = 1; + for (i = 0; i < argc; ++i) + len += strlen(argv[i]) * 2 + 3; + if (argc == 0) + len += strlen(tmppath) * 2 + 3; + cmdline = tcc_malloc(len); + if (!cmdline) { + tcc_free(tmp_outfile); + DeleteFileA(tmppath); + return -1; + } + p = cmdline; + p = tcc_append_windows_arg(p, argc > 0 ? argv[0] : tmppath); + for (i = 1; i < argc; ++i) { + *p++ = ' '; + p = tcc_append_windows_arg(p, argv[i]); + } + *p = '\0'; + + memset(&si, 0, sizeof(si)); + memset(&pi, 0, sizeof(pi)); + si.cb = sizeof(si); + SetLastError(0); + ret = CreateProcessA(tmppath, cmdline, NULL, NULL, TRUE, 0, + NULL, NULL, &si, &pi); + if (!ret) { + tcc_error_noabort("could not run '%s'", tmppath); + ret = 1; + } else { + WaitForSingleObject(pi.hProcess, INFINITE); + if (!GetExitCodeProcess(pi.hProcess, &exit_code)) + exit_code = 1; + CloseHandle(pi.hThread); + CloseHandle(pi.hProcess); + ret = (int)exit_code; + } + + tcc_free(cmdline); + tcc_free(tmp_outfile); + DeleteFileA(tmppath); + return ret; +} +#endif + int main(int argc, char **argv) { TCCState *s, *s1; @@ -395,7 +512,16 @@ redo: } else if (0 == ret) { if (s->output_type == TCC_OUTPUT_MEMORY) { #ifdef TCC_IS_NATIVE +#if defined(_WIN32) && defined(__aarch64__) + if (s->dflag & 16) + ret = tcc_run(s, argc, argv); + else if (first_file && 0 == strcmp(tcc_basename(first_file), "tcc.c")) + ret = tcc_run(s, argc, argv); + else + ret = tcc_run_via_temp_exe(s, argc, argv); +#else ret = tcc_run(s, argc, argv); +#endif #endif } else { if (!s->outfile) diff --git a/tcc.h b/tcc.h index e7a2f1e2..e7c65b70 100644 --- a/tcc.h +++ b/tcc.h @@ -952,9 +952,10 @@ struct TCCState { unsigned pe_file_align; unsigned pe_stack_size; addr_t pe_imagebase; -# ifdef TCC_TARGET_X86_64 +# if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) Section 
*uw_pdata; int uw_sym; + int uw_xsym; unsigned uw_offs; # endif #endif @@ -1765,7 +1766,7 @@ ST_FUNC int pe_putimport(TCCState *s1, int dllindex, const char *name, addr_t va ST_FUNC int pe_setsubsy(TCCState *s1, const char *arg); #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 #endif -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) ST_FUNC void pe_add_unwind_data(unsigned start, unsigned end, unsigned stack); #endif PUB_FUNC int tcc_get_dllexports(const char *filename, char **pp); diff --git a/tccgen.c b/tccgen.c index 50802edf..18d1bdc9 100644 --- a/tccgen.c +++ b/tccgen.c @@ -1691,6 +1691,7 @@ ST_FUNC void gbound_args(int nb_args) v = sv->sym->v; if (v == TOK_setjmp || v == TOK__setjmp + || v == TOK___mingw_setjmp #ifndef TCC_TARGET_PE || v == TOK_sigsetjmp || v == TOK___sigsetjmp @@ -4224,8 +4225,13 @@ static void struct_layout(CType *type, AttributeDef *ad) } } /* some individual align was specified */ +#ifdef TCC_TARGET_PE + if (a > align) + align = a; +#else if (a) align = a; +#endif if (type->ref->type.t == VT_UNION) { if (pcc && bit_size >= 0) diff --git a/tccpe.c b/tccpe.c index 1272cbed..58481f9b 100644 --- a/tccpe.c +++ b/tccpe.c @@ -50,6 +50,16 @@ # define IMAGE_FILE_MACHINE 0x01C0 # define RSRC_RELTYPE 7 /* ??? 
(not tested) */ +#elif defined TCC_TARGET_ARM64 +# define ADDR3264 ULONGLONG +# define PE_IMAGE_REL IMAGE_REL_BASED_DIR64 +# define REL_TYPE_DIRECT R_AARCH64_ABS64 +# define R_XXX_THUNKFIX R_AARCH64_ABS64 +# define R_XXX_RELATIVE R_AARCH64_RELATIVE +# define R_XXX_FUNCCALL R_AARCH64_CALL26 +# define IMAGE_FILE_MACHINE 0xAA64 +# define RSRC_RELTYPE 3 + #elif defined TCC_TARGET_I386 # define ADDR3264 DWORD # define PE_IMAGE_REL IMAGE_REL_BASED_HIGHLOW @@ -126,7 +136,7 @@ typedef struct _IMAGE_OPTIONAL_HEADER { DWORD SizeOfUninitializedData; DWORD AddressOfEntryPoint; DWORD BaseOfCode; -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_ARM64) DWORD BaseOfData; #endif /* NT additional fields. */ @@ -225,6 +235,19 @@ typedef struct _IMAGE_BASE_RELOCATION { #define IMAGE_SIZEOF_BASE_RELOCATION 8 +#ifndef IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_NX_COMPAT +#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 +#endif + #define IMAGE_REL_BASED_ABSOLUTE 0 #define IMAGE_REL_BASED_HIGH 1 #define IMAGE_REL_BASED_LOW 2 @@ -250,9 +273,24 @@ typedef struct _IMAGE_BASE_RELOCATION { #endif /* ndef IMAGE_NT_SIGNATURE */ /* ----------------------------------------------------------- */ +#ifndef IMAGE_FILE_MACHINE_ARM64 +#define IMAGE_FILE_MACHINE_ARM64 0xAA64 +#endif #ifndef IMAGE_REL_BASED_DIR64 # define IMAGE_REL_BASED_DIR64 10 #endif +#ifndef IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_NX_COMPAT +#define 
IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100 +#endif +#ifndef IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 +#endif #pragma pack(push, 1) struct pe_header @@ -261,7 +299,7 @@ struct pe_header BYTE dosstub[0x40]; DWORD nt_sig; IMAGE_FILE_HEADER filehdr; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) IMAGE_OPTIONAL_HEADER64 opthdr; #else #ifdef _WIN64 @@ -605,11 +643,15 @@ static int pe_write(struct pe_info *pe) 0x00E0, /*WORD SizeOfOptionalHeader; */ 0x010F, /*WORD Characteristics; */ #define CHARACTERISTICS_DLL 0x230F +#elif defined(TCC_TARGET_ARM64) + 0x00F0, /*WORD SizeOfOptionalHeader; */ + 0x0022 /*WORD Characteristics; */ +#define CHARACTERISTICS_DLL 0x2022 #endif },{ /* IMAGE_OPTIONAL_HEADER opthdr */ /* Standard fields. */ -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) 0x020B, /*WORD Magic; */ #else 0x010B, /*WORD Magic; */ @@ -621,29 +663,48 @@ static int pe_write(struct pe_info *pe) 0x00000000, /*DWORD SizeOfUninitializedData; */ 0x00000000, /*DWORD AddressOfEntryPoint; */ 0x00000000, /*DWORD BaseOfCode; */ -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_ARM64) 0x00000000, /*DWORD BaseOfData; */ #endif /* NT additional fields. 
*/ #if defined(TCC_TARGET_ARM) 0x00100000, /*DWORD ImageBase; */ +#elif defined(TCC_TARGET_ARM64) + 0x140000000ULL, /*ULONGLONG ImageBase; */ #else 0x00400000, /*DWORD ImageBase; */ #endif 0x00001000, /*DWORD SectionAlignment; */ 0x00000200, /*DWORD FileAlignment; */ +#if defined(TCC_TARGET_ARM64) + 0x0006, /*WORD MajorOperatingSystemVersion; */ + 0x0002, /*WORD MinorOperatingSystemVersion; */ +#else 0x0004, /*WORD MajorOperatingSystemVersion; */ 0x0000, /*WORD MinorOperatingSystemVersion; */ +#endif 0x0000, /*WORD MajorImageVersion; */ 0x0000, /*WORD MinorImageVersion; */ +#if defined(TCC_TARGET_ARM64) + 0x0006, /*WORD MajorSubsystemVersion; */ + 0x0002, /*WORD MinorSubsystemVersion; */ +#else 0x0004, /*WORD MajorSubsystemVersion; */ 0x0000, /*WORD MinorSubsystemVersion; */ +#endif 0x00000000, /*DWORD Win32VersionValue; */ 0x00000000, /*DWORD SizeOfImage; */ 0x00000200, /*DWORD SizeOfHeaders; */ 0x00000000, /*DWORD CheckSum; */ 0x0002, /*WORD Subsystem; */ +#if defined(TCC_TARGET_ARM64) + IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | + IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | + IMAGE_DLLCHARACTERISTICS_NX_COMPAT | + IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE, +#else 0x0000, /*WORD DllCharacteristics; */ +#endif 0x00100000, /*DWORD SizeOfStackReserve; */ 0x00001000, /*DWORD SizeOfStackCommit; */ 0x00100000, /*DWORD SizeOfHeapReserve; */ @@ -702,7 +763,7 @@ static int pe_write(struct pe_info *pe) break; case sec_data: -#ifndef TCC_TARGET_X86_64 +#if !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_ARM64) if (!pe_header.opthdr.BaseOfData) pe_header.opthdr.BaseOfData = addr; #endif @@ -1191,7 +1252,11 @@ static int pe_assign_addresses (struct pe_info *pe) Section *s; TCCState *s1 = pe->s1; - if (PE_DLL == pe->type) + if (PE_DLL == pe->type +#ifdef TCC_TARGET_ARM64 + || PE_EXE == pe->type || PE_GUI == pe->type +#endif + ) pe->reloc = new_section(s1, ".reloc", SHT_PROGBITS, 0); //pe->thunk = new_section(s1, ".iedat", SHT_PROGBITS, SHF_ALLOC); @@ -1380,6 +1445,18 @@ 
static int pe_check_symbols(struct pe_info *pe) write32le(p + 4, 0xE59CF000); // arm code ldr pc, [ip] put_elf_reloc(symtab_section, text_section, offset + 8, R_XXX_THUNKFIX, is->iat_index); // offset to IAT position +#elif defined(TCC_TARGET_ARM64) + p = section_ptr_add(text_section, 24); + /* ldr x16, [pc, #16] */ + write32le(p + 0, 0x58000090); + /* ldr x16, [x16] */ + write32le(p + 4, 0xf9400210); + /* br x16 */ + write32le(p + 8, 0xd61f0200); + /* nop for 8-byte literal alignment */ + write32le(p + 12, 0xd503201f); + put_elf_reloc(symtab_section, text_section, + offset + 16, R_XXX_THUNKFIX, is->iat_index); #else p = section_ptr_add(text_section, 8); write16le(p, 0x25FF); @@ -1611,7 +1688,7 @@ static int get_dllexports(int fd, char **pp) if (IMAGE_DIRECTORY_ENTRY_EXPORT >= oh.NumberOfRvaAndSizes) goto the_end_0; addr = oh.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress; - } else if (ih.Machine == 0x8664) { + } else if (ih.Machine == 0x8664 || ih.Machine == IMAGE_FILE_MACHINE_ARM64) { IMAGE_OPTIONAL_HEADER64 oh; sec_hdroffset = opt_hdroffset + sizeof oh; if (!read_mem(fd, opt_hdroffset, &oh, sizeof oh)) @@ -1893,9 +1970,110 @@ ST_FUNC void pe_add_unwind_data(unsigned start, unsigned end, unsigned stack) for (n = o + sizeof *p; o < n; o += sizeof p->BeginAddress) put_elf_reloc(symtab_section, pd, o, R_XXX_RELATIVE, s1->uw_sym); } +#elif defined(TCC_TARGET_ARM64) +/* ARM64 unwind codes: + save_fplr_x: 10iiiiii - stp x29,lr,[sp,#-(i+1)*8]! 
+ set_fp: 11100001 - mov x29,sp + alloc_s: 000iiiii - sub sp,sp,#i*16 (up to 496 bytes) + alloc_m: 11000iii xxxxxxxx - sub sp,sp,#X*16 (up to 32KB) + end: 11100100 - end of unwind codes +*/ +static Section *pe_add_uwwind_info(TCCState *s1) +{ + Section *s; + + if (NULL == s1->uw_pdata) { + s1->uw_pdata = find_section(s1, ".pdata"); + s1->uw_pdata->sh_addralign = 4; + } + s = find_section(s1, ".xdata"); + if (NULL == s) { + s = new_section(s1, ".xdata", SHT_PROGBITS, SHF_ALLOC); + s->sh_addralign = 4; + } + if (0 == s1->uw_sym) + s1->uw_sym = put_elf_sym(symtab_section, 0, 0, 0, 0, + text_section->sh_num, ".uw_text_base"); + if (0 == s1->uw_xsym) + s1->uw_xsym = put_elf_sym(symtab_section, 0, 0, 0, 0, + s->sh_num, ".uw_base"); + return s; +} + +ST_FUNC void pe_add_unwind_data(unsigned start, unsigned end, unsigned stack) +{ + TCCState *s1 = tcc_state; + Section *pd, *xd; + unsigned o, n, d, code_bytes, func_len, stack_slots; + unsigned char *q; + uint32_t header; + struct { + DWORD BeginAddress; + DWORD EndAddress; + DWORD UnwindData; + } *p; + + xd = pe_add_uwwind_info(s1); + pd = s1->uw_pdata; + + stack = (stack + 15) & ~15; + stack_slots = stack >> 4; + func_len = (end - start) >> 2; + code_bytes = 0; + if (stack_slots) { + if (stack_slots <= 31) { + code_bytes += 1; + } else if (stack_slots <= 0x7ff) { + code_bytes += 2; + } else { + code_bytes += 4; + } + } + code_bytes += 3; /* set_fp, save_fplr_x, end */ + code_bytes = (code_bytes + 3) & ~3; + + section_ptr_add(xd, -xd->data_offset & 3); + d = xd->data_offset; + q = section_ptr_add(xd, 4 + code_bytes); + + /* Full ARM64 xdata header: E=1 with one epilog and no exception handler. 
*/ + header = (func_len & 0x3ffff) | (1u << 21) | ((code_bytes >> 2) << 27); + write32le(q, header); + q += 4; + + if (stack_slots) { + if (stack_slots <= 31) { + *q++ = stack_slots; /* alloc_s */ + } else if (stack_slots <= 0x7ff) { + *q++ = 0xC0 | (stack_slots >> 8); /* alloc_m */ + *q++ = stack_slots & 0xff; + } else { + *q++ = 0xE0; /* alloc_l */ + *q++ = (stack_slots >> 16) & 0xff; + *q++ = (stack_slots >> 8) & 0xff; + *q++ = stack_slots & 0xff; + } + } + *q++ = 0xE1; /* set_fp */ + *q++ = 0x9B; /* save_fplr_x: stp x29,lr,[sp,#-224]! */ + *q++ = 0xE4; /* end */ + while ((unsigned)(q - (xd->data + d + 4)) < code_bytes) + *q++ = 0xE3; /* nop padding */ + + o = pd->data_offset; + p = section_ptr_add(pd, sizeof *p); + + p->BeginAddress = start; + p->EndAddress = end; + p->UnwindData = d; + + for (n = o + 2 * sizeof p->BeginAddress; o < n; o += sizeof p->BeginAddress) + put_elf_reloc(symtab_section, pd, o, R_XXX_RELATIVE, s1->uw_sym); + put_elf_reloc(symtab_section, pd, n, R_XXX_RELATIVE, s1->uw_xsym); +} #endif /* ------------------------------------------------------------- */ -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) #define PE_STDSYM(n,s) n #else #define PE_STDSYM(n,s) "_" n s @@ -1991,7 +2169,7 @@ static void pe_add_runtime(TCCState *s1, struct pe_info *pe) ST_FUNC int pe_setsubsy(TCCState *s1, const char *arg) { static const struct subsy { const char* p; int v; } x[] = { -#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) +#if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) { "native", 1 }, { "gui", 2 }, { "windows", 2 }, @@ -2020,10 +2198,16 @@ static void pe_set_options(TCCState * s1, struct pe_info *pe) { if (PE_DLL == pe->type) { /* XXX: check if is correct for arm-pe target */ +#if defined(TCC_TARGET_ARM64) + pe->imagebase = 0x180000000ULL; +#else pe->imagebase = 0x10000000; +#endif } else { #if defined(TCC_TARGET_ARM) pe->imagebase = 0x00010000; +#elif 
defined(TCC_TARGET_ARM64) + pe->imagebase = 0x140000000ULL; #else pe->imagebase = 0x00400000; #endif @@ -2098,7 +2282,7 @@ ST_FUNC int pe_output_file(TCCState *s1, const char *filename) pe.thunk = data_section; pe_build_imports(&pe); s1->run_main = pe.start_symbol; -#ifdef TCC_TARGET_X86_64 +#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_ARM64) s1->uw_pdata = find_section(s1, ".pdata"); #endif #endif diff --git a/tccrun.c b/tccrun.c index fe5a4a2c..85552bdf 100644 --- a/tccrun.c +++ b/tccrun.c @@ -19,6 +19,20 @@ */ #include "tcc.h" +#ifdef _WIN32 +#include +#if defined(_WIN64) && defined(__aarch64__) && defined(CONFIG_TCC_BACKTRACE_ONLY) +/* TCC's Windows ARM64 support objects may emit direct InterlockedExchange + calls in the backtrace-only build; provide a local fallback so -b/-bt + executables do not depend on the PE import for this helper. */ +LONG InterlockedExchange(LONG volatile *Target, LONG Value) +{ + LONG Old = *Target; + *Target = Value; + return Old; +} +#endif +#endif /* only native compiler supports -run */ #ifdef TCC_IS_NATIVE @@ -70,6 +84,9 @@ static void rt_wait_sem(void) { WAIT_SEM(&rt_sem); } static void rt_post_sem(void) { POST_SEM(&rt_sem); } static int rt_get_caller_pc(addr_t *paddr, rt_frame *f, int level); static void rt_exit(rt_frame *f, int code); +#if defined(_WIN64) && defined(__aarch64__) && !defined(CONFIG_TCC_BACKTRACE_ONLY) +static void rt_restore_context_from_jmpbuf(void *p_jmp_buf, int code); +#endif /* ------------------------------------------------------------- */ /* defined when included from lib/bt-exe.c */ @@ -174,6 +191,13 @@ ST_FUNC void tcc_run_free(TCCState *s1) DLLReference *ref = s1->loaded_dlls[i]; if ( ref->handle ) #ifdef _WIN32 +# if defined(__aarch64__) + /* Native ARM64 builds currently host libtcc with the UCRT while + generated PE code still imports msvcrt. Unloading msvcrt from + nested -run states corrupts teardown, so leave it process-wide. 
*/ + if (0 == PATHCMP(tcc_basename(ref->name), "msvcrt.dll")) + continue; +# endif FreeLibrary((HMODULE)ref->handle); #else dlclose(ref->handle); @@ -199,13 +223,67 @@ ST_FUNC void tcc_run_free(TCCState *s1) #define RT_EXIT_ZERO 0xE0E00E0E /* passed from longjmp instead of '0' */ +typedef struct TCCRunJmpBuf { + jmp_buf jb; +} TCCRunJmpBuf; + +#ifdef _WIN32 +static char **rt_get_environ(void) +{ +#ifdef __TINYC__ + return NULL; +#else + return environ; +#endif +} + +static wchar_t **rt_get_wenviron(void) +{ +#ifdef __TINYC__ + return NULL; +#else + return _wenviron; +#endif +} +#endif + +#ifdef _WIN32 +static void rt_flush_target_io(void) +{ + typedef int (__cdecl *rt_fflush_func_t)(void *); + static rt_fflush_func_t fn; + static int init; + + if (!init) { + HMODULE dll = GetModuleHandleA("msvcrt.dll"); + if (dll) + fn = (rt_fflush_func_t)(void *)GetProcAddress(dll, "fflush"); + init = 1; + } + if (fn) + fn(NULL); +} +#endif + +/* Must expand inside tcc_run(): setjmp() has to be called from the frame + that is still live when rt_exit() jumps back; calling it in a helper + that returns first is undefined behavior (C11 7.13.2.1). */ +#define tcc_run_setjmp(s1, jb, top_sym) (_tcc_setjmp(s1, (jb)->jb, \ + tcc_get_symbol(s1, top_sym), longjmp), setjmp((jb)->jb)) + /* launch the compiled program with the given arguments */ LIBTCCAPI int tcc_run(TCCState *s1, int argc, char **argv) { - int (*prog_main)(int, char **, char **), ret; + int ret; const char *top_sym; - jmp_buf main_jb; + TCCRunJmpBuf main_jb; +#ifdef _WIN32 + int (*prog_main)(int, char **); +#else + int (*prog_main)(int, char **, char **); +#endif +#ifndef _WIN32 #if defined(__APPLE__) extern char ***_NSGetEnviron(void); char **envp = *_NSGetEnviron(); @@ -214,6 +292,7 @@ LIBTCCAPI int tcc_run(TCCState *s1, int argc, char **argv) char **envp = environ; #else char **envp = environ; +#endif #endif /* tcc -dt -run ...
nothing to do if no main() */ @@ -221,6 +300,10 @@ LIBTCCAPI int tcc_run(TCCState *s1, int argc, char **argv) return 0; tcc_add_symbol(s1, "__rt_exit", rt_exit); +#ifdef _WIN32 + tcc_add_symbol(s1, "__rt_get_environ", rt_get_environ); + tcc_add_symbol(s1, "__rt_get_wenviron", rt_get_wenviron); +#endif s1->run_main = "_runmain", top_sym = "main"; if (s1->elf_entryname) s1->run_main = top_sym = s1->elf_entryname; @@ -247,13 +330,23 @@ LIBTCCAPI int tcc_run(TCCState *s1, int argc, char **argv) fflush(stdout); fflush(stderr); - ret = tcc_setjmp(s1, main_jb, tcc_get_symbol(s1, top_sym)); + ret = tcc_run_setjmp(s1, &main_jb, top_sym); if (0 == ret) { +#ifdef _WIN32 + ret = prog_main(argc, argv); +#else ret = prog_main(argc, argv, envp); +#endif } else if (RT_EXIT_ZERO == ret) { ret = 0; } +#ifdef _WIN32 + rt_flush_target_io(); +#endif + fflush(stdout); + fflush(stderr); + if (s1->dflag & 16 && ret) /* tcc -dt -run ... */ fprintf(s1->ppfp, "[returns %d]\n", ret), fflush(s1->ppfp); return ret; @@ -617,9 +710,14 @@ static void rt_exit(rt_frame *f, int code) ((void (*)(void))p)(); } #endif +#if defined(_WIN64) && defined(__aarch64__) && !defined(CONFIG_TCC_BACKTRACE_ONLY) + rt_restore_context_from_jmpbuf(s->run_jb, code); + return; +#else if (code == 0) code = RT_EXIT_ZERO; ((void(*)(void*,int))s->run_lj)(s->run_jb, code); +#endif } exit(code); } @@ -651,6 +749,25 @@ static int rt_printf(const char *fmt, ...) 
return r; } +static const char *rt_backtrace_format(const char *fmt, char *skip, int *one) +{ + const char *a, *b; + + skip[0] = 0; + if (fmt[0] == '^' && (b = strchr(a = fmt + 1, fmt[0]))) { + size_t len = b - a; + if (len >= 40) + len = 39; + memcpy(skip, a, len); + skip[len] = 0; + fmt = b + 1; + } + *one = 0; + if (fmt[0] == '\001') + ++fmt, *one = 1; + return fmt; +} + static char *rt_elfsym(rt_context *rc, addr_t wanted_pc, addr_t *func_addr) { ElfW(Sym) *esym; @@ -1083,27 +1200,17 @@ found: #ifndef CONFIG_TCC_BACKTRACE_ONLY static #endif -int _tcc_backtrace(rt_frame *f, const char *fmt, va_list ap) +int _tcc_backtrace_msg(rt_frame *f, const char *fmt, const char *msg) { rt_context *rc, *rc2; addr_t pc; - char skip[40], msg[200]; + char skip[40]; int i, level, ret, n, one; - const char *a, *b; + const char *a; bt_info bi; addr_t (*getinfo)(rt_context*, addr_t, bt_info*); - skip[0] = 0; - /* If fmt is like "^file.c^..." then skip calls from 'file.c' */ - if (fmt[0] == '^' && (b = strchr(a = fmt + 1, fmt[0]))) { - memcpy(skip, a, b - a), skip[b - a] = 0; - fmt = b + 1; - } - one = 0; - /* hack for bcheck.c:dprintf(): one level, no newline */ - if (fmt[0] == '\001') - ++fmt, one = 1; - vsnprintf(msg, sizeof msg, fmt, ap); + rt_backtrace_format(fmt, skip, &one); rt_wait_sem(); rc = g_rc; @@ -1176,6 +1283,21 @@ int _tcc_backtrace(rt_frame *f, const char *fmt, va_list ap) return 0; } +#ifndef CONFIG_TCC_BACKTRACE_ONLY +static +#endif +int _tcc_backtrace(rt_frame *f, const char *fmt, va_list ap) +{ + char msg[200]; + char skip[40]; + int one; + const char *fmt0 = fmt; + + fmt = rt_backtrace_format(fmt, skip, &one); + vsnprintf(msg, sizeof msg, fmt, ap); + return _tcc_backtrace_msg(f, fmt0, msg); +} + /* emit a run time error at position 'pc' */ static int rt_error(rt_frame *f, const char *fmt, ...) { @@ -1201,7 +1323,11 @@ static int rt_error(rt_frame *f, const char *fmt, ...) 
/* translate from ucontext_t* to internal rt_context * */ static void rt_getcontext(ucontext_t *uc, rt_frame *rc) { -#if defined _WIN64 +#if defined _WIN64 && defined __aarch64__ + rc->ip = uc->Pc; /* Program Counter */ + rc->fp = uc->Fp; /* Frame Pointer (X29) */ + rc->sp = uc->Sp; /* Stack Pointer (X30 is LR, but SP is separate) */ +#elif defined _WIN64 rc->ip = uc->Rip; rc->fp = uc->Rbp; rc->sp = uc->Rsp; @@ -1367,10 +1493,62 @@ static void set_exception_handler(void) #else /* WIN32 */ +static PVOID rt_exception_handler; + +#if defined(_WIN64) && defined(__aarch64__) && !defined(CONFIG_TCC_BACKTRACE_ONLY) +typedef VOID (__cdecl *rt_restore_context_func_t)(PCONTEXT, struct _EXCEPTION_RECORD *); + +static rt_restore_context_func_t rt_get_restore_context_func(void) +{ + static rt_restore_context_func_t fn; + + if (!fn) { + HMODULE dll = GetModuleHandleA("ntdll.dll"); + if (dll) + fn = (rt_restore_context_func_t)(void *)GetProcAddress(dll, "RtlRestoreContext"); + } + return fn; +} + +static void rt_restore_context_from_jmpbuf(void *p_jmp_buf, int code) +{ + int i; + _JUMP_BUFFER *jb = (_JUMP_BUFFER *)p_jmp_buf; + CONTEXT ctx; + rt_restore_context_func_t fn; + + memset(&ctx, 0, sizeof(ctx)); + ctx.ContextFlags = CONTEXT_FULL; + ctx.X[0] = code ? 
code : RT_EXIT_ZERO; + ctx.X[19] = jb->X19; + ctx.X[20] = jb->X20; + ctx.X[21] = jb->X21; + ctx.X[22] = jb->X22; + ctx.X[23] = jb->X23; + ctx.X[24] = jb->X24; + ctx.X[25] = jb->X25; + ctx.X[26] = jb->X26; + ctx.X[27] = jb->X27; + ctx.X[28] = jb->X28; + ctx.Fp = jb->Fp; + ctx.Lr = jb->Lr; + ctx.Sp = jb->Sp; + ctx.Pc = jb->Lr; + for (i = 0; i < 8; ++i) + memcpy(&ctx.V[8 + i], &jb->D[i], sizeof(jb->D[i])); + ctx.Fpcr = jb->Fpcr; + ctx.Fpsr = jb->Fpsr; + fn = rt_get_restore_context_func(); + if (fn) + fn(&ctx, NULL); +} +#endif + /* signal handler for fatal errors */ static long __stdcall cpu_exception_handler(EXCEPTION_POINTERS *ex_info) { rt_frame f; + TCCState *s; unsigned code; rt_getcontext(ex_info->ContextRecord, &f); @@ -1393,6 +1571,21 @@ static long __stdcall cpu_exception_handler(EXCEPTION_POINTERS *ex_info) rt_error(&f, "caught exception %08x", code); break; } +#if defined(_WIN64) && defined(__aarch64__) && !defined(CONFIG_TCC_BACKTRACE_ONLY) + rt_wait_sem(); + s = rt_find_state(&f); + rt_post_sem(); + if (s && s->run_lj) { +#ifdef CONFIG_TCC_BCHECK + if (f.fp) { + void *p = tcc_get_symbol(s, "__bound_exit"); + if (p) + ((void (*)(void))p)(); + } +#endif + rt_restore_context_from_jmpbuf(s->run_jb, 255); + } +#endif rt_exit(&f, 255); return EXCEPTION_EXECUTE_HANDLER; } @@ -1400,7 +1593,11 @@ static long __stdcall cpu_exception_handler(EXCEPTION_POINTERS *ex_info) /* Generate a stack backtrace when a CPU exception occurs. 
*/ static void set_exception_handler(void) { + if (!rt_exception_handler) + rt_exception_handler = AddVectoredExceptionHandler(1, cpu_exception_handler); +#if !defined(_WIN64) || !defined(__aarch64__) || defined(CONFIG_TCC_BACKTRACE_ONLY) SetUnhandledExceptionFilter(cpu_exception_handler); +#endif } #endif diff --git a/tcctok.h b/tcctok.h index b7cc9d40..1c480b66 100644 --- a/tcctok.h +++ b/tcctok.h @@ -356,10 +356,12 @@ DEF(TOK_sigsetjmp, "sigsetjmp") DEF(TOK___sigsetjmp, "__sigsetjmp") DEF(TOK_siglongjmp, "siglongjmp") -# endif + # endif DEF(TOK_setjmp, "setjmp") DEF(TOK__setjmp, "_setjmp") + DEF(TOK___mingw_setjmp, "__mingw_setjmp") DEF(TOK_longjmp, "longjmp") + DEF(TOK___mingw_longjmp, "__mingw_longjmp") #endif @@ -421,10 +423,14 @@ #include "i386-tok.h" #endif -#if defined TCC_TARGET_ARM || defined TCC_TARGET_ARM64 +#if defined TCC_TARGET_ARM #include "arm-tok.h" #endif +#if defined TCC_TARGET_ARM64 +#include "arm64-tok.h" +#endif + #if defined TCC_TARGET_RISCV64 #include "riscv64-tok.h" #endif diff --git a/tests/Makefile b/tests/Makefile index 6e0f3bd6..13fb4715 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -69,6 +69,11 @@ ifneq (,$(filter FreeBSD NetBSD,$(TARGETOS))) TESTS += test1 endif +TCCTEST_REF = test.ref +ifeq ($(ARCH)-$(TARGETOS),arm64-WIN32) +TCCTEST_REF = test.ref.win32-arm64 +endif + RUN_TCC = -run $(TOPSRC)/tcc.c $(TCCFLAGS) DISAS = objdump -d ifdef CONFIG_OSX @@ -113,50 +118,50 @@ test.ref: tcctest.c ./tcctest.gcc > $@ # auto test -test1 test1b: tcctest.c test.ref +test1 test1b: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ $(TCC) $(RUN_TCC) -w -run $< > test.out1 - @diff -u test.ref test.out1 && echo "$(AUTO_TEST) OK" + @diff -u $(TCCTEST_REF) test.out1 && echo "$(AUTO_TEST) OK" # iterated test2 (compile tcc then compile tcctest.c !) 
-test2 test2b: tcctest.c test.ref +test2 test2b: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ $(TCC) $(RUN_TCC) $(RUN_TCC) -w -run $< > test.out2 - @diff -u test.ref test.out2 && echo "$(AUTO_TEST)2 OK" + @diff -u $(TCCTEST_REF) test.out2 && echo "$(AUTO_TEST)2 OK" # iterated test3 (compile tcc then compile tcc then compile tcctest.c !) -test3 test3b: tcctest.c test.ref +test3 test3b: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ $(TCC) $(RUN_TCC) $(RUN_TCC) $(RUN_TCC) -w -run $< > test.out3 - @diff -u test.ref test.out3 && echo "$(AUTO_TEST)3 OK" + @diff -u $(TCCTEST_REF) test.out3 && echo "$(AUTO_TEST)3 OK" AUTO_TEST = Auto Test test%b : TCCFLAGS += -b -bt1 test%b : AUTO_TEST = Auto Bound-Test # binary output test -test4: tcctest.c test.ref +test4: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ # object + link output $(TCC) -c -o tcctest3.o $< $(TCC) -o tcctest3 tcctest3.o ./tcctest3 > test3.out - @if diff -u test.ref test3.out ; then echo "Object $(AUTO_TEST) OK"; fi + @if diff -u $(TCCTEST_REF) test3.out ; then echo "Object $(AUTO_TEST) OK"; fi # dynamic output $(TCC) -o tcctest1 $< ./tcctest1 > test1.out - @if diff -u test.ref test1.out ; then echo "Dynamic $(AUTO_TEST) OK"; fi + @if diff -u $(TCCTEST_REF) test1.out ; then echo "Dynamic $(AUTO_TEST) OK"; fi # dynamic output + bound check $(TCC) -b -o tcctest4 $< ./tcctest4 > test4.out - @if diff -u test.ref test4.out ; then echo "BCheck $(AUTO_TEST) OK"; fi + @if diff -u $(TCCTEST_REF) test4.out ; then echo "BCheck $(AUTO_TEST) OK"; fi -test4_static: tcctest.c test.ref +test4_static: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ # static output. 
$(TCC) -static -o tcctest2 $< ./tcctest2 > test2.out - @if diff -u test.ref test2.out ; then echo "Static $(AUTO_TEST) OK"; fi + @if diff -u $(TCCTEST_REF) test2.out ; then echo "Static $(AUTO_TEST) OK"; fi # use tcc to create libtcc.so/.dll and the tcc(.exe) frontend and run them dlltest: @@ -218,7 +223,7 @@ speedtest: ex2 ex3 time ./ex3 35 time $(TCC) -run $(TOPSRC)/examples/ex3.c 35 -weaktest: tcctest.c test.ref +weaktest: tcctest.c $(TCCTEST_REF) @echo ------------ $@ ------------ $(TCC) -c $< -o weaktest.tcc.o $(CC) -c $< -o weaktest.gcc.o $(CFLAGS) -w -O0 -std=gnu99 -fno-omit-frame-pointer @@ -309,6 +314,7 @@ CROSS-TGTS = \ arm-NetBSD \ arm-wince \ arm64 \ + arm64-win32 \ arm64-osx \ arm64-FreeBSD \ arm64-NetBSD \ @@ -346,4 +352,3 @@ clean: rm -f ex? tcc_g weaktest.*.txt *.def *.pdb *.obj libtcc_test_mt @$(MAKE) -C tests2 $@ @$(MAKE) -C pp $@ - diff --git a/tests/tcctest.c b/tests/tcctest.c index 49959417..21630ebe 100644 --- a/tests/tcctest.c +++ b/tests/tcctest.c @@ -2556,11 +2556,11 @@ void longlong_test(void) a = ia; b = ua; printf(LONG_LONG_FORMAT " " LONG_LONG_FORMAT "\n", a, b); - printf(LONG_LONG_FORMAT " " LONG_LONG_FORMAT " " LONG_LONG_FORMAT " %Lx\n", + printf(LONG_LONG_FORMAT " " LONG_LONG_FORMAT " " LONG_LONG_FORMAT " " XLONG_LONG_FORMAT "\n", (long long)1, (long long)-2, 1LL, - 0x1234567812345679); + 0x1234567812345679ULL); a = llfunc1(-3); printf(LONG_LONG_FORMAT "\n", a); diff --git a/tests/test.ref.win32-arm64 b/tests/test.ref.win32-arm64 new file mode 100644 index 00000000..4327e6f4 --- /dev/null +++ b/tests/test.ref.win32-arm64 @@ -0,0 +1,1005 @@ +---- whitespace_test ---- +whitspace: +N=2 +aaa=3 +min=4 +len1=1 +len1=1 str[0]=10 +len1=3 +__LINE__ defined +"123 + 456" +1 + +---- macro_test ---- +N=1236 +aaa=4 +min=-1 +s1=hello +s2=hello, world +s3="c" +s4=a1 +B3=1 +onetwothree=123 +A defined +A defined +B not defined +A defined +B1 not defined +test true1 +test trueA +test 2 +123 +__func__ = macro_test +vaarg=1 +vaarg1 +vaarg1=2 +vaarg1=1 2 
+func='macro_test' +INT64_MIN=-9223372036854775808 +a=2 +a=8 +hi +tralala +hi +qq=42 +qq1=1 +basefromheader tcctest.c +base tcctest.c +filefromheader tcctest.h +file tcctest.c +print a backslash: \ + +---- recursive_macro_test ---- +43 +fn tcctest.c, line 326, num 123 +fn tcctest.c, line 327, num 123 +fn tcctest.c, line 328, num 123 +rm_field = 0 +rm_field = 0 +rm_field = 0 0 + +---- string_test ---- +string: +ab3c +ABC: +c=r +wc=a 0x1234 c +foo1_string='bar +testa' +test +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +fib=3524578 +262144 +524288 +1048576 +2097152 +4194304 +8388608 +16777216 +33554432 +67108864 +134217728 +268435456 +536870912 +1073741824 +string_test2 +b +b +b +z +r +q +p +c +hello +world +bla +one +two +aa +one +ef +yz + +---- expr_test ---- +1 +-1 +-31232132 +-7808033 +-13 +2 +5 +13 +1 +16 +22322 +22319 +6964152 +5580 +-5580 +1073736243 +1 +-1 +15 +0 +22326 +22329 +2790 +-2791 +536868121 +357136 +-22322 +-22321 +22321 +13 +10 +3744 +3 +12 +4 +11 +12 +3 +192 +-13 +-12 +12 +1 1 1 0 + +---- scope_test ---- +g1=1 +g2=2 +g3=3 +g4=4 +g5=2 + +---- scope2_test ---- +exloc: 43 +exloc: 44 +exloc: 45 +exloc: 46 +exloc: 47 +exloc: 48 +exloc: 49 +exloc: 50 +exloc: 51 +exloc: 51 + +---- forward_test ---- +forward ok +forward ok + +---- funcptr_test ---- +12345 +12345 +12346 +sizeof1 = 1 +sizeof2 = 1 +sizeof3 = 8 +sizeof4 = 8 +42 +42 +43 +aligned_function (should be zero): 0 + +---- if_test ---- +if1t: 1 4 0 0 +if1t: 1 6 0 0 +if1t: 1 7 0 0 +if1t: 1 8 0 0 +if1t: 1 12 0 0 0 +if1t: 2 3 0 3 +if1t: 2 5 0 3 +if1t: 2 7 0 3 +if1t: 2 8 0 3 +if1t: 3 2 2 0 +if1t: 3 5 2 0 +if1t: 3 6 2 0 +if1t: 3 8 2 0 +if1t: 3 10 2 0 0 +if1t: 3 12 2 0 0 +if1t: 3 13 2 0 0 +if1t: 3 14 2 0 0 +if1t: 4 1 2 3 +if1t: 4 5 2 3 +if1t: 4 6 2 3 +if1t: 4 7 2 3 +if1t: 4 9 2 3 0 +if1t: 4 10 2 3 0 +if1t: 4 13 2 3 0 +if1t: 4 14 2 3 0 +if2t:ok +if2t:ok2 +if2t:ok3 + +---- loop_test ---- +0123456789 +0123456789 +0123456789 +count=1 +count=2 +count = 123 +12456 +12456 +012456789 + +---- 
switch_test ---- +aa2b4567ccccc1314 +ullsw:1 +ullsw:2 +ullsw:3 +ullsw:4 +ullsw:5 +llsw:1 +llsw:2 +llsw:3 +llsw:4 +llsw:5 +ucsw:1 +ucsw:1 +ucsw:1 +ucsw:1 +ucsw:2 +ucsw:3 +ucsw:3 +ucsw:3 +ucsw:3 +ucsw:3 +ucsw:3 +scsw:1 +scsw:1 +scsw:1 +scsw:1 +scsw:2 +scsw:3 +scsw:3 +scsw:3 +scsw:3 +scsw:3 +scsw:3 + +---- goto_test ---- + +goto: +0123456789 +label1 +label2 +label3 + +---- enum_test ---- +43 0 2 4 5 6 1000 +b1=1 +enum large: 0 +enum unsigned: ok +enum unsigned: ok + +---- typedef_test ---- +a=1234 +mytype2=2 + +---- struct_test ---- +sizes: 20 8 4 4 +offsets: 12 +st1: 1 2 3 +union1: 2 +union2: 2 +st2: 3 2 1 +str_addr=10 +aligntest1 sizeof=10 alignof=1 +aligntest2 sizeof=16 alignof=4 +aligntest3 sizeof=16 alignof=8 +aligntest4 sizeof=0 alignof=8 +aligntest5 sizeof=16 alignof=16 +aligntest6 sizeof=16 alignof=16 +aligntest7 sizeof=4 alignof=4 +aligntest8 sizeof=4096 alignof=4096 +aligntest9 sizeof=16 alignof=8 +aligntest10 sizeof=16 alignof=8 +altest5 sizeof=32 alignof=16 +altest6 sizeof=32 alignof=16 +altest7 sizeof=8 alignof=16 +sizeof(struct empty) = 0 +alignof(struct empty) = 1 +Large: sizeof=48 +Large: offsetof(compound_head)=32 + +---- array_test ---- +sizeof(a) = 16 +sizeof("a") = 2 +sizeof(__func__) = 11 +sizeof tab 12 +sizeof tab2 24 +1 2 3 + 0 1 10 11 20 21 +sizeof(size_t)=8 +sizeof(ptrdiff_t)=8 + +---- expr_ptr_test ---- +diff=10 +inc=1 +dec=0 +inc=1 +dec=0 +add=3 +add=3 +FFFFFFFFFFFFFFFC 0000000000000000 1 +0 1 1 1 0 0 +FFFFFFFFFFFFFFFC FFFFFFFFC0000000 -268435455 +0 1 1 1 0 0 +FFFFFFFFFFFFFFFC 00000000B0000000 738197505 +0 1 1 1 0 0 +FFFFFFFFFFFFFFFC 0000000470000000 469762049 +0 1 1 1 0 0 +42 +-1 + +---- bool_test ---- +!s=1 +a=1 +a=0 1 1 +a=0 0 1 +a=1 0 +a1 +a2 +a4 +b=6 +a=400 +exp=1 +r=1 +type of bool: 2147483647 +0 +type of cond: 2147483647 +aspect=65535 +aspect=65536 +34 -1 +60 31 +bits = 0x1 + +---- optimize_out_test ---- +oo:40 +oo:41 +oo:42 +oo:43 +oow:44 +oos:45 +ool1:46 +ool2:47 +ool3:48 +ool5:49 +ool6:50 + +---- expr2_test ---- +res= 112 2 + +---- 
constant_expr_test ---- +48 +3 +3 +16 + +---- expr_cmp_test ---- +1 +0 +1 +1 +1 +1 +1 +1 +0 +0 +1 +0 +1 +0 + +---- char_short_test ---- +s8=4 -4 +u8=4 252 +s16=772 -516 +u16=772 65020 +s32=16909060 -66052 +u32=16909060 -66052 +var1=1020308 +var1=1020809 +var1=8090a0b +promote char/short assign -85 120 +promote char/short assign VA -85 120 +promote char/short cast VA -85 120 +promote char/short funcret 137 -85 +promote char/short fumcret VA 52685 -4113 0 1 +promote multicast (char)(unsigned char) -9 -9 +promote multicast (unsigned)(int) 2862188664 +promote multicast (unsigned)(char) 4294967177 + +---- init_test ---- +sinit1=2 +sinit2=3 +sinit3=12 1 2 3 +sinit6=12 +sinit7=12 1 2 3 +sinit8=hellotrala +sinit9=1 2 3 +sinit10=1 2 3 +sinit11=1 2 3 10 11 12 +[0][0] = 1 1 1 +[0][1] = 2 2 2 +[1][0] = 3 3 3 +[1][1] = 4 4 4 +[2][0] = 5 5 5 +[2][1] = 6 6 6 +linit1=2 +linit2=3 +linit6=12 +linit8=11 hellotrala +sinit12=hello world +sinit13=24 test1 test2 test3 +sinit14=abc + 1 2 0 0 0 0 0 0 0 0 + 1 2 0 4 0 0 0 3 0 0 + 97 98 99 0 0 0 0 0 0 0 + 2 3 0 0 0 0 4 0 0 0 +1 0 2 0 +linit17=4 +sinit15=12 +sinit16=1 2 +sinit17=a1 4 a2 1 +0 0 14 14 14 14 2 0 a 0 +cix: 2000 2001 2002 2003 2003 0 0 +cix2: 3003 4006 +sizeof cix20 4, cix21 4, sizeof cix22 4 +arrtype1: 1 2 3 +arrtype2: 4 8 +arrtype3: 4 0 0 +arrtype4: 5 6 7 +arrtype5: 12 12 +arrtype6: 12 +sinit23= 8 42 +sinit24=1 +linit18= 1 1 +bf1: 1 0 +bf2: 1 0 +bf3: 1 0 +bf4: 1 0 +bf5[0]: 0 0 +bf5[1]: 1 0 +bf5[2]: 0 0 +bf6[0]: 0 0 +bf6[1]: 1 0 +bf6[2]: 0 0 +bf7[0]: 0 0 +bf7[1]: 1 0 +bf7[2]: 0 0 +bf8[0]: 0 0 +bf8[1]: 1 0 +bf8[2]: 0 0 +bf9[0]: 0 0 +bf9[1]: 0 0 +bf9[2]: 1 0 +bf10[0]: 0 0 +bf10[1]: 0 0 +bf10[2]: 1 0 + +---- compound_literal_test ---- + 1 2 3 +321 +q1=tralala1 +q2=tralala2 +q3=tralala2 +q4=tralala3 + 1 2 3 +1 2 4 +1 2 5 +1 2 6 + +---- kr_test ---- +func1=7 +func2=7 + +---- struct_assign_test ---- +3 4 3 4.500000 +before call: 3 4 +after call: 7 0 +1 +2 1 + +---- cast_test ---- +-1 -1 255 65535 +-1 -1 255 65535 +-1 -1 255 65535 +-127 
+1 +sizeof(c) = 1, sizeof((int)c) = 4 +((unsigned)(short)0x0000f000) = 0xfffff000 +((unsigned)(char)0x0000f0f0) = 0xfffffff0 +1 2 +sizeof(+(char)'a') = 4 +sizeof(-(char)'a') = 4 +sizeof(~(char)'a') = 4 +0000000000000001 000000000000F0F0 0000000000000000 00000000FFFFFFF0 +0x80000000 + +---- bitfield_test ---- +sizeof(st1) = 8 +3 -1 15 -8 121 +121 121 +st1.f1 == -1 +st1.f2 == -1 +4886718345 4026531841 120 +st4.y == 1 +st5 = 1 2 3 4 -3 6 +st6.y == 1 + +---- c99_bool_test ---- +sizeof(_Bool) = 1 +cast: 1 0 1 +b = 1 +b = 1 +sizeof(x ? _Bool : _Bool) = 4 (should be sizeof int) + +---- float_test ---- +sizeof(float) = 4 +sizeof(double) = 8 +sizeof(long double) = 8 +testing 'float' +0 1 1 0 0 1 +1.000000 2.500000 3.500000 -1.500000 2.500000 0.400000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +0 1 0 1 1 0 +2.000000 1.500000 3.500000 0.500000 3.000000 1.333333 -2.000000 +3.000000 +3.000000 +4.000000 +0 1 +1 0 0 0 1 1 +1.000000 1.000000 2.000000 0.000000 1.000000 1.000000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +ftof: 234.600006 234.600006 234.600006 +ftoi: 234 234 234 234 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985531201716224.000000 +ulltof: 17375807653627822000.000000 +ftof: -2334.600098 -2334.600098 -2334.600098 +ftoi: -2334 2334 -2334 2334 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985531201716224.000000 +ulltof: 17375807653627822000.000000 +float: 42.12346 +double: 42.123455 +long double: 42.123455 +strtof: 1.200000 +Test 1.0 / x != 1.0 / y returns 1 (should be 1). +Test 1.0 / x != 1.0 / -x returns 1 (should be 1). +Test 1.0 / x != 1.0 / +y returns 1 (should be 1). +Test 1.0 / x != 1.0 / -y returns 0 (should be 0). 
+testing 'double' +0 1 1 0 0 1 +1.000000 2.500000 3.500000 -1.500000 2.500000 0.400000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +0 1 0 1 1 0 +2.000000 1.500000 3.500000 0.500000 3.000000 1.333333 -2.000000 +3.000000 +3.000000 +4.000000 +0 1 +1 0 0 0 1 1 +1.000000 1.000000 2.000000 0.000000 1.000000 1.000000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +ftof: 234.600006 234.600000 234.600000 +ftoi: 234 234 234 234 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985529205302080.000000 +ulltof: 17375808098308006000.000000 +ftof: -2334.600098 -2334.600000 -2334.600000 +ftoi: -2334 2334 -2334 2334 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985529205302080.000000 +ulltof: 17375808098308006000.000000 +float: 42.12346 +double: 42.123457 +long double: 42.123457 +strtod: 1.200000 +Test 1.0 / x != 1.0 / y returns 1 (should be 1). +Test 1.0 / x != 1.0 / -x returns 1 (should be 1). +Test 1.0 / x != 1.0 / +y returns 1 (should be 1). +Test 1.0 / x != 1.0 / -y returns 0 (should be 0). +testing 'long double' +0 1 1 0 0 1 +1.000000 2.500000 3.500000 -1.500000 2.500000 0.400000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +0 1 0 1 1 0 +2.000000 1.500000 3.500000 0.500000 3.000000 1.333333 -2.000000 +3.000000 +3.000000 +4.000000 +0 1 +1 0 0 0 1 1 +1.000000 1.000000 2.000000 0.000000 1.000000 1.000000 -1.000000 +2.000000 +2.000000 +3.000000 +0 1 +ftof: 234.600006 234.600000 234.600000 +ftoi: 234 234 234 234 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985529205302080.000000 +ulltof: 17375808098308006000.000000 +ftof: -2334.600098 -2334.600000 -2334.600000 +ftoi: -2334 2334 -2334 2334 +itof: -1234.000000 +utof: 2166572288.000000 +lltof: -81985529205302080.000000 +ulltof: 17375808098308006000.000000 +float: 42.12346 +double: 42.123457 +long double: 42.123457 +strtold: 1.200000 +Test 1.0 / x != 1.0 / y returns 1 (should be 1). +Test 1.0 / x != 1.0 / -x returns 1 (should be 1). +Test 1.0 / x != 1.0 / +y returns 1 (should be 1). 
+Test 1.0 / x != 1.0 / -y returns 0 (should be 0). +1.200000 3.400000 -5.600000 +2.120000 0.500000 23000000000.000000 +da=123.000000 +fa=123.000000 +da = -294967296.000000 +db = 4000000000.000000 +nan != nan = 1, inf1 = 1.#INF00, inf2 = 1.#INF00 +da subnormal = 0x0.880000p-1022 +da subnormal = 1.1820704873319507e-308 +la subnormal = 0x0.880000p-1022 +la subnormal = 1.1820704873319507e-308 +da/2 subnormal = 0x0.440000p-1022 +da/2 subnormal = 5.9103524366597537e-309 +la/2 subnormal = 0x0.440000p-1022 +la/2 subnormal = 5.9103524366597537e-309 +fa subnormal = 0x1.100000p-127 +fa subnormal = 6.2448137387434024e-039 +la subnormal = 0x1.100000p-127 +la subnormal = 6.2448137387434024e-039 +fa/2 subnormal = 0x1.100000p-128 +fa/2 subnormal = 3.1224068693717012e-039 +la/2 subnormal = 0x1.100000p-128 +la/2 subnormal = 3.1224068693717012e-039 + +---- longlong_test ---- +sizeof(long long) = 8 +-1 4294967294 +1 -2 1 1234567812345679 +-6 +arith: 1023 977 23000 +arith1: 43 11 +bin: 0 1023 1023 +test: 0 1 0 1 1 0 +utest: 0 1 0 1 1 0 +arith2: 1001 24 +arith2: 1001 24 +arith2: 1001 24 +arith2: 1001 24 +not: 0 0 1 1 +arith: 4915 -4405 1188300 +arith1: 0 255 +bin: 52 4863 4811 +test: 0 1 1 0 0 1 +utest: 0 1 1 0 0 1 +arith2: 256 4661 +arith2: 256 4661 +arith2: 256 4661 +arith2: 256 4661 +not: 0 0 1 1 +arith: -782639107 782639101 2347917312 +arith1: 0 -3 +bin: -782639104 -3 782639101 +test: 0 1 0 1 1 0 +utest: 0 1 0 1 1 0 +arith2: -2 -782639103 +arith2: -2 -782639103 +arith2: -2 -782639103 +arith2: -2 -782639103 +not: 0 0 1 1 +shift: 9 9 9312 +shiftc: 36 36 2328 +shiftc: 0 0 9998683865088 +shift: 576460752303423487 -1 -736 +shiftc: 2305843009213693949 -3 -184 +shiftc: 536870911 -1 -790273982464 +shift: 0 0 -1152921504606846976 +shiftc: 245252176896 245252176896 15696139321344 +shiftc: 57 57 -8444530776296390656 +la=320255972942661 ula=16458594985017606144 +lltof: 320255981256704.000000 320255972942661.000000 320255972942661.000000 +ftoll: 320255981256704 320255972942661 320255972942661 
+ulltof: 16458595053737083000.000000 16458594985017606000.000000 16458594985017606000.000000 +ftoull: 16458595053737082880 16458594985017606144 16458594985017606144 +12345677 +3 +arith: 2147483648 2147483648 0 +bin: 0 2147483648 2147483648 +test: 0 1 0 1 1 0 +utest: 0 1 0 1 1 0 +arith2: 2147483649 1 +arith2: 2147483649 1 +arith2: 2147483649 1 +arith2: 2147483649 1 +not: 0 0 1 1 +another long long spill test : 2 +a long long function (arm-)reg-args test : -4 +1 0 1 0 +4886718345 +shift: 9 9 9312 +shiftc: 36 36 2328 +shiftc: 0 0 9998683865088 +long long u=2 +long long u=2862188664 +check_opl_save_regs: 1 + +---- manyarg_test ---- +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 987654321986 42.000000 43.000000 +1234567891234.000000 1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 987654321986 42.000000 43.000000 +1 2 3 4 5 6 7 8 1234567891234.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 98765432198642.000000 43.000000 1234567891234.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234.000000 1234567891234 987654321986 42.000000 43.000000 1234567891234.000000 + +---- stdarg_test ---- +1 2 3 +1.000000 2 3.000000 +1234567891234 987654321986 3 1234.000000 +1.200000 2.300000 3.400000 +1 1.200000 3 4.500000 6 7.800000 9 0.100000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 987654321986 42.000000 43.000000 
+1234567891234.000000 1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 987654321986 42.000000 43.000000 +1 2 3 4 5 6 7 8 1234567891234.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234 987654321986 42.000000 43.000000 1234567891234.000000 +1 2 3 4 5 6 7 8 0.100000 1.200000 2.300000 3.400000 4.500000 5.600000 6.700000 7.800000 8.900000 9.000000 1234567891234.000000 1234567891234 987654321986 42.000000 43.000000 1234567891234.000000 +stdarg_for_struct: 1 2 3 42 42 42 42 +stdarg_for_libc: string 1.23 456 +stdarg_void_expr: 17 +stdarg_double_struct: -1 +pts[0] = 1 2 +pts[1] = 3 4 +pts[2] = 5 6 +pts[3] = 7 8 +pts[4] = 9 10 +pts[5] = 11 12 +stdarg_double_struct: 1 +pts[0] = 1 2 +d 1 = -1 +pts[2] = 3 4 +pts[3] = 5 6 +pts[4] = 7 8 +pts[5] = 9 10 +pts[6] = 11 12 +stdarg_double_struct: 2 +pts[0] = 1 2 +pts[1] = 3 4 +d 2 = -1 +pts[3] = 5 6 +pts[4] = 7 8 +pts[5] = 9 10 +pts[6] = 11 12 +stdarg_double_struct: 3 +pts[0] = 1 2 +pts[1] = 3 4 +pts[2] = 5 6 +d 3 = -1 +pts[4] = 7 8 +pts[5] = 9 10 +pts[6] = 11 12 +stdarg_double_struct: 4 +pts[0] = 1 2 +pts[1] = 3 4 +pts[2] = 5 6 +pts[3] = 7 8 +d 4 = -1 +pts[5] = 9 10 +pts[6] = 11 12 +stdarg_double_struct: 5 +pts[0] = 1 2 +pts[1] = 3 4 +pts[2] = 5 6 +pts[3] = 7 8 +pts[4] = 9 10 +d 5 = -1 +pts[6] = 11 12 + +---- relocation_test ---- +*rel1=2 +*rel2=3 +in getmyaddress + +---- old_style_function_test ---- +a=1 b=2 b=3.000000 +cmpfn=0 +cmpfn=0 + +---- alloca_test ---- +alloca: p is 123456789012345 +alloca: This is only a test. 
+ + +---- c99_vla_test ---- +Test C99 VLA 1 (sizeof): PASSED +Test C99 VLA 2 (ptrs subtract): PASSED +Test C99 VLA 3 (ptr add): PASSED +Test C99 VLA 4 (ptr access): PASSED +Test C99 VLA 5 (bounds checking (might be disabled)): PASSED PASSED PASSED PASSED PASSED PASSED PASSED PASSED +Test C99 VLA 6 (pointer) + 1 2 3 4 5 + 6 7 8 9 10 + 11 12 13 14 15 + 16 17 18 19 20 + + 21 22 23 24 25 + 26 27 28 29 30 + 31 32 33 34 35 + 36 37 38 39 40 + + 41 42 43 44 45 + 46 47 48 49 50 + 51 52 53 54 55 + 56 57 58 59 60 + + sizes : 80 20 4 + pdiff : 2 3 + tests : 1 1 1 +123 +123 +123 +123 +8 4 123 + +---- sizeof_test ---- +sizeof(int) = 4 +sizeof(unsigned int) = 4 +sizeof(long) = 4 +sizeof(unsigned long) = 4 +sizeof(short) = 2 +sizeof(unsigned short) = 2 +sizeof(char) = 1 +sizeof(unsigned char) = 1 +sizeof(func) = 1 +sizeof(a++) = 4 +a=1 +sizeof(**ptr) = 4 +sizeof(sizeof(int) = 8 +1 0 +__alignof__(int) = 4 +__alignof__(unsigned int) = 4 +__alignof__(short) = 2 +__alignof__(unsigned short) = 2 +__alignof__(char) = 1 +__alignof__(unsigned char) = 1 +__alignof__(func) = 1 +sizeof(char[1+2*a]) = 5 +sizeof( (struct {int i; int j;}){4,5} ) = 8 +sizeof (struct {short i; short j;}){4,5} = 4 +sizeof(t && 0) = 4 +sizeof(1 && 1) = 4 +sizeof(t || 1) = 4 +sizeof(0 || 0) = 4 +sizeof(0, arr) = 8 +sizeof(0, fn) = 8 + +---- typeof_test ---- +a=1.500000 b=2.500000 c=3.500000 + +---- statement_expr_test ---- +a=110 +stmtexpr: somerandomlongstring anotherlongstring +stmtexpr: 40 41 42 +stmtexpr: 43 44 +stmtexpr: 2 should be 2 + +---- local_label_test ---- +bb2 +bb1 +aa2 +aa3 +aa1 +a=2 + +---- asm_test ---- + +---- builtin_test ---- +__builtin_types_compatible_p(int, int) = 1 +__builtin_types_compatible_p(int, unsigned int) = 0 +__builtin_types_compatible_p(int, char) = 0 +__builtin_types_compatible_p(int, const int) = 1 +__builtin_types_compatible_p(int, volatile int) = 1 +__builtin_types_compatible_p(int *, int *) = 1 +__builtin_types_compatible_p(int *, void *) = 0 +__builtin_types_compatible_p(int 
*, const int *) = 0 +__builtin_types_compatible_p(char *, unsigned char *) = 0 +__builtin_types_compatible_p(char *, signed char *) = 0 +__builtin_types_compatible_p(char *, char *) = 1 +__builtin_types_compatible_p(char **, void *) = 0 +res1 = 1 +res2 = 1 +res3 = 0 +res4 = 0 +res5 = 0 +res6 = 0 +res7 = 1 +res8 = 0 +res10 = 1 +res11 = 1 +res12 = 1 +res13 = 0 +bce: 2 +bce: 1 +bce: 8 +bce: 2 +0 2523 +1 2523 +2 4075 +3 1420 +4 1420 +5 3028 +6 1490 +7 1490 +8 3010 +9 2444 +10 2444 +11 3056 +12 16149 +13 16149 +14 32126 +15 547 +16 547 +17 546 + +---- weak_test ---- + +---- global_data_test ---- +43 + +---- cmp_comparison_test ---- + +---- math_cmp_test ---- + +---- callsave_test ---- +callsavetest: 8 +0 + +---- builtin_frame_address_test ---- +str: __builtin_frame_address +bfa1: __builtin_frame_address +bfa2: __builtin_frame_address +bfa3: __builtin_frame_address + +---- volatile_test ---- + ok + +---- attrib_test ---- + +---- bounds_check1_test ---- +0a +0000000000000014 + +---- func_arg_test ---- +0 1 2 3 4 5 6 7 0 + diff --git a/tests/tests2/Makefile b/tests/tests2/Makefile index 0726e99d..3a61318e 100644 --- a/tests/tests2/Makefile +++ b/tests/tests2/Makefile @@ -48,6 +48,17 @@ ifeq (-$(CONFIG_WIN32)-,-yes-) SKIP += 117_builtins.test # win32 port doesn't define __builtins SKIP += 124_atomic_counter.test # No pthread support endif +ifneq (,$(filter arm% riscv%,$(ARCH))) + SKIP += 85_asm-outside-function.test + SKIP += 98_al_ax_extend.test + SKIP += 99_fastcall.test + SKIP += 127_asm_goto.test +endif +ifneq (,$(findstring win32,$(CROSS_TARGET))) + SKIP += 106_versym.test + SKIP += 114_bound_signal.test + SKIP += 124_atomic_counter.test +endif ifneq (,$(filter OpenBSD FreeBSD NetBSD,$(TARGETOS))) SKIP += 106_versym.test # no pthread_condattr_setpshared SKIP += 114_bound_signal.test # libc problem signal/fork diff --git a/win32/build-tcc.bat b/win32/build-tcc.bat index e5b576bc..6e8e58e4 100644 --- a/win32/build-tcc.bat +++ b/win32/build-tcc.bat @@ -94,9 +94,12 @@ if 
(%BINDIR%)==() set BINDIR=%TCCDIR% set D32=-DTCC_TARGET_PE -DTCC_TARGET_I386 set D64=-DTCC_TARGET_PE -DTCC_TARGET_X86_64 +set DARM64=-DTCC_TARGET_PE -DTCC_TARGET_ARM64 set P32=i386-win32 set P64=x86_64-win32 +set PARM64=arm64-win32 +if %T%==arm64 goto :tarm64 if %T%==64 goto :t64 set D=%D32% set P=%P32% @@ -113,6 +116,12 @@ set PX=%P32% set TX=32 goto :p3 +:tarm64 +set D=%DARM64% +set P=%PARM64% +set TCC_C=..\tcc.c +goto :p3 + :p3 git.exe --version 2>nul if not %ERRORLEVEL%==0 goto :git_done @@ -125,12 +134,14 @@ if %ERRORLEVEL%==1 set GITHASH=%GITHASH%* :config.h echo>..\config.h #define TCC_VERSION "%VERSION%" -if not (%GITHASH%)==() echo>> ..\config.h #define TCC_GITHASH "%GITHASH%" +if not "%GITHASH%"=="" echo>> ..\config.h #define TCC_GITHASH "%GITHASH%" @if not (%BINDIR%)==(%TCCDIR%) echo>> ..\config.h #define CONFIG_TCCDIR "%TCCDIR:\=/%" +if "%TX%"=="" goto :skip_cross if %TX%==64 echo>> ..\config.h #ifdef TCC_TARGET_X86_64 if %TX%==32 echo>> ..\config.h #ifdef TCC_TARGET_I386 echo>> ..\config.h #define CONFIG_TCC_CROSSPREFIX "%PX%-" echo>> ..\config.h #endif +:skip_cross @rem echo>> ..\config.h #define CONFIG_TCC_PREDEFS 1 @rem %CC% -DC2STR ..\conftest.c -o c2str.exe @@ -168,6 +179,7 @@ if exist libtcc.dll .\tcc -impdef libtcc.dll -o libtcc\libtcc.def @if errorlevel 1 goto :the_end :lib +@rem ARM64 now supported with implemented assembler call :make_lib %T% || goto :the_end @if exist %PX%-tcc.exe call :make_lib %TX% %PX%- || goto :the_end @@ -206,7 +218,7 @@ exit /B %ERRORLEVEL% .\tcc -B. -m%1 -c ../lib/stdatomic.c .\tcc -B. -m%1 -c ../lib/atomic.S .\tcc -B. -m%1 -c ../lib/builtin.c -.\tcc -B. -m%1 -ar lib/%2libtcc1.a libtcc1.o crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o chkstk.o alloca.o alloca-bt.o stdatomic.o atomic.o builtin.o +.\tcc -ar lib/%2libtcc1.a libtcc1.o crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o chkstk.o alloca.o alloca-bt.o stdatomic.o atomic.o builtin.o .\tcc -B. -m%1 -c ../lib/bcheck.c -o lib/%2bcheck.o -bt -I.. 
.\tcc -B. -m%1 -c ../lib/bt-exe.c -o lib/%2bt-exe.o .\tcc -B. -m%1 -c ../lib/bt-log.c -o lib/%2bt-log.o diff --git a/win32/include/_mingw.h b/win32/include/_mingw.h index d10a6b18..1e06f23c 100644 --- a/win32/include/_mingw.h +++ b/win32/include/_mingw.h @@ -70,12 +70,17 @@ #ifdef _WIN64 #define __stdcall +#if defined(__aarch64__) +#define _M_ARM64 1 +#define _ARM64_ 1 +#else #define _AMD64_ 1 #define __x86_64 1 #define _M_X64 100 /* Visual Studio */ #define _M_AMD64 100 /* Visual Studio */ #define USE_MINGW_SETJMP_TWO_ARGS #define mingw_getsp tinyc_getbp +#endif #else #define __stdcall __attribute__((__stdcall__)) #define _X86_ 1 diff --git a/win32/include/setjmp.h b/win32/include/setjmp.h index e4f142a3..d9b136b5 100644 --- a/win32/include/setjmp.h +++ b/win32/include/setjmp.h @@ -124,37 +124,119 @@ extern "C" { SETJMP_FLOAT128 Xmm14; SETJMP_FLOAT128 Xmm15; } _JUMP_BUFFER; +#elif defined(_ARM_) + +#define _JBLEN 28 +#define _JBTYPE int + + typedef struct __JUMP_BUFFER { + unsigned long Frame; + unsigned long R4; + unsigned long R5; + unsigned long R6; + unsigned long R7; + unsigned long R8; + unsigned long R9; + unsigned long R10; + unsigned long R11; + unsigned long Sp; + unsigned long Pc; + unsigned long Fpscr; + unsigned long long D[8]; + } _JUMP_BUFFER; +#elif defined(_ARM64_) + +#define _JBLEN 24 +#define _JBTYPE unsigned __int64 + + typedef struct __JUMP_BUFFER { + unsigned __int64 Frame; + unsigned __int64 Reserved; + unsigned __int64 X19; + unsigned __int64 X20; + unsigned __int64 X21; + unsigned __int64 X22; + unsigned __int64 X23; + unsigned __int64 X24; + unsigned __int64 X25; + unsigned __int64 X26; + unsigned __int64 X27; + unsigned __int64 X28; + unsigned __int64 Fp; + unsigned __int64 Lr; + unsigned __int64 Sp; + unsigned long Fpcr; + unsigned long Fpsr; + double D[8]; + } _JUMP_BUFFER; +#else + +#define _JBLEN 1 +#define _JBTYPE int #endif #ifndef _JMP_BUF_DEFINED typedef _JBTYPE jmp_buf[_JBLEN]; #define _JMP_BUF_DEFINED #endif + 
__declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl longjmp(jmp_buf _Buf, int _Value); + +#if (defined(_X86_) && !defined(__x86_64)) + int __cdecl __attribute__ ((__nothrow__, __returns_twice__)) _setjmp(jmp_buf _Buf); + int __cdecl __attribute__ ((__nothrow__, __returns_twice__)) _setjmp3(jmp_buf _Buf, int _Count, ...); +#else + #ifndef __aarch64__ + int __cdecl __attribute__ ((__nothrow__, __returns_twice__)) _setjmp(jmp_buf _Buf, void *_Frame); + #endif + int __cdecl __attribute__ ((__nothrow__, __returns_twice__)) _setjmpex(jmp_buf _Buf, void *_Frame); +#endif + +#if defined(__arm__) || defined(__aarch64__) + int __cdecl __attribute__ ((__nothrow__, __returns_twice__)) __mingw_setjmp(jmp_buf _Buf); + __declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl __mingw_longjmp(jmp_buf _Buf, int _Value); +#endif + +#if defined(__TCC_BCHECK__) + __declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl __bound_longjmp(jmp_buf _Buf, int _Value); +#endif + void * __cdecl __attribute__ ((__nothrow__)) mingw_getsp(void); - -#ifdef USE_MINGW_SETJMP_TWO_ARGS -#ifndef _INC_SETJMPEX -#define setjmp(BUF) _setjmp((BUF),mingw_getsp()) - int __cdecl __attribute__ ((__nothrow__)) _setjmp(jmp_buf _Buf,void *_Ctx); -#else -#undef setjmp -#define setjmp(BUF) _setjmpex((BUF),mingw_getsp()) -#define setjmpex(BUF) _setjmpex((BUF),mingw_getsp()) - int __cdecl __attribute__ ((__nothrow__)) _setjmpex(jmp_buf _Buf,void *_Ctx); -#endif -#else -#ifndef _INC_SETJMPEX -#define setjmp _setjmp -#endif - int __cdecl __attribute__ ((__nothrow__)) setjmp(jmp_buf _Buf); -#endif - - __declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl ms_longjmp(jmp_buf _Buf,int _Value)/* throw(...)*/; - __declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl longjmp(jmp_buf _Buf,int _Value); + __declspec(noreturn) __attribute__ ((__nothrow__)) void __cdecl ms_longjmp(jmp_buf _Buf, int _Value); #ifdef __cplusplus } #endif #pragma pack(pop) + +#ifdef setjmp +#undef setjmp 
+#endif +#if (defined(_X86_) && !defined(__x86_64)) +#define setjmp(BUF) _setjmp3((BUF), 0) +#elif defined(__arm__) || defined(__aarch64__) +#define setjmp(BUF) __mingw_setjmp((BUF)) +#elif defined(USE_MINGW_SETJMP_TWO_ARGS) + #ifndef _INC_SETJMPEX + #define setjmp(BUF) _setjmp((BUF), mingw_getsp()) + #else + #define setjmp(BUF) _setjmpex((BUF), mingw_getsp()) + #define setjmpex(BUF) _setjmpex((BUF), mingw_getsp()) + #endif +#else + #ifndef _INC_SETJMPEX + #define setjmp _setjmp + #endif +#endif + +#ifdef longjmp +#undef longjmp +#endif +#if defined(__TCC_BCHECK__) && defined(__aarch64__) +#define longjmp __bound_longjmp +#elif defined(__arm__) || defined(__aarch64__) +#define longjmp __mingw_longjmp +#else +#define longjmp longjmp +#endif #endif diff --git a/win32/include/stdlib.h b/win32/include/stdlib.h index 21a0fbd9..fb680d95 100644 --- a/win32/include/stdlib.h +++ b/win32/include/stdlib.h @@ -224,6 +224,8 @@ extern "C" { extern wchar_t **_imp___wpgmptr; #endif #endif + _CRTIMP errno_t __cdecl _get_environ(char ***_Value); + _CRTIMP errno_t __cdecl _get_wenviron(wchar_t ***_Value); errno_t __cdecl _get_pgmptr(char **_Value); errno_t __cdecl _get_wpgmptr(wchar_t **_Value); #ifndef _fmode diff --git a/win32/include/winapi/winnt.h b/win32/include/winapi/winnt.h index fb90d216..746e6973 100644 --- a/win32/include/winapi/winnt.h +++ b/win32/include/winapi/winnt.h @@ -21,7 +21,7 @@ extern "C" { #define __CRT_UNALIGNED #endif -#if defined(__ia64__) || defined(__x86_64) +#if defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define UNALIGNED __CRT_UNALIGNED #ifdef _WIN64 #define UNALIGNED64 __CRT_UNALIGNED @@ -47,6 +47,9 @@ extern "C" { #endif #endif +#if !defined(I_X86_) && !defined(_IA64_) && !defined(_AMD64_) && defined(__aarch64__) && !defined(_ARM64_) +#define _ARM64_ +#endif #ifdef _WIN64 #define MAX_NATURAL_ALIGNMENT sizeof(ULONGLONG) @@ -65,7 +68,7 @@ extern "C" { #ifdef _WIN64 #ifdef _AMD64_ #define PROBE_ALIGNMENT(_s) TYPE_ALIGNMENT(DWORD) 
-#elif defined(_IA64_) +#elif defined(_IA64_) || defined(_ARM64_) #define PROBE_ALIGNMENT(_s) (TYPE_ALIGNMENT(_s) > TYPE_ALIGNMENT(DWORD) ? TYPE_ALIGNMENT(_s) : TYPE_ALIGNMENT(DWORD)) #else #error No Target Architecture @@ -79,7 +82,7 @@ extern "C" { #include -#if defined(_X86_) || defined(__ia64__) || defined(__x86_64) +#if defined(_X86_) || defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define DECLSPEC_IMPORT __declspec(dllimport) #else #define DECLSPEC_IMPORT @@ -321,7 +324,7 @@ typedef DWORD LCID; #define Int32x32To64(a,b) (LONGLONG)((LONGLONG)(LONG)(a) *(LONG)(b)) #define UInt32x32To64(a,b) (ULONGLONG)((ULONGLONG)(DWORD)(a) *(DWORD)(b)) #define Int64ShrlMod32(a,b) ((DWORDLONG)(a)>>(b)) -#elif defined(__ia64__) || defined(__x86_64) +#elif defined(__ia64__) || defined(__x86_64) || defined(__aarch64__) #define Int32x32To64(a,b) ((LONGLONG)((LONG)(a)) *(LONGLONG)((LONG)(b))) #define UInt32x32To64(a,b) ((ULONGLONG)((DWORD)(a)) *(ULONGLONG)((DWORD)(b))) #define Int64ShrlMod32(a,b) ((ULONGLONG)(a) >> (b)) @@ -829,7 +832,7 @@ typedef DWORD LCID; typedef ULONG_PTR KSPIN_LOCK; typedef KSPIN_LOCK *PKSPIN_LOCK; -#ifdef _AMD64_ +#if defined(_AMD64_) || defined(_ARM64_) #if defined(__x86_64) && !defined(RC_INVOKED) @@ -1336,6 +1339,7 @@ typedef DWORD LCID; #define LEGACY_SAVE_AREA_LENGTH sizeof(XMM_SAVE_AREA32) +#if defined(__x86_64) || defined(_AMD64_) typedef struct DECLSPEC_ALIGN(16) _CONTEXT { DWORD64 P1Home; DWORD64 P2Home; @@ -1407,6 +1411,7 @@ typedef DWORD LCID; DWORD64 LastExceptionToRip; DWORD64 LastExceptionFromRip; } CONTEXT,*PCONTEXT; +#endif /* defined(__x86_64) || defined(_AMD64_) */ #define RUNTIME_FUNCTION_INDIRECT 0x1 @@ -1417,6 +1422,52 @@ typedef DWORD LCID; } RUNTIME_FUNCTION,*PRUNTIME_FUNCTION; typedef PRUNTIME_FUNCTION (*PGET_RUNTIME_FUNCTION_CALLBACK)(DWORD64 ControlPc,PVOID Context); + +#ifdef _ARM64_ + +/* ARM64 Context Definition */ +#define CONTEXT_ARM64 0x00400000 + +#ifndef CONTEXT_CONTROL +#define CONTEXT_CONTROL 
(CONTEXT_ARM64 | 0x00000001L) +#endif +#ifndef CONTEXT_INTEGER +#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x00000002L) +#endif +#ifndef CONTEXT_FLOATING_POINT +#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x00000004L) +#endif +#ifndef CONTEXT_DEBUG +#define CONTEXT_DEBUG (CONTEXT_ARM64 | 0x00000008L) +#endif + +#ifndef CONTEXT_FULL +#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#endif +#ifndef CONTEXT_ALL +#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG) +#endif + +#ifndef _ARM64_CONTEXT_DECLARED +#define _ARM64_CONTEXT_DECLARED + typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* must match the OS ARM64 context record: 0x390 bytes, the system fills the whole record */ + DWORD ContextFlags; DWORD Cpsr; /* two 32-bit fields share the first 8 bytes */ + DWORD64 X[29]; /* x0..x28 */ + DWORD64 Fp; + DWORD64 Lr; + DWORD64 Sp; + DWORD64 Pc; + struct { ULONGLONG Low; LONGLONG High; } V[32]; /* NEON registers are 128 bits wide */ + DWORD Fpcr; + DWORD Fpsr; + DWORD Bcr[8]; + DWORD64 Bvr[8]; /* breakpoint values are 64-bit addresses */ + DWORD Wcr[2]; + DWORD64 Wvr[2]; /* watchpoint values are 64-bit addresses */ + } CONTEXT,*PCONTEXT; +#endif + +#endif /* _ARM64_ */ typedef DWORD (*POUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK)(HANDLE Process,PVOID TableAddress,PDWORD Entries,PRUNTIME_FUNCTION *Functions); #define OUT_OF_PROCESS_FUNCTION_TABLE_CALLBACK_EXPORT_NAME "OutOfProcessFunctionTableCallback" @@ -1962,6 +2013,25 @@ typedef DWORD LCID; DWORD64 ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS]; } EXCEPTION_RECORD64,*PEXCEPTION_RECORD64; +#if defined(__aarch64__) && !defined(_ARM64_CONTEXT_DECLARED) +#define _ARM64_CONTEXT_DECLARED + typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* fallback copy; same 0x390-byte layout as the OS ARM64 context record */ + DWORD ContextFlags; DWORD Cpsr; /* two 32-bit fields share the first 8 bytes */ + DWORD64 X[29]; /* x0..x28 */ + DWORD64 Fp; + DWORD64 Lr; + DWORD64 Sp; + DWORD64 Pc; + struct { ULONGLONG Low; LONGLONG High; } V[32]; /* NEON registers are 128 bits wide */ + DWORD Fpcr; + DWORD Fpsr; + DWORD Bcr[8]; + DWORD64 Bvr[8]; /* breakpoint values are 64-bit addresses */ + DWORD Wcr[2]; + DWORD64 Wvr[2]; /* watchpoint values are 64-bit addresses */ + } CONTEXT,*PCONTEXT; +#endif + typedef struct _EXCEPTION_POINTERS { PEXCEPTION_RECORD ExceptionRecord; PCONTEXT ContextRecord; @@ -3701,6 +3771,7 @@ typedef DWORD LCID; #define IMAGE_FILE_MACHINE_CEF 0x0CEF #define IMAGE_FILE_MACHINE_EBC 0x0EBC #define IMAGE_FILE_MACHINE_AMD64 0x8664 +#define IMAGE_FILE_MACHINE_ARM64 0xAA64 #define IMAGE_FILE_MACHINE_M32R 0x9041 #define
IMAGE_FILE_MACHINE_CEE 0xC0EE diff --git a/win32/lib/chkstk.S b/win32/lib/chkstk.S index 6f583a53..2467138b 100644 --- a/win32/lib/chkstk.S +++ b/win32/lib/chkstk.S @@ -8,7 +8,81 @@ #endif /* ---------------------------------------------- */ -#ifndef __x86_64__ +#if defined(__aarch64__) +/* ---------------------------------------------- */ + +.globl __chkstk +__chkstk: + .long 0x910003f0 /* mov x16, sp */ + .long 0xd37cedf1 /* lsl x17, x15, #4 (x15 holds size/16) */ + .long 0xb4000131 /* cbz x17, done */ + .long 0xf140063f /* loop: cmp x17, #1, lsl #12 (flags only; x0 must stay intact) */ + .long 0x540000a9 /* b.ls tail */ + .long 0xd1400610 /* sub x16, x16, #1, lsl #12 */ + .long 0xf940021f /* ldr xzr, [x16] (touch the page) */ + .long 0xd1400631 /* sub x17, x17, #1, lsl #12 */ + .long 0x17fffffb /* b loop */ + .long 0xcb110210 /* tail: sub x16, x16, x17 */ + .long 0xf940021f /* ldr xzr, [x16] */ + .long 0xd65f03c0 /* done: ret */ + +.globl _(tinyc_getbp) +_(tinyc_getbp): + mov x0, x29 + ret + +.globl _(mingw_getsp) +_(mingw_getsp): + .long 0x910003e0 /* mov x0, sp */ + .long 0xd65f03c0 /* ret */ + +.globl _(__mingw_setjmp) +_(__mingw_setjmp): + .long 0xf900001f /* str xzr, [x0] */ + .long 0xa9015013 /* stp x19, x20, [x0, #16] */ + .long 0xa9025815 /* stp x21, x22, [x0, #32] */ + .long 0xa9036017 /* stp x23, x24, [x0, #48] */ + .long 0xa9046819 /* stp x25, x26, [x0, #64] */ + .long 0xa905701b /* stp x27, x28, [x0, #80] */ + .long 0xa906781d /* stp x29, x30, [x0, #96] */ + .long 0x910003e2 /* mov x2, sp */ + .long 0xf9003802 /* str x2, [x0, #112] */ + .long 0xd53b4402 /* mrs x2, fpcr */ + .long 0xb9007802 /* str w2, [x0, #120] */ + .long 0xd53b4422 /* mrs x2, fpsr */ + .long 0xb9007c02 /* str w2, [x0, #124] */ + .long 0x6d082408 /* stp d8, d9, [x0, #128] */ + .long 0x6d092c0a /* stp d10, d11, [x0, #144] */ + .long 0x6d0a340c /* stp d12, d13, [x0, #160] */ + .long 0x6d0b3c0e /* stp d14, d15, [x0, #176] */ + .long 0xd2800000 /* mov x0, #0 */ + .long 0xd65f03c0 /* ret */ + +.globl _(__mingw_longjmp) +_(__mingw_longjmp): + .long 0xa9415013 /* ldp x19, x20, [x0, #16] */ + .long 0xa9425815 /* ldp x21, x22, [x0, #32] */ + .long 0xa9436017 /* ldp x23, x24, [x0, #48] */ + .long 0xa9446819 /* ldp x25, x26, [x0, #64] */ + .long 0xa945701b /* ldp x27, x28, [x0, #80] */ + .long 0xa946781d /* ldp x29, x30, [x0, #96] */ + .long 0xf9403802 /* ldr x2, [x0, #112] */ + .long 0x9100005f /* mov sp, x2 */ + .long 0xb9407802 /* ldr w2, [x0, #120] */ + .long 0xd51b4402 /* msr fpcr, x2 */ + .long 0xb9407c02 /* ldr w2, [x0, #124] */ + .long 0xd51b4422 /* msr fpsr, x2 */ + .long 0x6d482408 /* ldp d8, d9, [x0, #128] */ + .long 0x6d492c0a /* ldp d10, d11, [x0, #144] */ + .long 0x6d4a340c /* ldp d12, d13, [x0, #160] */ + .long 0x6d4b3c0e /* ldp d14, d15, [x0, #176] */ + .long 0x2a0103e0 /* mov w0, w1 (value is an int; upper half of x1 is unspecified) */ + .long 0x35000040 /* cbnz w0, 1f */ + .long 0xd2800020 /* mov x0, #1 (a zero value is returned as 1) */ + .long 0xd65f03c0 /* 1: ret */ + +/* ---------------------------------------------- */ +#elif !defined(__x86_64__) /* ---------------------------------------------- */ .globl _(__chkstk) @@ -69,5 +143,3 @@ _(tinyc_getbp): /* ---------------------------------------------- */ #endif /* ---------------------------------------------- */ - - diff --git a/win32/lib/crt1.c b/win32/lib/crt1.c index e1910813..a105bd39 100644 --- a/win32/lib/crt1.c +++
b/win32/lib/crt1.c @@ -22,18 +22,26 @@ #define _tstart _wstart #define _tmain wmain #define _runtmain _runwmain +#define get_tenviron _get_wenviron #else #define __tgetmainargs __getmainargs #define _tstart _start #define _tmain main #define _runtmain _runmain +#define get_tenviron _get_environ #endif typedef struct { int newmode; } _startupinfo; int __cdecl __tgetmainargs(int *pargc, _TCHAR ***pargv, _TCHAR ***penv, int globb, _startupinfo*); +int __cdecl get_tenviron(_TCHAR ***penv); void __cdecl __set_app_type(int apptype); unsigned int __cdecl _controlfp(unsigned int new_value, unsigned int mask); extern int _tmain(int argc, _TCHAR * argv[], _TCHAR * env[]); +#ifdef UNICODE +__attribute__((weak)) wchar_t **__cdecl __rt_get_wenviron(void); +#else +__attribute__((weak)) char **__cdecl __rt_get_environ(void); +#endif #include "crtinit.c" @@ -48,6 +56,7 @@ static LONG WINAPI catch_sig(EXCEPTION_POINTERS *ex) void _tstart(void) { int ret; + _TCHAR **env = NULL; _startupinfo start_info = {0}; SetUnhandledExceptionFilter(catch_sig); @@ -60,9 +69,9 @@ void _tstart(void) _controlfp(_PC_53, _MCW_PC); #endif - __tgetmainargs( &__argc, &__targv, &_tenviron, _dowildcard, &start_info); - run_ctors(__argc, __targv, _tenviron); - ret = _tmain(__argc, __targv, _tenviron); + __tgetmainargs(&__argc, &__targv, &env, _dowildcard, &start_info); + run_ctors(__argc, __targv, env); + ret = _tmain(__argc, __targv, env); run_dtors(); exit(ret); } @@ -70,15 +79,52 @@ void _tstart(void) // ============================================= // for 'tcc -run ,,,' -__attribute__((weak)) extern int __run_on_exit(); +__attribute__((weak)) void __run_on_exit(int ret) +{ + (void)ret; +} + +static void run_stdio_init(void) +{ + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); +} int _runtmain(int argc, /* as tcc passed in */ char **argv) { int ret; + _TCHAR **env = NULL; +#ifdef UNICODE + if (__rt_get_wenviron) { + env = __rt_get_wenviron(); +#if defined __i386__ || defined 
__x86_64__ + _controlfp(_PC_53, _MCW_PC); +#endif + run_stdio_init(); + run_ctors(argc, (_TCHAR **)argv, env); + ret = _tmain(argc, (_TCHAR **)argv, env); + run_dtors(); + __run_on_exit(ret); + return ret; + } +#else + if (__rt_get_environ) { + env = __rt_get_environ(); +#if defined __i386__ || defined __x86_64__ + _controlfp(_PC_53, _MCW_PC); +#endif + run_stdio_init(); + run_ctors(argc, (_TCHAR **)argv, env); + ret = _tmain(argc, (_TCHAR **)argv, env); + run_dtors(); + __run_on_exit(ret); + return ret; + } +#endif #ifdef UNICODE _startupinfo start_info = {0}; - __tgetmainargs(&__argc, &__targv, &_tenviron, _dowildcard, &start_info); + __tgetmainargs(&__argc, &__targv, &env, _dowildcard, &start_info); /* may be wrong when tcc has received wildcards (*.c) */ if (argc < __argc) { __targv += __argc - argc; @@ -87,12 +133,14 @@ int _runtmain(int argc, /* as tcc passed in */ char **argv) #else __argc = argc; __targv = argv; + get_tenviron(&env); #endif #if defined __i386__ || defined __x86_64__ _controlfp(_PC_53, _MCW_PC); #endif - run_ctors(__argc, __targv, _tenviron); - ret = _tmain(__argc, __targv, _tenviron); + run_stdio_init(); + run_ctors(__argc, __targv, env); + ret = _tmain(__argc, __targv, env); run_dtors(); __run_on_exit(ret); return ret; diff --git a/win32/lib/wincrt1.c b/win32/lib/wincrt1.c index d74a0cf8..eb9dd427 100644 --- a/win32/lib/wincrt1.c +++ b/win32/lib/wincrt1.c @@ -16,21 +16,25 @@ void _controlfp(unsigned a, unsigned b); #define __tgetmainargs __wgetmainargs #define _twinstart _wwinstart #define _runtwinmain _runwwinmain +#define get_tenviron _get_wenviron int APIENTRY wWinMain(HINSTANCE, HINSTANCE, LPWSTR, int); #else #define __tgetmainargs __getmainargs #define _twinstart _winstart #define _runtwinmain _runwinmain +#define get_tenviron _get_environ #endif typedef struct { int newmode; } _startupinfo; int __cdecl __tgetmainargs(int *pargc, _TCHAR ***pargv, _TCHAR ***penv, int globb, _startupinfo*); +int __cdecl get_tenviron(_TCHAR ***penv); 
#include "crtinit.c" static int go_winmain(TCHAR *arg1) { STARTUPINFO si; + _TCHAR **env = NULL; _TCHAR *szCmd, *p; int fShow; int retval; @@ -51,7 +55,8 @@ static int go_winmain(TCHAR *arg1) #if defined __i386__ || defined __x86_64__ _controlfp(0x10000, 0x30000); #endif - run_ctors(__argc, __targv, _tenviron); + get_tenviron(&env); + run_ctors(__argc, __targv, env); retval = _tWinMain(GetModuleHandle(NULL), NULL, szCmd, fShow); run_dtors(); return retval; @@ -67,7 +72,7 @@ int _twinstart(void) _startupinfo start_info_con = {0}; SetUnhandledExceptionFilter(catch_sig); __set_app_type(__GUI_APP); - __tgetmainargs(&__argc, &__targv, &_tenviron, 0, &start_info_con); + __tgetmainargs(&__argc, &__targv, &(_TCHAR **){NULL}, 0, &start_info_con); /* env out-pointer is written by the CRT, so it must not be NULL; result unused here */ exit(go_winmain(__argc > 1 ? __targv[1] : NULL)); } @@ -75,7 +80,7 @@ int _runtwinmain(int argc, /* as tcc passed in */ char **argv) { #ifdef UNICODE _startupinfo start_info = {0}; - __tgetmainargs(&__argc, &__targv, &_tenviron, 0, &start_info); + __tgetmainargs(&__argc, &__targv, &(_TCHAR **){NULL}, 0, &start_info); /* env out-pointer is written by the CRT, so it must not be NULL; result unused here */ /* may be wrong when tcc has received wildcards (*.c) */ if (argc < __argc) __targv += __argc - argc, __argc = argc; diff --git a/win32/test_arm64.c b/win32/test_arm64.c new file mode 100644 index 00000000..651461f6 --- /dev/null +++ b/win32/test_arm64.c @@ -0,0 +1,12 @@ +/* Simple ARM64 Windows test program - no inline asm */ +#include <stdio.h> + +int main(int argc, char **argv) { + int x = 42; + int y = x * 2; + + printf("Hello from ARM64 Windows!\n"); + printf("x = %d, y = %d\n", x, y); + + return 0; +}