From 7f764f340f05a910908c15e1458e1a1207f52685 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Thu, 7 May 2026 11:39:41 +0800 Subject: [PATCH] riscv64-asm: complete AMO aq/rl suffixes for all ops Add 48 tokens + handlers + dispatch cases for all AMO aq/rl variants (amoswap, amoand, amoor, amoxor, amomax, amomaxu, amomin, amominu; each x {.w,.d} x {.aq,.rl,.aqrl}). GNU-compatible 2-dot naming via DEF_ASM_WITH_SUFFIXES. Correct funct5 values: amoswap=0x01, amoand=0x0C, amoor=0x08, amoxor=0x04, amomax=0x14, amomaxu=0x1C, amomin=0x10, amominu=0x18. Extend 141_riscv_asm amo_main() with 10 representative aq/rl variants that exercise every op, both widths (.w/.d) and each ordering suffix (.aq/.rl/.aqrl) at least once. Verified against riscv64-linux-gnu-as 2.44. --- riscv64-asm.c | 146 +++++++++++++++++++++++++++++++++++ riscv64-tok.h | 50 ++++++++++++ tests/tests2/141_riscv_asm.c | 10 +++ 3 files changed, 206 insertions(+) diff --git a/riscv64-asm.c b/riscv64-asm.c index 0f37384d..583b1901 100644 --- a/riscv64-asm.c +++ b/riscv64-asm.c @@ -1462,6 +1462,103 @@ static void asm_atomic_opcode(TCCState *s1, int token) asm_emit_a(token, 0x2F | 0x3<<12 | 0x0<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; case TOK_ASM_amoadd_d_aqrl: asm_emit_a(token, 0x2F | 0x3<<12 | 0x0<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + + case TOK_ASM_amoswap_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoswap_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoswap_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoswap_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoswap_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoswap_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x1<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoand_w_aq: + 
asm_emit_a(token, 0x2F | 2<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoand_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoand_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoand_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoand_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoand_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0xc<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoor_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoor_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoor_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoor_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoor_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoor_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x8<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoxor_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoxor_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoxor_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amoxor_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amoxor_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amoxor_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x4<<27, &ops[0], &ops[1], &ops[2], 1, 
1); break; + case TOK_ASM_amomax_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomax_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomax_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amomax_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomax_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomax_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x14<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amomaxu_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomaxu_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomaxu_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amomaxu_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomaxu_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomaxu_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x1c<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amomin_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomin_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomin_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amomin_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amomin_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amomin_d_aqrl: + 
asm_emit_a(token, 0x2F | 3<<12 | 0x10<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amominu_w_aq: + asm_emit_a(token, 0x2F | 2<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amominu_w_rl: + asm_emit_a(token, 0x2F | 2<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amominu_w_aqrl: + asm_emit_a(token, 0x2F | 2<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; + case TOK_ASM_amominu_d_aq: + asm_emit_a(token, 0x2F | 3<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 1, 0); break; + case TOK_ASM_amominu_d_rl: + asm_emit_a(token, 0x2F | 3<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 0, 1); break; + case TOK_ASM_amominu_d_aqrl: + asm_emit_a(token, 0x2F | 3<<12 | 0x18<<27, &ops[0], &ops[1], &ops[2], 1, 1); break; } } @@ -1948,6 +2045,55 @@ ST_FUNC void asm_opcode(TCCState *s1, int token) case TOK_ASM_amoadd_d_aq: case TOK_ASM_amoadd_d_rl: case TOK_ASM_amoadd_d_aqrl: + /* AMO aq/rl (all ops) */ + case TOK_ASM_amoswap_w_aq: + case TOK_ASM_amoswap_w_rl: + case TOK_ASM_amoswap_w_aqrl: + case TOK_ASM_amoswap_d_aq: + case TOK_ASM_amoswap_d_rl: + case TOK_ASM_amoswap_d_aqrl: + case TOK_ASM_amoand_w_aq: + case TOK_ASM_amoand_w_rl: + case TOK_ASM_amoand_w_aqrl: + case TOK_ASM_amoand_d_aq: + case TOK_ASM_amoand_d_rl: + case TOK_ASM_amoand_d_aqrl: + case TOK_ASM_amoor_w_aq: + case TOK_ASM_amoor_w_rl: + case TOK_ASM_amoor_w_aqrl: + case TOK_ASM_amoor_d_aq: + case TOK_ASM_amoor_d_rl: + case TOK_ASM_amoor_d_aqrl: + case TOK_ASM_amoxor_w_aq: + case TOK_ASM_amoxor_w_rl: + case TOK_ASM_amoxor_w_aqrl: + case TOK_ASM_amoxor_d_aq: + case TOK_ASM_amoxor_d_rl: + case TOK_ASM_amoxor_d_aqrl: + case TOK_ASM_amomax_w_aq: + case TOK_ASM_amomax_w_rl: + case TOK_ASM_amomax_w_aqrl: + case TOK_ASM_amomax_d_aq: + case TOK_ASM_amomax_d_rl: + case TOK_ASM_amomax_d_aqrl: + case TOK_ASM_amomaxu_w_aq: + case TOK_ASM_amomaxu_w_rl: + case TOK_ASM_amomaxu_w_aqrl: + case TOK_ASM_amomaxu_d_aq: + case TOK_ASM_amomaxu_d_rl: + case TOK_ASM_amomaxu_d_aqrl: 
+ case TOK_ASM_amomin_w_aq: + case TOK_ASM_amomin_w_rl: + case TOK_ASM_amomin_w_aqrl: + case TOK_ASM_amomin_d_aq: + case TOK_ASM_amomin_d_rl: + case TOK_ASM_amomin_d_aqrl: + case TOK_ASM_amominu_w_aq: + case TOK_ASM_amominu_w_rl: + case TOK_ASM_amominu_w_aqrl: + case TOK_ASM_amominu_d_aq: + case TOK_ASM_amominu_d_rl: + case TOK_ASM_amominu_d_aqrl: asm_atomic_opcode(s1, token); break; diff --git a/riscv64-tok.h b/riscv64-tok.h index b833f411..a94f05ee 100644 --- a/riscv64-tok.h +++ b/riscv64-tok.h @@ -528,6 +528,56 @@ DEF_ASM_WITH_SUFFIXES(amoadd, d, aq) DEF_ASM_WITH_SUFFIXES(amoadd, d, rl) DEF_ASM_WITH_SUFFIXES(amoadd, d, aqrl) + /* Complete AMO aq/rl ordering suffixes (all ops) */ + DEF_ASM_WITH_SUFFIXES(amoswap, w, aq) + DEF_ASM_WITH_SUFFIXES(amoswap, w, rl) + DEF_ASM_WITH_SUFFIXES(amoswap, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amoswap, d, aq) + DEF_ASM_WITH_SUFFIXES(amoswap, d, rl) + DEF_ASM_WITH_SUFFIXES(amoswap, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amoand, w, aq) + DEF_ASM_WITH_SUFFIXES(amoand, w, rl) + DEF_ASM_WITH_SUFFIXES(amoand, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amoand, d, aq) + DEF_ASM_WITH_SUFFIXES(amoand, d, rl) + DEF_ASM_WITH_SUFFIXES(amoand, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amoor, w, aq) + DEF_ASM_WITH_SUFFIXES(amoor, w, rl) + DEF_ASM_WITH_SUFFIXES(amoor, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amoor, d, aq) + DEF_ASM_WITH_SUFFIXES(amoor, d, rl) + DEF_ASM_WITH_SUFFIXES(amoor, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amoxor, w, aq) + DEF_ASM_WITH_SUFFIXES(amoxor, w, rl) + DEF_ASM_WITH_SUFFIXES(amoxor, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amoxor, d, aq) + DEF_ASM_WITH_SUFFIXES(amoxor, d, rl) + DEF_ASM_WITH_SUFFIXES(amoxor, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amomax, w, aq) + DEF_ASM_WITH_SUFFIXES(amomax, w, rl) + DEF_ASM_WITH_SUFFIXES(amomax, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amomax, d, aq) + DEF_ASM_WITH_SUFFIXES(amomax, d, rl) + DEF_ASM_WITH_SUFFIXES(amomax, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amomaxu, w, aq) + DEF_ASM_WITH_SUFFIXES(amomaxu, w, rl) + DEF_ASM_WITH_SUFFIXES(amomaxu, w, 
aqrl) + DEF_ASM_WITH_SUFFIXES(amomaxu, d, aq) + DEF_ASM_WITH_SUFFIXES(amomaxu, d, rl) + DEF_ASM_WITH_SUFFIXES(amomaxu, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amomin, w, aq) + DEF_ASM_WITH_SUFFIXES(amomin, w, rl) + DEF_ASM_WITH_SUFFIXES(amomin, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amomin, d, aq) + DEF_ASM_WITH_SUFFIXES(amomin, d, rl) + DEF_ASM_WITH_SUFFIXES(amomin, d, aqrl) + DEF_ASM_WITH_SUFFIXES(amominu, w, aq) + DEF_ASM_WITH_SUFFIXES(amominu, w, rl) + DEF_ASM_WITH_SUFFIXES(amominu, w, aqrl) + DEF_ASM_WITH_SUFFIXES(amominu, d, aq) + DEF_ASM_WITH_SUFFIXES(amominu, d, rl) + DEF_ASM_WITH_SUFFIXES(amominu, d, aqrl) + /* rounding mode keywords (used as fcvt operand: fcvt.w.s rd, rs1, rtz) */ DEF_ASM(rne) diff --git a/tests/tests2/141_riscv_asm.c b/tests/tests2/141_riscv_asm.c index 86874068..72a360c0 100644 --- a/tests/tests2/141_riscv_asm.c +++ b/tests/tests2/141_riscv_asm.c @@ -311,6 +311,16 @@ int amo_main(void) asm("amoadd.w.aq %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); asm("amoadd.w.rl %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); asm("amoadd.d.aqrl %0, %2, (%1)" : "=r"(rd) : "r"(&xd), "r"(val)); + asm("amoswap.w.aq %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amoswap.d.rl %0, %2, (%1)" : "=r"(rd) : "r"(&xd), "r"(val)); + asm("amoand.w.aqrl %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amoor.w.rl %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amoor.d.aq %0, %2, (%1)" : "=r"(rd) : "r"(&xd), "r"(val)); + asm("amoxor.w.aq %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amomax.d.rl %0, %2, (%1)" : "=r"(rd) : "r"(&xd), "r"(val)); + asm("amomaxu.w.aqrl %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amomin.w.rl %0, %2, (%1)" : "=r"(r) : "r"(&x), "r"(val)); + asm("amominu.d.aq %0, %2, (%1)" : "=r"(rd) : "r"(&xd), "r"(val)); return 1; }