From 1048e2e94c03c85e0b16cb6a2569fe0516a7c13c Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 13 Feb 2025 16:47:25 +0200 Subject: [PATCH 01/45] Fix AOT exception handling on linux-riscv64 --- .../nativeaot/Runtime/StackFrameIterator.cpp | 2 +- .../Runtime/riscv64/ExceptionHandling.S | 4 +- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 60 +++++++++++++------ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 302bd05861a509..722036eb0d069e 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1285,7 +1285,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - for (int i = 0; i < 32; i++) + for (int i = 0; i < 8; i++) { m_RegDisplay.F[i] = *f++; } diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index ff20aeb736cc88..65a14ae1c49dfe 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -4,7 +4,7 @@ #include #include "AsmOffsets.inc" -#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7) & ~7) #define HARDWARE_EXCEPTION 1 #define SOFTWARE_EXCEPTION 0 @@ -44,7 +44,7 @@ fsd fs10, 0x60(sp) fsd fs11, 0x68(sp) - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x78 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -0x78 sd zero, 0x10(sp) // locations reserved for return value, not used for exception handling sd zero, 0x18(sp) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index e387f3440e329f..f71a4b1338a6ea 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ 
b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -1140,8 +1140,13 @@ bool Registers_REGDISPLAY::validVectorRegister(int num) const inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { switch (regNum) { + case UNW_REG_IP: + return IP; case UNW_RISCV_X1: return *pRA; + case UNW_REG_SP: + case UNW_RISCV_X2: + return SP; case UNW_RISCV_X3: return *pGP; case UNW_RISCV_X4: @@ -1255,60 +1260,79 @@ inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t location) { switch (regNum) { + case UNW_REG_IP: + IP = (uintptr_t)value; + break; case UNW_RISCV_X1: - *pRA = value; + pRA = (PTR_uintptr_t)location; + break; + case UNW_REG_SP: + case UNW_RISCV_X2: + SP = (uintptr_t)value; break; case UNW_RISCV_X3: - *pGP = value; + pGP = (PTR_uintptr_t)location; break; case UNW_RISCV_X4: - *pTP = value; + pTP = (PTR_uintptr_t)location; break; case UNW_RISCV_X5: - *pT0 = value; + pT0 = (PTR_uintptr_t)location; break; case UNW_RISCV_X6: - *pT1 = value; + pT1 = (PTR_uintptr_t)location; break; case UNW_RISCV_X7: - *pT2 = value; + pT2 = (PTR_uintptr_t)location; break; case UNW_RISCV_X28: - *pT3 = value; + pT3 = (PTR_uintptr_t)location; break; case UNW_RISCV_X29: - *pT4 = value; + pT4 = (PTR_uintptr_t)location; break; case UNW_RISCV_X30: - *pT5 = value; + pT5 = (PTR_uintptr_t)location; break; case UNW_RISCV_X31: - *pT6 = value; + pT6 = (PTR_uintptr_t)location; break; case UNW_RISCV_X8: - *pFP = value; + pFP = (PTR_uintptr_t)location; break; case UNW_RISCV_X9: - *pS1 = value; + pS1 = (PTR_uintptr_t)location; break; case UNW_RISCV_X18: - *pS2 = value; + pS2 = (PTR_uintptr_t)location; break; case UNW_RISCV_X19: - *pS3 = value; + pS3 = (PTR_uintptr_t)location; break; case UNW_RISCV_X20: - *pS4 = value; + pS4 = (PTR_uintptr_t)location; break; case UNW_RISCV_X21: - *pS5 = value; + pS5 = (PTR_uintptr_t)location; break; case UNW_RISCV_X22: - *pS6 = value; + pS6 = (PTR_uintptr_t)location; 
break; case UNW_RISCV_X23: - *pS7 = value; + pS7 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X24: + pS8 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X25: + pS9 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X26: + pS10 = (PTR_uintptr_t)location; + break; + case UNW_RISCV_X27: + pS11 = (PTR_uintptr_t)location; break; // Add other general-purpose registers if needed From 88ce95b910a34133f9c34103af4c388408ffdd5c Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 13 Feb 2025 19:03:11 +0200 Subject: [PATCH 02/45] Apply more suggestions Co-authored-by: Filip Navara --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 2 +- src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 722036eb0d069e..efb11aad9d31ba 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1285,7 +1285,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - for (int i = 0; i < 8; i++) + for (int i = 0; i < 12; i++) { m_RegDisplay.F[i] = *f++; } diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index f09e0452042734..702f9356d81047 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -243,7 +243,7 @@ LOCAL_LABEL(RhpNewArray_Rare): PUSH_COOP_PINVOKE_FRAME a3 // Preserve data we will need later into the callee saved registers - mv s0, a0 // Preserve MethodTable + mv s2, a0 // Preserve MethodTable mv a2, a1 // numElements li a1, 0 // uFlags From 59705b8a60307305247da55dd4636ae14448d3d3 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 13 Feb 2025 16:56:52 +0000 Subject: 
[PATCH 03/45] Fix unwind information for allocation helpers --- src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S | 6 +++--- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index 702f9356d81047..685e4a54cb6df9 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -75,7 +75,7 @@ LOCAL_LABEL(RhpNewFast_RarePath): // a3: transition frame // Preserve the MethodTable in s0 - mv s0, a0 + mv s2, a0 li a2, 0 // numElements @@ -96,7 +96,7 @@ LOCAL_LABEL(NewOutOfMemory): // This is the OOM failure path. We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. - mv a0, s0 // MethodTable pointer + mv a0, s2 // MethodTable pointer li a1, 0 // Indicate that we should throw OOM. POP_COOP_PINVOKE_FRAME @@ -264,7 +264,7 @@ LOCAL_LABEL(ArrayOutOfMemory): // This is the OOM failure path. We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. - mv a0, s0 // MethodTable Pointer + mv a0, s2 // MethodTable Pointer li a1, 0 // Indicate that we should throw OOM. 
POP_COOP_PINVOKE_FRAME diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 787d80a4000dc9..3b66d199229eec 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -98,7 +98,6 @@ C_FUNC(\Name): .macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 addi sp, sp, \ssize .cfi_adjust_cfa_offset -\ssize - .cfi_def_cfa sp, \ssize sd \reg1, 0(sp) sd \reg2, 8(sp) From d116a111ec7ea3bcf5f3a2ff4095fbe01ed96635 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 14 Feb 2025 14:25:39 +0000 Subject: [PATCH 04/45] Restore correct FP registers in StackFrameIterator::UnwindFuncletInvokeThunk --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index efb11aad9d31ba..24b5a6a86afe5a 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1285,9 +1285,11 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - for (int i = 0; i < 12; i++) + m_RegDisplay.F[8] = *f++; + m_RegDisplay.F[9] = *f++; + for (int i = 0; i < 10; i++) { - m_RegDisplay.F[i] = *f++; + m_RegDisplay.F[i + 18] = *f++; } SP = (PTR_uintptr_t)f; From b179a7fa64dd8e50e316d28f3b6d9095b857d04b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 14 Feb 2025 14:26:04 +0000 Subject: [PATCH 05/45] Fix register addressing in GcInfoDecoder::GetRegisterSlot --- src/coreclr/vm/gcinfodecoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index c24a2bc14870bd..1b11a3e8f7ceee 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp 
+++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1976,7 +1976,7 @@ OBJECTREF* GcInfoDecoder::GetRegisterSlot( _ASSERTE((regNum == 1) || (regNum >= 5 && regNum <= 31)); #ifdef FEATURE_NATIVEAOT - PTR_uintptr_t* ppReg = &pRD->pRA; + PTR_uintptr_t* ppReg = &pRD->pR0; return (OBJECTREF*)*(ppReg + regNum); #else From 1cb6ced20539ef20782e673d4dc0a96ad072b44b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 15 Feb 2025 07:56:31 +0000 Subject: [PATCH 06/45] Fix and enable software write watch --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 2 +- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 8cf45e0018bfa8..ccf197b08f6602 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -236,7 +236,7 @@ endif() add_definitions(-DFEATURE_BASICFREEZE) add_definitions(-DFEATURE_CONSERVATIVE_GC) -if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) +if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64) add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES) endif() diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 1e9fedaa9f21c1..103425ef78d737 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -106,6 +106,7 @@ #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // Update the write watch table if necessary la t2, g_write_watch_table + ld t2, (t2) beqz t2, 2f srli t6, \destReg, 12 // SoftwareWriteWatch::AddressToTableByteIndexShift @@ -140,6 +141,7 @@ #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Check if we need to update the card bundle table la t2, g_card_bundle_table + ld t2, (t2) srli 
t6, \destReg, 21 add t6, t2, t6 lb t2, 0(t6) From 13953be75aa78d5f258e74d763887e5934e04425 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 15 Feb 2025 09:34:59 +0000 Subject: [PATCH 07/45] Fix COOP frames layout --- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 3b66d199229eec..04b1410c401301 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -258,10 +258,11 @@ C_FUNC(\Name): PROLOG_SAVE_REG_PAIR s5, s6, 64 PROLOG_SAVE_REG_PAIR s7, s8, 80 PROLOG_SAVE_REG_PAIR s9, s10, 96 + PROLOG_SAVE_REG s11, 112 // Save the value of SP before stack allocation to the last slot in the frame (slot #15) add \trashReg, sp, 128 - sd \trashReg, 112(sp) + sd \trashReg, 120(sp) // Record the bitmask of saved registers in the frame (slot #3) li \trashReg, DEFAULT_FRAME_SAVE_FLAGS @@ -277,6 +278,7 @@ C_FUNC(\Name): EPILOG_RESTORE_REG_PAIR s5, s6, 64 EPILOG_RESTORE_REG_PAIR s7, s8, 80 EPILOG_RESTORE_REG_PAIR s9, s10, 96 + EPILOG_RESTORE_REG s11, 112 EPILOG_RESTORE_REG_PAIR_INDEXED s0, ra, 128 .endm From 53a4ef046f52c9fe73de8f26f1a0fd459d822ac7 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 15 Feb 2025 22:03:59 +0000 Subject: [PATCH 08/45] Fix indirections, comparison and other assembly for write barrier checks --- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 103425ef78d737..d207adc3dc7fdd 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -121,34 +121,40 @@ // We can skip the card table write if the 
reference is to // an object not on the ephemeral segment. la t2, g_ephemeral_low + ld t2, (t2) la t6, g_ephemeral_high - bgeu \refReg, t2, 0f - bltu \refReg, t6, 0f + ld t6, (t6) + bltu \refReg, t2, 0f + bgeu \refReg, t6, 0f // Set this object's card, if it has not already been set. la t2, g_card_table + ld t2, (t2) srli t6, \destReg, 11 add t6, t2, t6 // Check that this card has not already been written. Avoiding useless writes // is a big win on multi-proc systems since it avoids cache thrashing. lb t2, 0(t6) - li t6, 0xFF - beq t2, t6, 0f + xori t2, t2, 0xFF + beqz t2, 0f - sb t6, 0(t6) + li t2, 0xFF + sb t2, 0(t6) #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Check if we need to update the card bundle table la t2, g_card_bundle_table ld t2, (t2) + srli t6, \destReg, 21 add t6, t2, t6 lb t2, 0(t6) - li t6, 0xFF - beq t2, t6, 0f + xori t2, t2, 0xFF + beqz t2, 0f - sb t6, 0(t6) + li t2, 0xFF + sb t2, 0(t6) #endif 0: From c11dd6260d59ae8bf176a134dd3fb5af05d866e1 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 15 Feb 2025 22:10:53 +0000 Subject: [PATCH 09/45] Fix cross-build on Risc-V host --- eng/native/configureplatform.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eng/native/configureplatform.cmake b/eng/native/configureplatform.cmake index fc207977125c3e..e63df7a170d54d 100644 --- a/eng/native/configureplatform.cmake +++ b/eng/native/configureplatform.cmake @@ -29,6 +29,8 @@ if(CLR_CMAKE_HOST_OS STREQUAL linux) set(CLR_CMAKE_HOST_UNIX_X86 1) elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL aarch64) set(CLR_CMAKE_HOST_UNIX_ARM64 1) + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL riscv64) + set(CLR_CMAKE_HOST_UNIX_RISCV64 1) else() clr_unknown_arch() endif() From 37680d7aaea4fc4be1f8258e978b6ef4ba558a54 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sun, 16 Feb 2025 07:09:23 +0200 Subject: [PATCH 10/45] Disable R2R in stage2 build --- .../Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj index 14522c2c74e79e..11cdfbfea2eb6a 100644 --- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj +++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj @@ -27,6 +27,8 @@ false false + + false true true $(PublishReadyToRun) From f861b44c218e195856572e27057fc6633188ec3e Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 08:12:22 +0000 Subject: [PATCH 11/45] Fix uninitialized value in RhpPInvoke --- src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index d1264271cc79c3..31bbcf150e7d1d 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -19,7 +19,7 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler sd fp, OFFSETOF__PInvokeTransitionFrame__m_FramePointer(a0) sd ra, OFFSETOF__PInvokeTransitionFrame__m_RIP(a0) - sd t0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0) + sd sp, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0) li t0, PTFF_SAVE_SP sd t0, OFFSETOF__PInvokeTransitionFrame__m_Flags(a0) From acab64448b7c5b98f1a649dbbec727e1f679bf1a Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sun, 16 Feb 2025 12:20:12 +0200 Subject: [PATCH 12/45] Fix register in RhpPInvokeReturn --- src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index 31bbcf150e7d1d..93b360ebda1c1e 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ 
b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -40,7 +40,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t0 - bnez t1, 0f // If TrapThreadsFlags_None is non-zero, branch + bnez t0, 0f // If TrapThreadsFlags_None is non-zero, branch ret 0: From de03a3ef0d84480c59bfa0fc3a700427cc0d7fbd Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 18:59:09 +0000 Subject: [PATCH 13/45] Initialize FP/RA pointers when creating StackFrameIterator from native context --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 24b5a6a86afe5a..97dd26cc435e0e 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -809,6 +809,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9); m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10); m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, Fp); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, Ra); // // scratch regs From eb4449b9e5031ec8beb2d919c9214bbb7ef81d84 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 18:59:33 +0000 Subject: [PATCH 14/45] Fix sign on PROLOG_SAVE_REG_PAIR_INDEXED --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index f5f41c44c78fac..019199d548b33a 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -17,7 +17,7 @@ # incoming register values into it. 
# First create PInvokeTransitionFrame - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) # Slot at sp+0x10 is reserved for Thread * # Slot at sp+0x18 is reserved for bitmask of saved registers From 541ddb167cffa90d296c5c40599186fa8eb5582f Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 19:00:16 +0000 Subject: [PATCH 15/45] Fix more of the logic in write barriers. Needs further audit. --- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index d207adc3dc7fdd..aa1a69e8b704c3 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -31,7 +31,6 @@ la t3, g_GCShadow ld t3, 0(t3) beq t3, zero, 1f - li t4, 0 // Save destReg since we're about to modify it (and we need the original value both within the macro and // once we exit the macro). @@ -174,10 +173,12 @@ // If no, early out. 
la t2, g_lowest_address - bgeu \destReg, t2, 0f + ld t2, (t2) + bltu \destReg, t2, 0f la t2, g_highest_address - bltu \destReg, t2, 0f + ld t2, (t2) + bgeu \destReg, t2, 0f 1: INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg @@ -228,8 +229,10 @@ LEAF_END RhpByRefAssignRef, _TEXT LEAF_ENTRY RhpCheckedAssignRef, _TEXT # Check if the destination is within the heap bounds - la t2, C_FUNC(g_lowest_address) - la t6, C_FUNC(g_highest_address) + la t2, C_FUNC(g_lowest_address) + ld t2, (t2) + la t6, C_FUNC(g_highest_address) + ld t6, (t6) bltu t3, t2, LOCAL_LABEL(NotInHeap) bgeu t3, t6, LOCAL_LABEL(NotInHeap) From a0e1a8390bc8c81edfb52a832ae8885f5431b12b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 22:35:32 +0000 Subject: [PATCH 16/45] Fix incorrect unwinding information generated for methods with frame size > PAGE_SIZE --- src/coreclr/jit/codegenriscv64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 54cd716b1ccebd..7b80962becfb1d 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -7223,7 +7223,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (leftFrameSize != 0) { - genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ false); } } From f48eae58d6962527eca85f36aa449e9c0d22da23 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 23:13:30 +0000 Subject: [PATCH 17/45] Fix emitting jump to bad slot helper --- .../Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs 
b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index a285069cfada2d..584b37e53a7e27 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -47,8 +47,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref RiscV64Emitter enco // should be reported by someone else - the system should not rely on it coming from here. if (!relocsOnly && _hasInvalidEntries) { - encoder.EmitXORI(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0); - encoder.EmitJALR(Register.X0, encoder.TargetRegister.IntraProcedureCallScratch1, 0); + encoder.EmitJMPIfZero(result, GetBadSlotHelper(factory)); } } From 77349241635919df742be1f6de22b43319a39ed6 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 16 Feb 2025 23:40:07 +0000 Subject: [PATCH 18/45] Fix unwind info for universal transitions --- src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 89691462c1231b..de370da60b2b39 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -91,9 +91,7 @@ NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler # FP and RA registers - addi sp, sp, -STACK_SIZE - sd s0, 0x0(sp) # Save frame pointer - sd ra, 0x08(sp) # Save return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -STACK_SIZE # Floating point registers fsd fa0, FLOAT_ARG_OFFSET(sp) From 19c97fb20ecd1db4c85165f73a26b7ed32f9880f Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 17 Feb 2025 07:33:10 +0000 Subject: [PATCH 19/45] Fix comment --- 
src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index de370da60b2b39..3a6bb52c946e89 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -103,7 +103,7 @@ fsd fa6, FLOAT_ARG_OFFSET + 0x30(sp) fsd fa7, FLOAT_ARG_OFFSET + 0x38(sp) - # Space for return buffer data (0x40 bytes) + # Space for return buffer data (0x20 bytes) # Save argument registers sd a0, ARGUMENT_REGISTERS_OFFSET(sp) From c62c075e5b7322d43027fa21c79b4e03f34f699d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 17 Feb 2025 08:06:43 +0000 Subject: [PATCH 20/45] Fix flipped RA/FP in universal transition unwinding --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 97dd26cc435e0e..e337a4b2510a4a 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1500,8 +1500,8 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the StackPassedArgs. 
private: - uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) - uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) From ea12bf46fde554499088070b9a6aefe2072dac05 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 17 Feb 2025 16:20:43 +0000 Subject: [PATCH 21/45] WIP: Rewrite thunk code generation --- .../nativeaot/Runtime/ThunksMapping.cpp | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 9f7211a2ee7623..7a41f2f9c7eded 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -25,7 +25,7 @@ #elif TARGET_LOONGARCH64 #define THUNK_SIZE 16 #elif TARGET_RISCV64 -#define THUNK_SIZE 12 +#define THUNK_SIZE 24 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 #endif @@ -259,21 +259,31 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() #elif defined(TARGET_RISCV64) - // auipc t0, %hi(delta) // Load upper immediate with address high bits - // ld t1, %lo(delta)(t0) // Load data from address in (t0 + lower immediate) - // jr t1 // Jump and don't link register + //auipc t1, hi() + //addi t1, t1, lo() + //auipc t0, hi() + //addi t0, t0, lo() + //ld t0, (t0) + //jalr zero, t0, 0 int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); - uint32_t deltaHi = (delta + 0x800) & 0xfffff000; - uint32_t deltaLo = delta << (32 - 12); - - *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | deltaHi; // auipc + *((uint32_t*)pCurrentThunkAddress) = 
0x00000317 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t1, delta[31:12] + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x00030313 | ((delta & 0xFFF) << 20); // addi t1, t1, delta[11:0] + pCurrentThunkAddress += 4; + + delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 8; + *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t0, delta[31:12] + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x00028293 | ((delta & 0xFFF) << 20); // addi t0, t0, delta[11:0] pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x0002B303 | deltaLo; // addi + *((uint32_t*)pCurrentThunkAddress) = 0x0002b283; // ld t0, (t0) pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x00030067; // jr + *((uint32_t*)pCurrentThunkAddress) = 0x00008282; // jalr zero, t0, 0 pCurrentThunkAddress += 4; #else From 35a22b9f474e1141066d92b06700d0282e1de353 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 17 Feb 2025 16:21:16 +0000 Subject: [PATCH 22/45] WIP: Fix RhCommonStub --- .../Runtime/riscv64/InteropThunksHelpers.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index 04f28699dd2940..5b191e83806f86 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -12,9 +12,9 @@ // // RhCommonStub // - // INPUT: tp: thunk's data block + // INPUT: t1: thunk's data block // - // TRASHES: t0, t1, tp + // TRASHES: t0, t1, t2 // LEAF_ENTRY RhCommonStub, _TEXT // There are arbitrary callers passing arguments with arbitrary signatures. 
@@ -24,15 +24,15 @@ INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData) // t0 = base address of TLS data - // tp = address of context cell in thunk's data + // t1 = address of context cell in thunk's data // Load the thunk address from the data block and store it in the thread's static storage - ld t1, 0(t0) // Load thunk address into t1 from the TLS base address - sd t1, 0(t0) // Store the thunk address in thread static storage + ld t2, 0(t1) // Load thunk data into t1 + sd t2, 0(t0) // Store the thunk address in thread static storage // Load the target address from the data block and jump to it - ld t1, POINTER_SIZE(t0) // Load target address into t1 from the data block - jalr t1 // Jump to the target address in t1 + ld t1, POINTER_SIZE(t1) // Load target address into t1 from the data block + jr t1 // Jump to the target address in t1 LEAF_END RhCommonStub, _TEXT From a719d7d22b1a0d63201561816076d6361cb690d3 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 18 Feb 2025 19:19:41 +0000 Subject: [PATCH 23/45] Fix layout of universal translation for unwinding --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 12 ++++++------ .../nativeaot/Runtime/riscv64/UniversalTransition.S | 6 +----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index e337a4b2510a4a..6eed279dd58183 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1500,12 +1500,12 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the StackPassedArgs. 
private: - uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) - uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) - uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) - uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) - uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0B0 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0A8 (0x08 bytes) (ra) + uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-0A0 (0x80 bytes) (fa0-fa7) + uintptr_t m_returnBlock[4]; // ChildSP+050 CallerSP-060 (0x20 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+070 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0B0 CallerSP+000 (unknown size) public: PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 3a6bb52c946e89..0637f4258407af 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -12,9 +12,6 @@ .global RhpFpTrashValues #endif // TRASH_SAVED_ARGUMENT_REGISTERS -// Padding to account for the odd number of saved integer registers -#define ALIGNMENT_PADDING_SIZE (8) - #define COUNT_ARG_REGISTERS (8) #define INTEGER_REGISTER_SIZE (8) #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) @@ -31,7 +28,6 @@ // From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: // -// ALIGNMENT_PADDING_SIZE // ARGUMENT_REGISTERS_SIZE // RETURN_BLOCK_SIZE // FLOAT_ARG_REGISTERS_SIZE @@ -41,7 +37,7 @@ #define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) -#define STACK_SIZE 
(ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) +#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) #define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) #define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) From 212157a49d5c00187b4a8aae6aa531310c6b8ca1 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 18 Feb 2025 22:24:52 +0000 Subject: [PATCH 24/45] WIP: Add memory barriers to native AOT asm helpers (https://github.com/dotnet/runtime/issues/106219) --- .../DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs | 5 +++++ .../Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs index caf1dad16a1af5..cc4ceb02632ef4 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs @@ -25,6 +25,11 @@ public void EmitBreak() Builder.EmitUInt(0x00100073); } + public void EmitFENCE() + { + Builder.EmitUInt(0x0ff0000f); + } + public void EmitLI(Register regDst, int offset) { Debug.Assert((offset >= -2048) && (offset <= 2047)); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index 584b37e53a7e27..9514c05cb93b44 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -75,6 +75,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. encoder.EmitADDI(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, 0); + encoder.EmitFENCE(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg2); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); @@ -106,6 +107,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder.EmitADDI(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); + encoder.EmitFENCE(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg3); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); From 9743390282a225a9c6c832f4929bfbf9ce0aae3b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 18 Feb 2025 22:25:43 +0000 Subject: [PATCH 25/45] WIP: Add fences to RhpCheckedLockCmpXchg/RhpCheckedXchg and fix atomicity guarantees of RhpCheckedXchg --- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index aa1a69e8b704c3..0e64bbbb85ad46 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -303,6 +303,7 @@ LEAF_END RhpAssignRef, _TEXT // t0, t1, t2, t6: trashed // LEAF_ENTRY RhpCheckedLockCmpXchg + fence LOCAL_LABEL(CmpXchgRetry): // 
Load the current value at the destination address. @@ -342,13 +343,15 @@ LEAF_END RhpCheckedLockCmpXchg // // On exit: // a0: original value of objectref -// t1: trashed -// t3, t6, t4: trashed +// t1, t2, t6: trashed // LEAF_ENTRY RhpCheckedXchg + fence - ld t1, 0(a0) - sd a1, 0(a0) +RhpCheckedXchgRetry: + lr.d t1, 0(a0) + sc.d t2, a1, 0(a0) + bnez t2, RhpCheckedXchgRetry // if store conditional failed, retry DoCardsXchg: // We have successfully updated the value of the objectref so now we need a GC write barrier. From 20101ac2932e4e5f3f7593d0dbb06762a6b5775e Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Wed, 19 Feb 2025 12:41:38 +0200 Subject: [PATCH 26/45] Preserve registers in INLINE_GET_TLS_VAR --- .../Runtime/riscv64/InteropThunksHelpers.S | 4 ++- .../Runtime/unix/unixasmmacrosriscv64.inc | 32 +++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index 5b191e83806f86..a19cf4c0010214 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -21,13 +21,15 @@ // Custom calling convention: // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + mv t2, a0 INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData) + mv a0, t2 // t0 = base address of TLS data // t1 = address of context cell in thunk's data // Load the thunk address from the data block and store it in the thread's static storage - ld t2, 0(t1) // Load thunk data into t1 + ld t2, 0(t1) // Load thunk data into t2 sd t2, 0(t0) // Store the thunk address in thread static storage // Load the target address from the data block and jump to it diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 
04b1410c401301..6e5f424b9c2a48 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -162,25 +162,29 @@ C_FUNC(\Name): .error "target cannot be a0" .endif - addi sp, sp, -48 - sd ra, 40(sp) - sd t1, 32(sp) - sd a1, 24(sp) - sd a2, 16(sp) - sd a3, 8(sp) - sd a4, 0(sp) + addi sp, sp, -64 + sd ra, 56(sp) + sd t1, 48(sp) + sd a1, 40(sp) + sd a2, 32(sp) + sd a3, 24(sp) + sd a4, 16(sp) + sd a5, 8(sp) + sd a6, 0(sp) // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic la.tls.gd a0, \var call C_FUNC(__tls_get_addr) - ld ra, 40(sp) - ld t1, 32(sp) - ld a1, 24(sp) - ld a2, 16(sp) - ld a3, 8(sp) - ld a4, 0(sp) - addi sp, sp, 48 + ld ra, 56(sp) + ld t1, 48(sp) + ld a1, 40(sp) + ld a2, 32(sp) + ld a3, 24(sp) + ld a4, 16(sp) + ld a5, 8(sp) + ld a6, 0(sp) + addi sp, sp, 64 mv \target, a0 From 310e8f8cddd07957e1294fe46fa986aa27908417 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Wed, 19 Feb 2025 13:11:35 +0200 Subject: [PATCH 27/45] Preserve a7 as well --- .../Runtime/unix/unixasmmacrosriscv64.inc | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 6e5f424b9c2a48..f2e7ac8357fdc2 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -162,29 +162,31 @@ C_FUNC(\Name): .error "target cannot be a0" .endif - addi sp, sp, -64 - sd ra, 56(sp) - sd t1, 48(sp) - sd a1, 40(sp) - sd a2, 32(sp) - sd a3, 24(sp) - sd a4, 16(sp) - sd a5, 8(sp) - sd a6, 0(sp) + addi sp, sp, -72 + sd ra, 64(sp) + sd t1, 56(sp) + sd a1, 48(sp) + sd a2, 40(sp) + sd a3, 32(sp) + sd a4, 24(sp) + sd a5, 16(sp) + sd a6, 8(sp) + sd a7, 0(sp) // global dynamic TLS, see 
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic la.tls.gd a0, \var call C_FUNC(__tls_get_addr) - ld ra, 56(sp) - ld t1, 48(sp) - ld a1, 40(sp) - ld a2, 32(sp) - ld a3, 24(sp) - ld a4, 16(sp) - ld a5, 8(sp) - ld a6, 0(sp) - addi sp, sp, 64 + ld ra, 64(sp) + ld t1, 56(sp) + ld a1, 48(sp) + ld a2, 40(sp) + ld a3, 32(sp) + ld a4, 24(sp) + ld a5, 16(sp) + ld a6, 8(sp) + ld a7, 0(sp) + addi sp, sp, 72 mv \target, a0 From 5b058e46a377228cad9c059eb182deaf8451ab1c Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 20 Feb 2025 08:32:05 +0000 Subject: [PATCH 28/45] WIP: Attempt to fix masks in IsInProlog/TrailingEpilogueInstructionsCount (to be reviewed) --- .../Runtime/unix/UnixNativeCodeManager.cpp | 53 +++++++++---------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index a1e2e507df816a..98b86e715bbd95 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -699,22 +699,23 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre #elif defined(TARGET_RISCV64) // store pair with signed offset -// 0100 00xx xxxxxxxx xxxx xxxx xxxx xxxx -#define STW_PAIR_BITS 0x04000000 -#define STW_PAIR_MASK 0xFC000000 +#define STW_PAIR_BITS 0x00003023 +#define STW_PAIR_MASK 0x0000707F -// add fp, sp, x -// addi fp, sp, x -// 0000 0001 100x xxxx xxxx xxxx 0000 0000 -#define ADD_FP_SP_BITS 0x01C00000 -#define ADD_FP_SP_MASK 0xFFFFE000 - -#define STW_PAIR_RS1_MASK 0xF80 -#define STW_PAIR_RS1_SP 0xF80 -#define STW_PAIR_RS1_FP 0xF00 -#define STW_PAIR_RS2_MASK 0xF00 -#define STW_PAIR_RS2_FP 0xF00 -#define STW_PAIR_RS2_RA 0xF40 +// add[i] fp, sp, x +#define ADD_FP_SP_BITS 0x00010413 +#define ADD_FP_SP_MASK 0x000FFFFF + +// add[i] sp, sp, x +#define ADD_SP_SP_BITS 0x00010113 +#define 
ADD_SP_SP_MASK 0x000FFFFF + +#define STW_PAIR_RS1_MASK 0xF8000 +#define STW_PAIR_RS1_SP 0x10000 +#define STW_PAIR_RS1_FP 0x40000 +#define STW_PAIR_RS2_MASK 0x1F00000 +#define STW_PAIR_RS2_FP 0x800000 +#define STW_PAIR_RS2_RA 0x100000 UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; ASSERT(pNativeMethodInfo != NULL); @@ -740,7 +741,7 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre { establishedFp = true; } - else + else if ((instr & ADD_SP_SP_MASK) != ADD_SP_SP_BITS) { // JIT generates other patterns into the prolog that we currently don't // recognize (saving unpaired register, stack pointer adjustments). We @@ -1185,21 +1186,13 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho #elif defined(TARGET_RISCV64) -// Load with immediate -// LUI, LD, etc. -// 0000 0000 0000 0000 1111 1111 1111 1111 -#define LUI_BITS 0x00000037 -#define LUI_MASK 0x0000007F - // Load with register offset // LD with register offset -// 0000 0000 0000 0000 0111 0000 0000 0000 #define LD_BITS 0x00000003 #define LD_MASK 0x0000007F // Branches, Jumps, System calls // BEQ, BNE, JAL, etc. 
-// 1100 0000 0000 0000 0000 0000 0000 0000 #define BEGS_BITS 0x00000063 #define BEGS_MASK 0x0000007F @@ -1229,14 +1222,20 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho } // Check for restoring registers (FP or RA) with `ld` - int rd = (instr >> 7) & 0x1F; // Extract the destination register - if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) + if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction { - if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction + int rd = (instr >> 7) & 0x1F; // Extract the destination register + if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) { return -1; } } + + // Check for adjusting stack pointer + if ((instr & ADD_SP_SP_MASK) == ADD_SP_SP_BITS) + { + return -1; + } } #endif From e5e96bbc1e1fcef7aaee0193161f11124197ed7c Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 20 Feb 2025 11:05:43 +0000 Subject: [PATCH 29/45] WIP: Fix GC hijacking flags and possible return value trashing --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 11 ++++++----- .../Runtime/unix/unixasmmacrosloongarch64.inc | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index 019199d548b33a..ec1a1be613eb0f 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -44,7 +44,7 @@ # Perform the rest of the PInvokeTransitionFrame initialization. 
sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) - sd \BITMASK, (OFFSETOF__PInvokeTransitionFrame__m_pThread + 8)(sp) # Save the register bitmask passed in by caller + sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) # Save the register bitmask passed in by caller addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP sd \trashReg, 0x78(sp) # Save caller's SP @@ -84,7 +84,9 @@ .macro FixupHijackedCallstack // a2 <- GetThread() + mv t3, a0 INLINE_GETTHREAD a2 + mv a0, t3 // Fix the stack by restoring the original return address ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) @@ -100,14 +102,13 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 - andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t3 + andi t3, t3, 1 << TrapThreadsFlags_TrapThreads_Bit bnez t3, LOCAL_LABEL(WaitForGC) jr ra LOCAL_LABEL(WaitForGC): - li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + PTFF_THREAD_HIJACK_HI) - or t3, t3, t6 + li t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + (PTFF_THREAD_HIJACK_HI << 32)) tail C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index b78210c8f85378..535d7ca303cd5e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -198,7 +198,7 @@ C_FUNC(\Name): #define PTFF_SAVE_R4 0x00000800 #define PTFF_SAVE_R5 0x00001000 #define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31 -#define PTFF_THREAD_HIJACK_HI 0x00000002 // upper 32 bits of the PTFF_THREAD_HIJACK +#define PTFF_THREAD_HIJACK_HI 0x00000001 // upper 32 bits of the PTFF_THREAD_HIJACK #define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED 
+ PTFF_SAVE_SP) From a5770dd5087598a43fac89379e9e3ede6d59b711 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 20 Feb 2025 11:38:52 +0000 Subject: [PATCH 30/45] Fix return value trashing --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index ec1a1be613eb0f..c8af535e8a8c12 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -84,9 +84,9 @@ .macro FixupHijackedCallstack // a2 <- GetThread() - mv t3, a0 + mv t1, a0 INLINE_GETTHREAD a2 - mv a0, t3 + mv a0, t1 // Fix the stack by restoring the original return address ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) From 215e5f27e05c89d4d2d87089b6c57d25f4cca870 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 20 Feb 2025 12:20:53 +0000 Subject: [PATCH 31/45] Actually fix the PTFF_THREAD_HIJACK_HI flag in correct file --- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc | 2 +- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index 535d7ca303cd5e..b78210c8f85378 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -198,7 +198,7 @@ C_FUNC(\Name): #define PTFF_SAVE_R4 0x00000800 #define PTFF_SAVE_R5 0x00001000 #define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31 -#define PTFF_THREAD_HIJACK_HI 0x00000001 // upper 32 bits of the PTFF_THREAD_HIJACK +#define PTFF_THREAD_HIJACK_HI 0x00000002 // upper 32 bits of the PTFF_THREAD_HIJACK #define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP) diff --git 
a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index f2e7ac8357fdc2..8f0bf57f108f18 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -249,7 +249,7 @@ C_FUNC(\Name): #define PTFF_SAVE_A0 0x00004000 #define PTFF_SAVE_A1 0x00008000 #define PTFF_SAVE_ALL_PRESERVED 0x000007FF // NOTE: S1-S11 -#define PTFF_THREAD_HIJACK_HI 0x00000002 // upper 32 bits of the PTFF_THREAD_HIJACK +#define PTFF_THREAD_HIJACK_HI 0x00000001 // upper 32 bits of the PTFF_THREAD_HIJACK #define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP From 1fd17163d867a9b907f053b66b2e5b03594e0075 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 20 Feb 2025 17:28:03 +0200 Subject: [PATCH 32/45] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomek Sowiński --- .../nativeaot/Runtime/ThunksMapping.cpp | 6 +---- .../nativeaot/Runtime/riscv64/AllocFast.S | 2 +- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 22 +++++++------------ .../Runtime/unix/UnixNativeCodeManager.cpp | 8 +++---- ...rosoft.NETCore.App.Runtime.CoreCLR.sfxproj | 1 - 5 files changed, 14 insertions(+), 25 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 7a41f2f9c7eded..67fb667740be60 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -262,7 +262,6 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() //auipc t1, hi() //addi t1, t1, lo() //auipc t0, hi() - //addi t0, t0, lo() //ld t0, (t0) //jalr zero, t0, 0 @@ -277,10 +276,7 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t0, 
delta[31:12] pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x00028293 | ((delta & 0xFFF) << 20); // addi t0, t0, delta[11:0] - pCurrentThunkAddress += 4; - - *((uint32_t*)pCurrentThunkAddress) = 0x0002b283; // ld t0, (t0) + *((uint32_t*)pCurrentThunkAddress) = 0x0002b283 | ((delta & 0xFFF) << 20); // ld t0, (delta[11:0])(t0) pCurrentThunkAddress += 4; *((uint32_t*)pCurrentThunkAddress) = 0x00008282; // jalr zero, t0, 0 diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index 685e4a54cb6df9..4690b12c38dbfb 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -74,7 +74,7 @@ LOCAL_LABEL(RhpNewFast_RarePath): // a3: transition frame - // Preserve the MethodTable in s0 + // Preserve the MethodTable in s2 mv s2, a0 li a2, 0 // numElements diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 0e64bbbb85ad46..bb2019b1f85d97 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -134,8 +134,8 @@ // Check that this card has not already been written. Avoiding useless writes // is a big win on multi-proc systems since it avoids cache thrashing. - lb t2, 0(t6) - xori t2, t2, 0xFF + lbu t2, 0(t6) + addi t2, t2, -0xFF beqz t2, 0f li t2, 0xFF @@ -148,8 +148,8 @@ srli t6, \destReg, 21 add t6, t2, t6 - lb t2, 0(t6) - xori t2, t2, 0xFF + lbu t2, 0(t6) + addi t2, t2, -0xFF beqz t2, 0f li t2, 0xFF @@ -303,16 +303,15 @@ LEAF_END RhpAssignRef, _TEXT // t0, t1, t2, t6: trashed // LEAF_ENTRY RhpCheckedLockCmpXchg - fence LOCAL_LABEL(CmpXchgRetry): // Load the current value at the destination address. - lr.d t0, (a0) // t0 = *dest + lr.d.aqrl t0, (a0) // t0 = *dest (load with sequential consistency) // Compare the loaded value with the comparand. 
bne t0, a2, LOCAL_LABEL(CmpXchgNoUpdate) // if (*dest != comparand) goto CmpXchgNoUpdate // Attempt to store the exchange value at the destination address. - sc.d t1, a1, (a0) // t1 = (store conditional result: 0 if successful) + sc.d.rl t1, a1, (a0) // t1 = (store conditional result: 0 if successful, with sequential consistency) bnez t1, LOCAL_LABEL(CmpXchgRetry) // if store conditional failed, retry LOCAL_LABEL(DoCardsCmpXchg): @@ -343,15 +342,10 @@ LEAF_END RhpCheckedLockCmpXchg // // On exit: // a0: original value of objectref -// t1, t2, t6: trashed +// t1, t6: trashed // LEAF_ENTRY RhpCheckedXchg - fence - -RhpCheckedXchgRetry: - lr.d t1, 0(a0) - sc.d t2, a1, 0(a0) - bnez t2, RhpCheckedXchgRetry // if store conditional failed, retry + amoswap.d.aqrl t1, a1, (a0) DoCardsXchg: // We have successfully updated the value of the objectref so now we need a GC write barrier. diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 98b86e715bbd95..1013255dca830d 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -702,11 +702,11 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre #define STW_PAIR_BITS 0x00003023 #define STW_PAIR_MASK 0x0000707F -// add[i] fp, sp, x +// addi fp, sp, x #define ADD_FP_SP_BITS 0x00010413 #define ADD_FP_SP_MASK 0x000FFFFF -// add[i] sp, sp, x +// addi sp, sp, x #define ADD_SP_SP_BITS 0x00010113 #define ADD_SP_SP_MASK 0x000FFFFF @@ -1191,8 +1191,8 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho #define LD_BITS 0x00000003 #define LD_MASK 0x0000007F -// Branches, Jumps, System calls -// BEQ, BNE, JAL, etc. +// Branches +// BEQ, BNE, etc. 
#define BEGS_BITS 0x00000063 #define BEGS_MASK 0x0000007F diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj index 11cdfbfea2eb6a..b6d2d049dc91e6 100644 --- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj +++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.CoreCLR.sfxproj @@ -27,7 +27,6 @@ false false - false true true From a310c69c1f97c55ddddd5ed6f12f311fe87f8185 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Thu, 20 Feb 2025 17:40:42 +0200 Subject: [PATCH 33/45] Update src/coreclr/nativeaot/Runtime/ThunksMapping.cpp Co-authored-by: Filip Navara --- src/coreclr/nativeaot/Runtime/ThunksMapping.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 67fb667740be60..01f2dbfec95348 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -25,7 +25,7 @@ #elif TARGET_LOONGARCH64 #define THUNK_SIZE 16 #elif TARGET_RISCV64 -#define THUNK_SIZE 24 +#define THUNK_SIZE 20 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 #endif From a2f842202dc69affc7d05744d82d2018c0e647f0 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 20 Feb 2025 17:57:06 +0200 Subject: [PATCH 34/45] Make style consistent --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 6eed279dd58183..730d0a6859d5aa 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ 
b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -530,7 +530,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO // preserved floating-point registers // int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; - for (int i = 0; i < sizeof(preservedFpIndices) / sizeof(preservedFpIndices[0]); i++) + for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++) { m_RegDisplay.F[preservedFpIndices[i]] = pCtx->F[preservedFpIndices[i]]; } @@ -1287,11 +1287,10 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - m_RegDisplay.F[8] = *f++; - m_RegDisplay.F[9] = *f++; - for (int i = 0; i < 10; i++) + int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; + for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++) { - m_RegDisplay.F[i + 18] = *f++; + m_RegDisplay.F[preservedFpIndices[i]] = *f++; } SP = (PTR_uintptr_t)f; From a9a6a47a813606a208d1d860f8027475d2a18065 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 21 Feb 2025 08:49:27 +0100 Subject: [PATCH 35/45] Flip the sign of PROLOG_SAVE_REG_PAIR_INDEXED to match CoreCLR definition and PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED --- src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S | 2 +- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 2 +- .../nativeaot/Runtime/riscv64/UniversalTransition.S | 2 +- .../nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 7 +++---- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index 65a14ae1c49dfe..8258325967821a 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -44,7 +44,7 @@ fsd fs10, 0x60(sp) fsd fs11, 0x68(sp) - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -0x78 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x78 sd 
zero, 0x10(sp) // locations reserved for return value, not used for exception handling sd zero, 0x18(sp) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index c8af535e8a8c12..29d70d7d1bf108 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -17,7 +17,7 @@ # incoming register values into it. # First create PInvokeTransitionFrame - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) # Slot at sp+0x10 is reserved for Thread * # Slot at sp+0x18 is reserved for bitmask of saved registers diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 0637f4258407af..16f0636e290606 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -87,7 +87,7 @@ NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler # FP and RA registers - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -STACK_SIZE + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE # Floating point registers fsd fa0, FLOAT_ARG_OFFSET(sp) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 8f0bf57f108f18..6732e64f10efbd 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -96,8 +96,8 @@ C_FUNC(\Name): .endm .macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 - addi sp, sp, \ssize - .cfi_adjust_cfa_offset -\ssize + addi sp, sp, -\ssize + .cfi_adjust_cfa_offset \ssize sd \reg1, 0(sp) sd \reg2, 8(sp) @@ -112,8 +112,7 @@ C_FUNC(\Name): .macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, 
reg2, ssize addi sp, sp, -\ssize - //.cfi_adjust_cfa_offset \ssize - .cfi_def_cfa sp, \ssize + .cfi_adjust_cfa_offset \ssize sd \reg1, 0(sp) sd \reg2, 8(sp) From de257a37b04ada9a8bb14aacaedb575ce85f2682 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 21 Feb 2025 13:11:04 +0000 Subject: [PATCH 36/45] Fix the last change to sign --- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 6732e64f10efbd..26ab0a3a6dbee0 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -253,7 +253,7 @@ C_FUNC(\Name): #define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP .macro PUSH_COOP_PINVOKE_FRAME trashReg - PROLOG_SAVE_REG_PAIR_INDEXED s0, ra, -128 // Push down stack pointer and store s0 (fp) and RA + PROLOG_SAVE_REG_PAIR_INDEXED s0, ra, 128 // Push down stack pointer and store s0 (fp) and RA // 16 bytes reserved for Thread* and flags From d5adb94b0744ec1dc281baffddaf03b0d0dc8791 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 21 Feb 2025 13:32:31 +0000 Subject: [PATCH 37/45] Save one mv instruction --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index 29d70d7d1bf108..0d89b36e1aabdf 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -50,8 +50,7 @@ sd \trashReg, 0x78(sp) # Save caller's SP # Link the frame into the Thread - mv \trashReg, sp - sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + sd sp, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) .endm From e34c90c35e70af12acc89dc21e414623ec02aa81 Mon 
Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 21 Feb 2025 13:33:13 +0000 Subject: [PATCH 38/45] Relax FENCE in R2R helpers --- .../DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs | 4 ++-- .../Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs index cc4ceb02632ef4..3d91276d7fd357 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs @@ -25,9 +25,9 @@ public void EmitBreak() Builder.EmitUInt(0x00100073); } - public void EmitFENCE() + public void EmitFENCE_RW_RW() { - Builder.EmitUInt(0x0ff0000f); + Builder.EmitUInt(0x0330000f); } public void EmitLI(Register regDst, int offset) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index 9514c05cb93b44..08323b03370e93 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -75,7 +75,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
encoder.EmitADDI(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, 0); - encoder.EmitFENCE(); + encoder.EmitFENCE_RW_RW(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg2); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); @@ -107,7 +107,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder.EmitADDI(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); - encoder.EmitFENCE(); + encoder.EmitFENCE_RW_RW(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg3); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); From 939c0a24dd99c6a907720905038c3e494bf9371b Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Fri, 21 Feb 2025 12:53:09 +0200 Subject: [PATCH 39/45] Apply suggestions from CR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomek Sowiński --- .../nativeaot/Runtime/StackFrameIterator.cpp | 12 ++++---- .../Runtime/riscv64/UniversalTransition.S | 7 ++--- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 4 +-- .../Runtime/unix/UnixNativeCodeManager.cpp | 28 +++++++++---------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 730d0a6859d5aa..78a39612f907c0 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1499,12 +1499,12 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the
StackPassedArgs. private: - uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0B0 (0x08 bytes) (fp) - uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0A8 (0x08 bytes) (ra) - uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-0A0 (0x80 bytes) (fa0-fa7) - uintptr_t m_returnBlock[4]; // ChildSP+050 CallerSP-060 (0x20 bytes) - uintptr_t m_intArgRegs[8]; // ChildSP+070 CallerSP-040 (0x40 bytes) (a0-a7) - uintptr_t m_stackPassedArgs[1]; // ChildSP+0B0 CallerSP+000 (unknown size) + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0A0 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-098 (0x08 bytes) (ra) + uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-090 (0x40 bytes) (fa0-fa7) + uintptr_t m_returnBlock[2]; // ChildSP+050 CallerSP-050 (0x10 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+060 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0A0 CallerSP+000 (unknown size) public: PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 16f0636e290606..234e6b46357dd5 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -17,7 +17,7 @@ #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) // Largest return block is 4 doubles -#define RETURN_BLOCK_SIZE (32) +#define RETURN_BLOCK_SIZE 16 #define COUNT_FLOAT_ARG_REGISTERS (8) #define FLOAT_REGISTER_SIZE (8) @@ -59,9 +59,8 @@ // Frame layout is: // // {StackPassedArgs} ChildSP+100 CallerSP+000 -// {AlignmentPad (0x8 bytes)} ChildSP+0F8 CallerSP-008 // {IntArgRegs (a0-a7) (0x40 bytes)} ChildSP+0B8 CallerSP-048 -// {ReturnBlock (0x20 bytes)} ChildSP+098 CallerSP-068 +// {ReturnBlock (0x10 bytes)} ChildSP+098 CallerSP-068 // -- The base address of the Return block is the TransitionBlock pointer, the floating point args 
are // in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact // layout of all pieces of the frame that lie at or above the pushed floating point registers. @@ -99,7 +98,7 @@ fsd fa6, FLOAT_ARG_OFFSET + 0x30(sp) fsd fa7, FLOAT_ARG_OFFSET + 0x38(sp) - # Space for return buffer data (0x20 bytes) + # Space for return block data (0x10 bytes) # Save argument registers sd a0, ARGUMENT_REGISTERS_OFFSET(sp) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index bb2019b1f85d97..1ccc9d16b68d94 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -306,12 +306,12 @@ LEAF_ENTRY RhpCheckedLockCmpXchg LOCAL_LABEL(CmpXchgRetry): // Load the current value at the destination address. - lr.d.aqrl t0, (a0) // t0 = *dest (load with sequential consistency) + lr.d.aqrl t0, (a0) // t0 = *dest (load with release ordering) // Compare the loaded value with the comparand. bne t0, a2, LOCAL_LABEL(CmpXchgNoUpdate) // if (*dest != comparand) goto CmpXchgNoUpdate // Attempt to store the exchange value at the destination address. 
- sc.d.rl t1, a1, (a0) // t1 = (store conditional result: 0 if successful, with sequential consistency) + sc.d.rl t1, a1, (a0) // t1 = (store conditional result: 0 if successful, with release ordering) bnez t1, LOCAL_LABEL(CmpXchgRetry) // if store conditional failed, retry LOCAL_LABEL(DoCardsCmpXchg): diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 1013255dca830d..231be716316475 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -698,9 +698,9 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre #elif defined(TARGET_RISCV64) -// store pair with signed offset -#define STW_PAIR_BITS 0x00003023 -#define STW_PAIR_MASK 0x0000707F +// store doubleword with signed offset +#define SD_BITS 0x00003023 +#define SD_MASK 0x0000707F // addi fp, sp, x #define ADD_FP_SP_BITS 0x00010413 @@ -710,12 +710,12 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre #define ADD_SP_SP_BITS 0x00010113 #define ADD_SP_SP_MASK 0x000FFFFF -#define STW_PAIR_RS1_MASK 0xF8000 -#define STW_PAIR_RS1_SP 0x10000 -#define STW_PAIR_RS1_FP 0x40000 -#define STW_PAIR_RS2_MASK 0x1F00000 -#define STW_PAIR_RS2_FP 0x800000 -#define STW_PAIR_RS2_RA 0x100000 +#define SD_RS1_MASK 0xF8000 +#define SD_RS1_SP 0x10000 +#define SD_RS1_FP 0x40000 +#define SD_RS2_MASK 0x1F00000 +#define SD_RS2_FP 0x800000 +#define SD_RS2_RA 0x100000 UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; ASSERT(pNativeMethodInfo != NULL); @@ -729,13 +729,13 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre { uint32_t instr = *pInstr; - if (((instr & STW_PAIR_MASK) == STW_PAIR_BITS) && - ((instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_SP || (instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_FP) && - ((instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP 
|| (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA)) + if (((instr & SD_MASK) == SD_BITS) && + ((instr & SD_RS1_MASK) == SD_RS1_SP || (instr & SD_RS1_MASK) == SD_RS1_FP) && + ((instr & SD_RS2_MASK) == SD_RS2_FP || (instr & SD_RS2_MASK) == SD_RS2_RA)) { // SP/FP-relative store of pair of registers - savedFp |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP; - savedRa |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA; + savedFp |= (instr & SD_RS2_MASK) == SD_RS2_FP; + savedRa |= (instr & SD_RS2_MASK) == SD_RS2_RA; } else if ((instr & ADD_FP_SP_MASK) == ADD_FP_SP_BITS) { From 2fd845ad9515d7e1c5d4723580b2a97eba7388f0 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 21 Feb 2025 14:38:08 +0000 Subject: [PATCH 40/45] Update managed definition of TransitionBlock too --- .../nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs index 7932ba9300dc3a..612430714bd2ca 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs @@ -366,8 +366,6 @@ internal struct ReturnBlock { private IntPtr returnValue; private IntPtr returnValue2; - private IntPtr returnValue3; - private IntPtr returnValue4; } [StructLayout(LayoutKind.Sequential)] From 84504b52b26be2026d1b12684dba16dd0b1726dc Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Fri, 21 Feb 2025 22:11:55 +0200 Subject: [PATCH 41/45] Update TLSDESC comments --- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 26ab0a3a6dbee0..cbfc289518db11 100644 ---
a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -190,8 +190,8 @@ C_FUNC(\Name): mv \target, a0 /* - In the future we should switch to TLS descriptors. The support was added in 2024 in glibc, musl, llvm, gcc and binutils, - so its support is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works + In the future we should switch to TLS descriptors. Its support was added in 2024 in glibc, musl, llvm, gcc and binutils, + which is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works When the support for TLS descriptors is available in NativeAOT baseline, actions to perform: * Apply this patch: @@ -208,6 +208,7 @@ C_FUNC(\Name): add_subdirectory(Bootstrap) ``` + * Remove global dynamic code including prolog and epilog. * Uncomment the following code and remove these comments. // TLS descriptor, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#tls-descriptors From 62c35d60ba3a1469bdfcb09f6f9e2a399d752d48 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Mon, 24 Feb 2025 13:31:39 +0200 Subject: [PATCH 42/45] Update src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomasz Sowiński --- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 1ccc9d16b68d94..7529bb87a4f596 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -306,7 +306,7 @@ LEAF_ENTRY RhpCheckedLockCmpXchg LOCAL_LABEL(CmpXchgRetry): // Load the current value at the destination address. 
- lr.d.aqrl t0, (a0) // t0 = *dest (load with release ordering) + lr.d.aqrl t0, (a0) // t0 = *dest (load with acquire-release ordering) // Compare the loaded value with the comparand. bne t0, a2, LOCAL_LABEL(CmpXchgNoUpdate) // if (*dest != comparand) goto CmpXchgNoUpdate From b3320a51e8585c2d2218762f18cb94dc90ec8b10 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 24 Feb 2025 16:14:21 +0000 Subject: [PATCH 43/45] Relax semantics of a fence in R2R helper to match ARM64 --- .../DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs | 4 ++-- .../Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs index 3d91276d7fd357..380163ed2fbf20 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64Emitter.cs @@ -25,9 +25,9 @@ public void EmitBreak() Builder.EmitUInt(0x00100073); } - public void EmitFENCE_RW_RW() + public void EmitFENCE_R_RW() { - Builder.EmitUInt(0x0330000f); + Builder.EmitUInt(0x0230000f); } public void EmitLI(Register regDst, int offset) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index 08323b03370e93..c6e2364766fa64 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -75,7 +75,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter // We 
need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. encoder.EmitADDI(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, 0); - encoder.EmitFENCE_RW_RW(); + encoder.EmitFENCE_R_RW(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg2); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); @@ -107,7 +107,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder.EmitADDI(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); - encoder.EmitFENCE_RW_RW(); + encoder.EmitFENCE_R_RW(); encoder.EmitRETIfZero(encoder.TargetRegister.Arg3); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); From 78efdc7df09abf5b5371d7099c0725a82bdc6406 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 24 Feb 2025 16:15:17 +0000 Subject: [PATCH 44/45] Add missing fence in RhpAssignRefRiscV64 (matches ARM code and CoreCLR/Risc-V code) --- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 7529bb87a4f596..467ec5332d516c 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -261,6 +261,7 @@ LEAF_END RhpCheckedAssignRef, _TEXT // t2, t6 : trashed // t3 : incremented by 8 LEAF_ENTRY RhpAssignRefRiscV64, _TEXT + fence rw, rw ALTERNATE_ENTRY RhpAssignRefAVLocation sd t4, 0(t3) From d8486a6174f5ee81388319d64efb711fca7ba045 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 24 Feb 2025 20:09:18 +0000 Subject: [PATCH 45/45] 
Match barriers emitted by PalInterlockedOperationBarrier in NativeAOT and equivalent code in CoreCLR in ExchangeObject/CompareExchangeObject --- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 467ec5332d516c..e063f549a398a1 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -315,6 +315,10 @@ LOCAL_LABEL(CmpXchgRetry): sc.d.rl t1, a1, (a0) // t1 = (store conditional result: 0 if successful, with release ordering) bnez t1, LOCAL_LABEL(CmpXchgRetry) // if store conditional failed, retry + // See comment at the top of PalInterlockedOperationBarrier method for explanation why this memory + // barrier is necessary. + fence rw, rw + LOCAL_LABEL(DoCardsCmpXchg): // We have successfully updated the value of the objectref so now we need a GC write barrier. // The following barrier code takes the destination in a0 and the value in a1 so the arguments are @@ -348,6 +352,10 @@ LEAF_END RhpCheckedLockCmpXchg LEAF_ENTRY RhpCheckedXchg amoswap.d.aqrl t1, a1, (a0) + // See comment at the top of PalInterlockedOperationBarrier method for explanation why this memory + // barrier is necessary. + fence rw, rw + DoCardsXchg: // We have successfully updated the value of the objectref so now we need a GC write barrier. // The following barrier code takes the destination in a0 and the value in a1 so the arguments are