From 242d361dfe1e8df70c7f377db90532c918ec9414 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Fri, 11 Mar 2022 10:27:30 +0100 Subject: [PATCH 01/23] Initial version using a byte[] lookup table for the generations, using 4 bits for the current and planned generation. WKS shows no overall improvement, SVR crashes. --- src/coreclr/gc/gc.cpp | 64 +++++++++++++++++++++++++++++++++++++++-- src/coreclr/gc/gcpriv.h | 18 ++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 4b3c07d8a95f56..7d999098903342 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2283,6 +2283,9 @@ region_allocator global_region_allocator; uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; size_t gc_heap::region_count = 0; +uint8_t* gc_heap::map_region_to_generation = nullptr; +uint8_t* gc_heap::map_region_to_generation_skewed = nullptr; + #endif //USE_REGIONS #ifdef BACKGROUND_GC @@ -13106,6 +13109,12 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, if (!allocate_initial_regions(number_of_heaps)) return E_OUTOFMEMORY; + + size_t num_region_units = (g_gc_highest_address - g_gc_lowest_address) >> min_segment_size_shr; + map_region_to_generation = new (nothrow) uint8_t[num_region_units]; + if (map_region_to_generation == nullptr) + return E_OUTOFMEMORY; + map_region_to_generation_skewed = map_region_to_generation - ((size_t)g_gc_lowest_address >> min_segment_size_shr); } else { @@ -25569,6 +25578,27 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) special_sweep_p = false; region_count = global_region_allocator.get_used_region_count(); grow_mark_list_piece(); + + memset (map_region_to_generation, 0x22, region_count*sizeof(map_region_to_generation[0])); + for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + generation *gen = generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + { + uint8_t* addr = heap_segment_mem (region); + size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; + map_region_to_generation_skewed[index] = 0xf0 | (uint8_t)gen_number; + } + } + } #endif //USE_REGIONS GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false); @@ -32267,6 +32297,29 @@ void gc_heap::relocate_phase (int condemned_gen_number, } #endif //FEATURE_EVENT_TRACE +#ifdef USE_REGIONS + for (int gen_number = soh_gen0; gen_number <= soh_gen2; gen_number++) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + generation *gen = generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + { + uint8_t* addr = heap_segment_mem (region); + size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; + int plan_gen = heap_segment_plan_gen_num (region); + map_region_to_generation_skewed[index] = (uint8_t)((plan_gen<<4) | (map_region_to_generation_skewed[index] & 0x0f)); + } + } + } +#endif //USE_REGIONS + #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting for relocation")); @@ -36733,7 +36786,10 @@ 
gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, uint8_t* child_object = *poo; if (!is_in_heap_range (child_object)) return; - int child_object_gen = get_region_gen_num (child_object); + + size_t child_region_index = (size_t)child_object >> gc_heap::min_segment_size_shr; + int child_object_gen = map_region_to_generation_skewed[child_region_index] & 0x0f; + assert (child_object_gen == get_region_gen_num (child_object)); int saved_child_object_gen = child_object_gen; uint8_t* saved_child_object = child_object; @@ -36745,14 +36801,16 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, if (fn == &gc_heap::relocate_address) { - child_object_gen = get_region_plan_gen_num (*poo); + size_t new_child_region_index = (size_t)*poo >> gc_heap::min_segment_size_shr; + child_object_gen = map_region_to_generation_skewed[new_child_region_index] >> 4; + assert (child_object_gen == get_region_plan_gen_num (*poo)); } if (child_object_gen < current_gen) { cg_pointers_found++; dprintf (4, ("cg pointer %Ix found, %Id so far", - (size_t)*poo, cg_pointers_found )); + (size_t)*poo, cg_pointers_found )); } #else //USE_REGIONS assert (condemned_gen == -1); diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 0dfae746e10d3f..04f54bf5fb237f 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1441,6 +1441,13 @@ class gc_heap // This relocates the SIP regions and return the next non SIP region. PER_HEAP heap_segment* relocate_advance_to_non_sip (heap_segment* region); + + PER_HEAP_ISOLATED + int generation_of_addr (uint8_t* addr); + + PER_HEAP_ISOLATED + int plan_generation_of_addr (uint8_t* addr); + #ifdef STRESS_REGIONS PER_HEAP void pin_by_gc (uint8_t* object); @@ -3662,6 +3669,17 @@ class gc_heap size_t* old_card_survived_per_region; PER_HEAP_ISOLATED size_t region_count; + + // table mapping region number to generation + // there are actually two generation numbers per entry: + // - the region's current generation + // - the region's planned generation, i.e. after the GC + PER_HEAP_ISOLATED + uint8_t* map_region_to_generation; + // same table as above, but skewed so that we can index + // directly with address >> min_segment_size_shr + PER_HEAP_ISOLATED + uint8_t* map_region_to_generation_skewed; #endif //USE_REGIONS #define max_oom_history_count 4 From dfc2cd86b1d60262e16f4e957127046af9c3b30a Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 14 Mar 2022 10:16:55 +0100 Subject: [PATCH 02/23] Fix server GC issue. 
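In the per-heap loops added to mark_phase and relocate_phase by the previous patch, generation_of was called without qualification. Under server GC that resolves against the heap executing the loop rather than the heap being visited, so the regions owned by the other heaps were likely never entered into the region-to-generation table. Asking the visited heap (hp->generation_of) makes the loop walk each heap's own generation list.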
--- src/coreclr/gc/gc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 7d999098903342..5cecfa1c4d4d85 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -25590,7 +25590,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS - generation *gen = generation_of (gen_number); + generation *gen = hp->generation_of (gen_number); for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) { uint8_t* addr = heap_segment_mem (region); @@ -32308,7 +32308,7 @@ void gc_heap::relocate_phase (int condemned_gen_number, { gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS - generation *gen = generation_of (gen_number); + generation *gen = hp->generation_of (gen_number); for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) { uint8_t* addr = heap_segment_mem (region); From 2d527b5a8206d76a7fb1329a0e770cfc2be1be1e Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Tue, 15 Mar 2022 12:27:20 +0100 Subject: [PATCH 03/23] Introduce ephemeral range (ephemeral_low/ephemeral_high) as the global limits where objects in ephemeral regions may be located. We have to do a range check on the child object in mark_through_cards_helper anyway, and using the ephemeral range allows us to skip the table lookup in the cases where a child object cannot possibly be in an ephemeral region. --- src/coreclr/gc/gc.cpp | 11 ++++++++++- src/coreclr/gc/gcpriv.h | 8 +++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index c05fe994e30265..8542b369452e1c 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2324,6 +2324,11 @@ VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; #endif //BACKGROUND_GC +#ifdef USE_REGIONS +uint8_t* gc_heap::ephemeral_low; +uint8_t* gc_heap::ephemeral_high; +#endif //USE_REGIONS + #ifndef MULTIPLE_HEAPS #ifdef SPINLOCK_HISTORY int gc_heap::spinlock_info_index = 0; @@ -25569,6 +25574,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) grow_mark_list_piece(); memset (map_region_to_generation, 0x22, region_count*sizeof(map_region_to_generation[0])); + ephemeral_low = MAX_PTR; + ephemeral_high = nullptr; for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) { #ifdef MULTIPLE_HEAPS @@ -25585,6 +25592,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) uint8_t* addr = heap_segment_mem (region); size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; map_region_to_generation_skewed[index] = 0xf0 | (uint8_t)gen_number; + ephemeral_low = min (ephemeral_low, addr); + ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); } } } @@ -36773,7 +36782,7 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, assert (nhigh == 0); assert (next_boundary == 0); uint8_t* child_object = *poo; - if (!is_in_heap_range (child_object)) + if ((child_object < ephemeral_low) || (ephemeral_high <= child_object)) return; size_t child_region_index = (size_t)child_object >> gc_heap::min_segment_size_shr; diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index d6bdb5838366ff..efabf004f15ec6 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -3730,7 +3730,13 @@ class gc_heap PER_HEAP void 
exit_gc_done_event_lock(); -#ifndef USE_REGIONS +#ifdef USE_REGIONS + PER_HEAP_ISOLATED + uint8_t* ephemeral_low; //lowest ephemeral address + + PER_HEAP_ISOLATED + uint8_t* ephemeral_high; //highest ephemeral address +#else //!USE_REGIONS PER_HEAP uint8_t* ephemeral_low; //lowest ephemeral address From c9e9694532eca7b0058bcce1fbcf81f8fd0fb58f Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Fri, 18 Mar 2022 14:50:35 +0100 Subject: [PATCH 04/23] Snapshot. --- src/coreclr/gc/gc.cpp | 45 ++++- src/coreclr/gc/gcinterface.h | 6 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 14 ++ .../vm/amd64/JitHelpers_FastWriteBarriers.asm | 161 ++++++++++++++++ src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 173 +++++++++++++++++- src/coreclr/vm/gcenv.ee.cpp | 2 + src/coreclr/vm/gcheaputilities.cpp | 2 + src/coreclr/vm/gcheaputilities.h | 3 + src/coreclr/vm/jitinterface.h | 15 +- 9 files changed, 406 insertions(+), 15 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 8542b369452e1c..8658de8fc76dba 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2010,7 +2010,12 @@ void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_hi GCToEEInterface::StompWriteBarrier(&args); } -void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high) +void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , uint8_t* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) { WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::Initialize; @@ -2026,6 +2031,12 @@ void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_h args.highest_address = g_gc_highest_address; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; + +#ifdef USE_REGIONS + args.region_to_generation_table = map_region_to_generation_skewed; + args.region_shr = region_shr; +#endif //USE_REGIONS + GCToEEInterface::StompWriteBarrier(&args); } @@ -11267,6 +11278,17 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) assert (gen_num < (1 << (sizeof (uint8_t) * 8))); assert (gen_num >= 0); heap_segment_gen_num (region) = (uint8_t)gen_num; + + uint8_t* region_start = get_region_start (region); + uint8_t* region_end = heap_segment_reserved (region); + + size_t region_index_start = ((size_t)region_start) >> min_segment_size_shr; + size_t region_index_end = ((size_t)region_end) >> min_segment_size_shr; + for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) + { + assert (gen_num <= max_generation); + map_region_to_generation_skewed[region_index] = (uint8_t)gen_num; + } } inline @@ -11295,6 +11317,17 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) } heap_segment_plan_gen_num (region) = plan_gen_num; + + uint8_t* region_start = get_region_start (region); + uint8_t* region_end = heap_segment_reserved (region); + + size_t region_index_start = ((size_t)region_start) >> min_segment_size_shr; + size_t region_index_end = ((size_t)region_end) >> min_segment_size_shr; + for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) + { + assert (gen_num <= max_generation); + map_region_to_generation_skewed[region_index] = (map_region_to_generation_skewed[region_index] & 0x0f) | (plan_gen_num & 0xf0); + } } inline @@ -11443,8 +11476,7 @@ void gc_heap::init_heap_segment (heap_segment* seg, gc_heap* hp #ifdef USE_REGIONS int gen_num_for_region = 
min (gen_num, max_generation); - heap_segment_gen_num (seg) = (uint8_t)gen_num_for_region; - heap_segment_plan_gen_num (seg) = gen_num_for_region; + set_region_gen_num (seg, gen_num_for_region); heap_segment_swept_in_plan (seg) = false; #endif //USE_REGIONS @@ -13975,11 +14007,14 @@ gc_heap::init_gc_heap (int h_number) if (heap_number == 0) { stomp_write_barrier_initialize( -#if defined(MULTIPLE_HEAPS) || defined(USE_REGIONS) +#if defined(USE_REGIONS) + g_gc_lowest_address, g_gc_highest_address, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr +#elif defined(MULTIPLE_HEAPS) reinterpret_cast(1), reinterpret_cast(~0) #else ephemeral_low, ephemeral_high -#endif //!MULTIPLE_HEAPS || USE_REGIONS +#endif //MULTIPLE_HEAPS || USE_REGIONS ); } diff --git a/src/coreclr/gc/gcinterface.h b/src/coreclr/gc/gcinterface.h index d7c08c44d9adce..b96b01b9936eaa 100644 --- a/src/coreclr/gc/gcinterface.h +++ b/src/coreclr/gc/gcinterface.h @@ -108,6 +108,12 @@ struct WriteBarrierParameters // The new write watch table, if we are using our own write watch // implementation. Used for WriteBarrierOp::SwitchToWriteWatch only. uint8_t* write_watch_table; + + // mapping table from region index to generation + uint8_t* region_to_generation_table; + + // shift count - how many bits to shift right to obtain region index from address + uint8_t region_shr; }; struct EtwGCSettingsInfo diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 0060ff036d3b0a..501307d0bbecab 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -140,6 +140,20 @@ endif align 16 Exit: REPRET + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE else ; JIT_WriteBarrier_PostGrow64 diff --git a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm index 63dd1fadc73b59..509f02ffb65895 100644 --- a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm +++ b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm @@ -201,6 +201,81 @@ endif ret LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT +LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT + align 8 + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
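+ ; On entry (Windows x64): rcx = dst (the location being written), rdx = ref (the object reference being stored).
+ ; rax, r8 and r9 are used as scratch below; r8 keeps a copy of dst for the card table update.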
+ mov [rcx], rdx + + mov r8, rcx + +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration + mov rax, 0F0F0F0F0F0F0F0F0h + +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest + shr rcx, 16h ; compute region index + + ; Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rcx + rax], 0 + jne NotGen0 + REPRET + + NOP_2_BYTE ; padding for alignment of constant + + NotGen0: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jae NotLow + ret + NotLow: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jb NotHigh + REPRET + NotHigh: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc + shr rdx, 16h ; compute region index + mov dl, [rdx + rax] + cmp dl, [rcx + rax] + jb isOldToYoung + REPRET + nop + + IsOldToYoung: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + + mov ecx, r8d + shr r8, 0Bh + shr ecx, 8 + and ecx, 7 + mov dl, 1 + shl dl, cl + test byte ptr [r8 + rax], dl + je UpdateCardTable + REPRET + + UpdateCardTable: + lock or byte ptr [r8 + rax], dl +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable + mov rax, 0F0F0F0F0F0F0F0F0h + shr r8, 0Ah + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [r8 + rax], 0FFh +endif + ret +LEAF_END_MARKED JIT_WriteBarrier_Region64, _TEXT + endif @@ -410,6 +485,92 @@ endif LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT endif + +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT + align 8 + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
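+ ; On entry (Windows x64): rcx = dst, rdx = ref, as in JIT_WriteBarrier_Region64 above.
+ ; This variant first records the written location in the software write watch table, then performs the same region generation checks.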
+ mov [rcx], rdx + + ; Update the write watch table if necessary + mov rax, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, r8 + mov r8, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest + shr rcx, 16h ; compute region index + cmp byte ptr [rax], 0h + jne JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration + mov byte ptr [rax], 0FFh + +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rcx + rax], 0 + jne NotGen0 + REPRET + + NOP_2_BYTE ; padding for alignment of constant + NOP_2_BYTE ; padding for alignment of constant + NOP_2_BYTE ; padding for alignment of constant + + NotGen0: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jae NotLow + ret + NotLow: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jb NotHigh + REPRET + NotHigh: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc + shr rdx, 16h ; compute region index + mov dl, [rdx + rax] + cmp dl, [rcx + rax] + jb isOldToYoung + REPRET + nop + + IsOldToYoung: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + + mov ecx, r8d + shr r8, 0Bh + shr ecx, 8 + and ecx, 7 + mov dl, 1 + shl dl, cl + test byte ptr [r8 + rax], dl + je UpdateCardTable + REPRET + + UpdateCardTable: + lock or byte ptr [r8 + rax], dl +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable + mov rax, 0F0F0F0F0F0F0F0F0h + shr r8, 0Ah + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [r8 + rax], 0FFh +endif + ret +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Region64, _TEXT + endif diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 02b023777b8a94..d23b3c7511dccd 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -50,6 +50,18 @@ EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardBundleTable(); EXTERN_C void JIT_WriteBarrier_SVR64_End(); #endif // FEATURE_SVR_GC +EXTERN_C void JIT_WriteBarrier_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_CardTable(); +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable(); +#endif +EXTERN_C void JIT_WriteBarrier_Region64_End(); + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref); EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable(); @@ -79,6 +91,18 @@ EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable(); #endif EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End(); #endif // FEATURE_SVR_GC 
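+// Patch labels for the region-aware write-watch write barrier; these must match the PATCH_LABEL names emitted in JitHelpers_FastWriteBarriers.asm.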
+EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable(); +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable(); +#endif +EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_End(); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WriteBarrierManager g_WriteBarrierManager; @@ -152,6 +176,21 @@ void WriteBarrierManager::Validate() #endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES #endif // FEATURE_SVR_GC + PBYTE pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionToGeneration, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); +#endif + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP PBYTE pWriteWatchTableImmediate; @@ -194,6 +233,22 @@ void WriteBarrierManager::Validate() _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES #endif // FEATURE_SVR_GC + + pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionToGeneration, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + 
pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); +#endif + #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } @@ -214,6 +269,8 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() case WRITE_BARRIER_SVR64: return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR64); #endif // FEATURE_SVR_GC + case WRITE_BARRIER_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_Region64); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PreGrow64); @@ -223,6 +280,8 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() case WRITE_BARRIER_WRITE_WATCH_SVR64: return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64); #endif // FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_Region64); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); @@ -246,6 +305,8 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa case WRITE_BARRIER_SVR64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR64); #endif // FEATURE_SVR_GC + case WRITE_BARRIER_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Region64); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PreGrow64); @@ -255,6 +316,8 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa case WRITE_BARRIER_WRITE_WATCH_SVR64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_SVR64); #endif // FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Region64); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_BUFFER: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier); @@ -350,6 +413,28 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, } #endif // FEATURE_SVR_GC + case WRITE_BARRIER_REGIONS64: + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); +#endif + break; + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: { @@ -406,6 +491,32 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, break; } #endif // FEATURE_SVR_GC + + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_WriteWatchTable, 2); + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); +#endif + break; + + #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: @@ -440,12 +551,14 @@ void WriteBarrierManager::Initialize() #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64)); #endif // FEATURE_SVR_GC + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_REGIONS64)); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64)); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64)); #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64)); #endif // FEATURE_SVR_GC + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_REGIONS64)); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #if !defined(CODECOVERAGE) @@ -471,12 +584,18 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W case WRITE_BARRIER_UNINITIALIZED: #ifdef _DEBUG // Use the default slow write barrier some of the time in debug builds because of of contains some good asserts - if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DbgRandomOnExe(0.5)) { - break; - } + //if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DbgRandomOnExe(0.5)) { + // break; + //} #endif - - writeBarrierType = GCHeapUtilities::IsServerHeap() ? WRITE_BARRIER_SVR64 : WRITE_BARRIER_PREGROW64; + if (g_region_shr != 0) + { + writeBarrierType = WRITE_BARRIER_REGIONS64; + } + else + { + writeBarrierType = GCHeapUtilities::IsServerHeap() ? 
WRITE_BARRIER_SVR64 : WRITE_BARRIER_PREGROW64; + } continue; case WRITE_BARRIER_PREGROW64: @@ -494,6 +613,9 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W break; #endif // FEATURE_SVR_GC + case WRITE_BARRIER_REGIONS64: + break; + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: if (bReqUpperBoundsCheck) @@ -509,6 +631,8 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W case WRITE_BARRIER_WRITE_WATCH_SVR64: break; #endif // FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + break; #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: @@ -540,8 +664,10 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) switch (m_currentWriteBarrier) { case WRITE_BARRIER_POSTGROW64: + case WRITE_BARRIER_REGIONS64: #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { // Change immediate if different from new g_ephermeral_high. @@ -612,6 +738,7 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #ifdef FEATURE_SVR_GC case WRITE_BARRIER_WRITE_WATCH_SVR64: #endif // FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); @@ -625,6 +752,34 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus } #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + switch (m_currentWriteBarrier) + { + case WRITE_BARRIER_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + if (*(UINT64*)m_pRegionToGenTableImmediate != (size_t)g_region_to_generation_table) + { + ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pRegionToGenTableImmediate, sizeof(UINT64)); + *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_region_to_generation_table; + stompWBCompleteActions |= SWB_ICACHE_FLUSH; + } + if (*m_pRegionShrDest != g_region_shr) + { + ExecutableWriterHolder writeWatchTableImmediateWriterHolder(m_pRegionShrDest, sizeof(UINT8)); + *writeWatchTableImmediateWriterHolder.GetRW() = g_region_shr; + stompWBCompleteActions |= SWB_ICACHE_FLUSH; + } + if (*m_pRegionShrSrc != g_region_shr) + { + ExecutableWriterHolder writeWatchTableImmediateWriterHolder(m_pRegionShrSrc, sizeof(UINT8)); + *writeWatchTableImmediateWriterHolder.GetRW() = g_region_shr; + stompWBCompleteActions |= SWB_ICACHE_FLUSH; + } + break; + + default: + break; // clang seems to require all enum values to be covered for some reason + } + if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, sizeof(UINT64)); @@ -668,6 +823,10 @@ int WriteBarrierManager::SwitchToWriteWatchBarrier(bool isRuntimeSuspended) break; #endif // FEATURE_SVR_GC + case WRITE_BARRIER_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_REGIONS64; + break; + default: UNREACHABLE(); } @@ -698,6 +857,10 @@ int WriteBarrierManager::SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) break; #endif // FEATURE_SVR_GC + case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_REGIONS64; + break; + default: UNREACHABLE(); } diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 
2085ff2a1024ba..873613665114db 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -1026,6 +1026,8 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) g_lowest_address = args->lowest_address; g_highest_address = args->highest_address; + g_region_to_generation_table = args->region_to_generation_table; + g_region_shr = args->region_shr; stompWBCompleteActions |= ::StompWriteBarrierResize(true, false); // StompWriteBarrierResize does not necessarily bash g_ephemeral_low diff --git a/src/coreclr/vm/gcheaputilities.cpp b/src/coreclr/vm/gcheaputilities.cpp index 08f864bcc26d7b..44b70796f01543 100644 --- a/src/coreclr/vm/gcheaputilities.cpp +++ b/src/coreclr/vm/gcheaputilities.cpp @@ -17,6 +17,8 @@ GPTR_IMPL_INIT(uint8_t, g_highest_address, nullptr); GVAL_IMPL_INIT(GCHeapType, g_heap_type, GC_HEAP_INVALID); uint8_t* g_ephemeral_low = (uint8_t*)1; uint8_t* g_ephemeral_high = (uint8_t*)~0; +uint8_t* g_region_to_generation_table = nullptr; +uint8_t g_region_shr = 0; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES uint32_t* g_card_bundle_table = nullptr; diff --git a/src/coreclr/vm/gcheaputilities.h b/src/coreclr/vm/gcheaputilities.h index 3d648862740afb..5549f908640639 100644 --- a/src/coreclr/vm/gcheaputilities.h +++ b/src/coreclr/vm/gcheaputilities.h @@ -29,6 +29,9 @@ extern "C" gc_alloc_context g_global_alloc_context; extern "C" uint32_t* g_card_bundle_table; extern "C" uint8_t* g_ephemeral_low; extern "C" uint8_t* g_ephemeral_high; +extern "C" uint8_t* g_region_to_generation_table; +extern "C" uint8_t g_region_shr; + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 4c35ac5713c0fa..863aa974e45a4a 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -262,12 +262,14 @@ class WriteBarrierManager #ifdef FEATURE_SVR_GC WRITE_BARRIER_SVR64, #endif // FEATURE_SVR_GC + WRITE_BARRIER_REGIONS64, #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WRITE_BARRIER_WRITE_WATCH_PREGROW64, WRITE_BARRIER_WRITE_WATCH_POSTGROW64, #ifdef FEATURE_SVR_GC WRITE_BARRIER_WRITE_WATCH_SVR64, #endif // FEATURE_SVR_GC + WRITE_BARRIER_WRITE_WATCH_REGIONS64, #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WRITE_BARRIER_BUFFER }; @@ -296,11 +298,14 @@ class WriteBarrierManager WriteBarrierType m_currentWriteBarrier; - PBYTE m_pWriteWatchTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | - PBYTE m_pLowerBoundImmediate; // PREGROW | POSTGROW | | WRITE_WATCH | - PBYTE m_pCardTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | - PBYTE m_pCardBundleTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | - PBYTE m_pUpperBoundImmediate; // | POSTGROW | | WRITE_WATCH | + PBYTE m_pWriteWatchTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | REGION + PBYTE m_pLowerBoundImmediate; // PREGROW | POSTGROW | | WRITE_WATCH | REGION + PBYTE m_pCardTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | REGION + PBYTE m_pCardBundleTableImmediate; // PREGROW | POSTGROW | SVR | WRITE_WATCH | REGION + PBYTE m_pUpperBoundImmediate; // | POSTGROW | | WRITE_WATCH | REGION + PBYTE m_pRegionToGenTableImmediate; // | | | WRITE_WATCH | REGION + PBYTE m_pRegionShrDest; // | | | WRITE_WATCH | REGION + PBYTE m_pRegionShrSrc; // | | | WRITE_WATCH | RETION }; #endif // TARGET_AMD64 From 242101dc9eef00adc9bbeb63595050aa70b3a296 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 21 Mar 2022 17:40:14 +0100 Subject: [PATCH 05/23] Changes to update ephemeral_low, 
ephemeral_high and the writebarrier. --- src/coreclr/gc/gc.cpp | 83 ++++++++++++++++++++++++++++++++++++- src/coreclr/vm/gcenv.ee.cpp | 2 + 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 8658de8fc76dba..0ca8c7249d6730 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -1998,7 +1998,12 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b GCToEEInterface::StompWriteBarrier(&args); } -void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_high) +void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , uint8_t* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) { initGCShadow(); @@ -2007,6 +2012,10 @@ void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_hi args.is_runtime_suspended = true; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; +#ifdef USE_REGIONS + args.region_to_generation_table = map_region_to_generation_skewed; + args.region_shr = region_shr; +#endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } @@ -11284,10 +11293,44 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) size_t region_index_start = ((size_t)region_start) >> min_segment_size_shr; size_t region_index_end = ((size_t)region_end) >> min_segment_size_shr; + uint8_t entry = (uint8_t)((gen_num << 4) | gen_num); for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { assert (gen_num <= max_generation); - map_region_to_generation_skewed[region_index] = (uint8_t)gen_num; + map_region_to_generation_skewed[region_index] = entry; + } + if (gen_num <= soh_gen1) + { + bool success_low = false; + bool success_high = false; + bool ephemeral_change = false; + while (!(success_low && success_high)) + { + uint8_t* current_ephemeral_low = ephemeral_low; + if (current_ephemeral_low <= region_start) + success_low = true; + else + { + success_low = (Interlocked::CompareExchangePointer (&ephemeral_low, region_start, current_ephemeral_low) == current_ephemeral_low); + if (success_low) + ephemeral_change = true; + } + + uint8_t* current_ephemeral_high = ephemeral_high; + if (current_ephemeral_high >= region_end) + success_high = true; + else + { + success_high = (Interlocked::CompareExchangePointer (&ephemeral_high, region_end, current_ephemeral_high) == current_ephemeral_high); + if (success_high) + ephemeral_change = true; + } + } + if (ephemeral_change) + { + stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); + } } } @@ -30016,6 +30059,36 @@ void gc_heap::plan_phase (int condemned_gen_number) } #endif //!USE_REGIONS +#ifdef USE_REGIONS + memset (map_region_to_generation, 0x22, region_count*sizeof(map_region_to_generation[0])); + ephemeral_low = MAX_PTR; + ephemeral_high = nullptr; + for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) + { + uint8_t table_entry = (uint8_t)((gen_number << 4) | gen_number); +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + generation *gen = hp->generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + { + uint8_t* addr = heap_segment_mem (region); + size_t index = 
(size_t)addr >> gc_heap::min_segment_size_shr; + map_region_to_generation_skewed[index] = table_entry; + ephemeral_low = min (ephemeral_low, addr); + ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); + } + } + } + stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); +#endif //USE_REGIONS + #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting after Promotion granted")); @@ -44137,6 +44210,12 @@ HRESULT GCHeap::Initialize() initGCShadow(); // If we are debugging write barriers, initialize heap shadow +#ifdef USE_REGIONS + gc_heap::ephemeral_low = MAX_PTR; + + gc_heap::ephemeral_high = nullptr; +#endif //!USE_REGIONS + #ifdef MULTIPLE_HEAPS for (uint32_t i = 0; i < nhp; i++) diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 873613665114db..8011e2f0b3e52a 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -1002,6 +1002,8 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) assert(args->ephemeral_high != nullptr); g_ephemeral_low = args->ephemeral_low; g_ephemeral_high = args->ephemeral_high; + g_region_to_generation_table = args->region_to_generation_table; + g_region_shr = args->region_shr; stompWBCompleteActions |= ::StompWriteBarrierEphemeral(args->is_runtime_suspended); break; case WriteBarrierOp::Initialize: From 01c18feb12b671350a3b80cd98e160813069c4ed Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Wed, 23 Mar 2022 08:30:54 +0100 Subject: [PATCH 06/23] Fix issues: - accidentally removed setting plan gen num - need to make the default write barrier larger so we have enough space - fix copy & paste issue in GetCurrentWriteBarrierCode --- src/coreclr/gc/gc.cpp | 1 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 11 +++++++++++ src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 0ca8c7249d6730..18788956069e88 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -11520,6 +11520,7 @@ void gc_heap::init_heap_segment (heap_segment* seg, gc_heap* hp #ifdef USE_REGIONS int gen_num_for_region = min (gen_num, max_generation); set_region_gen_num (seg, gen_num_for_region); + heap_segment_plan_gen_num (seg) = gen_num_for_region; heap_segment_swept_in_plan (seg) = false; #endif //USE_REGIONS diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 501307d0bbecab..9a159d4d446405 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -154,6 +154,17 @@ endif NOP_3_BYTE NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + else ; JIT_WriteBarrier_PostGrow64 diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index d23b3c7511dccd..a33f8dbb4ef02c 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -281,7 +281,7 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64); #endif // FEATURE_SVR_GC case WRITE_BARRIER_WRITE_WATCH_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_Region64); + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Region64); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: 
UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); From d5714e90c78e5f83e456a55f10dd257da59f66f0 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Sat, 26 Mar 2022 13:55:45 +0100 Subject: [PATCH 07/23] Fix issue where the card bundle bits at the beginning and end of regions would remain stuck. This was because in these cases we would not explore the complete range of card table entries for the card bundle. --- src/coreclr/gc/gc.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 18788956069e88..a1b71270b4760a 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -36694,8 +36694,19 @@ BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end) cardw = (card_word - &card_table[0]); return TRUE; } - else if ((cardw <= card_bundle_cardw (cardb)) && - (card_word == &card_table [card_bundle_cardw (cardb+1)])) + // explore the beginning of the card bundle so we can possibly clear it + if (cardw == (card_bundle_cardw (cardb) + 1) && !card_table[cardw-1]) + { + cardw--; + } + // explore the end of the card bundle so we can possibly clear it + card_word_end = &card_table[card_bundle_cardw (cardb+1)]; + while ((card_word < card_word_end) && !(*card_word)) + { + card_word++; + } + if ((cardw <= card_bundle_cardw (cardb)) && + (card_word == card_word_end)) { // a whole bundle was explored and is empty dprintf (3, ("gc: %d, find_card_dword clear bundle: %Ix cardw:[%Ix,%Ix[", From 3fcca4ea4d7b30e4f3de25fc0d9cb0be4afb0f5f Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 31 Mar 2022 15:39:57 +0200 Subject: [PATCH 08/23] More optimizations: - use BitScanForward in find_card, find_card_dword - when we start a new card dword, consult the card bundles first - change JIT_ByRefWriteBarrier to consult the region_to_generation_table and set only single bits in the card table. --- src/coreclr/gc/gc.cpp | 46 +++++++++++++++------ src/coreclr/vm/amd64/JitHelpers_Fast.asm | 52 ++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 13 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index a1b71270b4760a..4f7c43f59d93f8 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -11345,7 +11345,7 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) gen_num, plan_gen_num, supposed_plan_gen_num, ((plan_gen_num < supposed_plan_gen_num) ? 
"DEMOTED" : "ND"))); - if (plan_gen_num < supposed_plan_gen_num) + if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0)) { if (!settings.demotion) { @@ -14052,7 +14052,7 @@ gc_heap::init_gc_heap (int h_number) { stomp_write_barrier_initialize( #if defined(USE_REGIONS) - g_gc_lowest_address, g_gc_highest_address, + ephemeral_low, ephemeral_high, map_region_to_generation_skewed, (uint8_t)min_segment_size_shr #elif defined(MULTIPLE_HEAPS) reinterpret_cast(1), reinterpret_cast(~0) @@ -36675,11 +36675,21 @@ BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end) while (1) { // Find a non-zero bundle - while ((cardb < end_cardb) && (card_bundle_set_p (cardb) == 0)) + while (cardb < end_cardb) { - cardb++; + uint32_t cbw = card_bundle_table[card_bundle_word(cardb)] >> card_bundle_bit (cardb); + DWORD bit_index; + if (BitScanForward (&bit_index, cbw)) + { + cardb += bit_index; + break; + } + else + { + cardb += sizeof(cbw)*8 - card_bundle_bit (cardb); + } } - if (cardb == end_cardb) + if (cardb >= end_cardb) return FALSE; uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb),cardw)]; @@ -36762,14 +36772,23 @@ BOOL gc_heap::find_card(uint32_t* card_table, // Find the first card which is set last_card_word = &card_table [card_word (card)]; bit_position = card_bit (card); - card_word_value = (*last_card_word) >> bit_position; +#ifdef CARD_BUNDLE + // if we have card bundles, consult them before fetching a new card word + if (bit_position == 0) + { + card_word_value = 0; + } + else +#endif + { + card_word_value = (*last_card_word) >> bit_position; + } if (!card_word_value) { - bit_position = 0; #ifdef CARD_BUNDLE // Using the card bundle, go through the remaining card words between here and // card_word_end until we find one that is non-zero. - size_t lcw = card_word(card) + 1; + size_t lcw = card_word(card) + (bit_position != 0); if (gc_heap::find_card_dword (lcw, card_word_end) == FALSE) { return FALSE; @@ -36779,6 +36798,7 @@ BOOL gc_heap::find_card(uint32_t* card_table, last_card_word = &card_table [lcw]; card_word_value = *last_card_word; } + bit_position = 0; #else //CARD_BUNDLE // Go through the remaining card words between here and card_word_end until we find // one that is non-zero. 
@@ -36803,11 +36823,11 @@ BOOL gc_heap::find_card(uint32_t* card_table, // Look for the lowest bit set if (card_word_value) { - while (!(card_word_value & 1)) - { - bit_position++; - card_word_value = card_word_value / 2; - } + DWORD bit_index; + uint8_t res = BitScanForward (&bit_index, card_word_value); + assert (res != 0); + card_word_value >>= bit_index; + bit_position += bit_index; } // card is the card word index * card size + the bit index within the card diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 9a159d4d446405..28c7d4dc62bdac 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -25,6 +25,8 @@ EXTERN g_ephemeral_high:QWORD EXTERN g_lowest_address:QWORD EXTERN g_highest_address:QWORD EXTERN g_card_table:QWORD +EXTERN g_region_shr:BYTE +EXTERN g_region_to_generation_table:QWORD ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES EXTERN g_card_bundle_table:QWORD @@ -335,6 +337,55 @@ endif cmp rcx, [g_ephemeral_high] jnb Exit + ; do the following checks only if we are allowed to trash rax + ; otherwise we don't have enough registers +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov rax, rcx + + mov cl, [g_region_shr] + test cl, cl + je SkipCheck + + ; check if the source is in gen 2 - then it's not an ephemeral pointer + shr rax, cl + add rax, [g_region_to_generation_table] + cmp byte ptr [rax], 22h + je Exit + + ; check if the destination happens to be in gen 0 + mov rax, rdi + shr rax, cl + add rax, [g_region_to_generation_table] + cmp byte ptr [rax], 0 + je Exit + SkipCheck: + + ; compute card table bit + mov rcx, rdi + mov al, 1 + shr rcx, 8 + and cl, 7 + shl al, cl + + ; move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + ; Check if we need to update the card table + ; Calc pCardByte + shr rcx, 0Bh + add rcx, [g_card_table] + + ; Check if this card table bit is already set + test byte ptr [rcx], al + je SetCardTableBit + REPRET + + SetCardTableBit: + lock or byte ptr [rcx], al +else + ; move current rdi value into rcx and then increment the pointers mov rcx, rdi add rsi, 8h @@ -352,6 +403,7 @@ endif UpdateCardTable: mov byte ptr [rcx], 0FFh +endif ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES ; check if we need to update the card bundle table ; restore destination address from rdi - rdi has been incremented by 8 already From 08fbf50c4b809f6e1292c32fe67dc177f0ca31ba Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 31 Mar 2022 17:58:26 +0200 Subject: [PATCH 09/23] Cleanup. 
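This consolidates the region-to-generation bookkeeping behind helpers and adds verification: get_gen_num_for_address and get_plan_gen_num_for_address decode the two nibbles of a table entry, verify_region_to_generation_map cross-checks the table against heap_segment_gen_num and heap_segment_plan_gen_num in debug builds, and compute_ephemeral_range re-derives ephemeral_low/ephemeral_high from the gen0/gen1 region lists. For reference, a minimal standalone sketch of the packed byte format those helpers assume (pack_entry is an illustrative name, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    // Each table byte holds two nibbles:
    //   low nibble  = the region's current generation
    //   high nibble = the region's planned generation after the GC
    static uint8_t pack_entry (int gen_num, int plan_gen_num)   // illustrative helper
    {
        return (uint8_t)((plan_gen_num << 4) | gen_num);
    }

    int main (void)
    {
        uint8_t e = pack_entry (2, 2);      // 0x22: the memset fill value and the
                                            // value checked by "cmp byte ptr [rax], 22h"
        assert ((e & 0x0f) == 2);           // current generation (see get_gen_num_for_address)
        assert ((e >> 4) == 2);             // planned generation (see get_plan_gen_num_for_address)
        assert (pack_entry (0, 1) == 0x10); // a gen0 region planned for promotion to gen1
        return 0;
    }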
--- src/coreclr/gc/gc.cpp | 178 +++++++++++++++++++++------------------- src/coreclr/gc/gcpriv.h | 5 +- 2 files changed, 97 insertions(+), 86 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 4f7c43f59d93f8..1e6f85aa98af4f 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -3528,6 +3528,24 @@ size_t get_basic_region_index_for_address (uint8_t* address) return (basic_region_index - ((size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr)); } +inline int get_gen_num_for_address (uint8_t *address) +{ + assert ((g_gc_lowest_address <= address) && (address < g_gc_highest_address)); + size_t region_index = (size_t)address >> gc_heap::min_segment_size_shr; + int gen_num = gc_heap::map_region_to_generation_skewed[region_index] & 0x0f; + assert ((0 <= gen_num) && (gen_num <= 2)); + return gen_num; +} + +inline int get_plan_gen_num_for_address (uint8_t *address) +{ + assert ((g_gc_lowest_address <= address) && (address < g_gc_highest_address)); + size_t region_index = (size_t)address >> gc_heap::min_segment_size_shr; + int plan_gen_num = gc_heap::map_region_to_generation_skewed[region_index] >> 4; + assert ((0 <= plan_gen_num) && (plan_gen_num <= 2)); + return plan_gen_num; +} + // Go from a random address to its region info. The random address could be // in one of the basic regions of a larger region so we need to check for that. inline @@ -11291,13 +11309,13 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) uint8_t* region_start = get_region_start (region); uint8_t* region_end = heap_segment_reserved (region); - size_t region_index_start = ((size_t)region_start) >> min_segment_size_shr; - size_t region_index_end = ((size_t)region_end) >> min_segment_size_shr; + size_t region_index_start = get_basic_region_index_for_address (region_start); + size_t region_index_end = get_basic_region_index_for_address (region_end); uint8_t entry = (uint8_t)((gen_num << 4) | gen_num); for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { assert (gen_num <= max_generation); - map_region_to_generation_skewed[region_index] = entry; + map_region_to_generation[region_index] = entry; } if (gen_num <= soh_gen1) { @@ -11364,12 +11382,12 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) uint8_t* region_start = get_region_start (region); uint8_t* region_end = heap_segment_reserved (region); - size_t region_index_start = ((size_t)region_start) >> min_segment_size_shr; - size_t region_index_end = ((size_t)region_end) >> min_segment_size_shr; + size_t region_index_start = get_basic_region_index_for_address (region_start); + size_t region_index_end = get_basic_region_index_for_address (region_end); for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { - assert (gen_num <= max_generation); - map_region_to_generation_skewed[region_index] = (map_region_to_generation_skewed[region_index] & 0x0f) | (plan_gen_num & 0xf0); + assert (plan_gen_num <= max_generation); + map_region_to_generation[region_index] = (plan_gen_num << 4) | (map_region_to_generation[region_index] & 0x0f); } } @@ -25518,6 +25536,68 @@ void gc_heap::record_mark_time (uint64_t& mark_time, } #endif // FEATURE_EVENT_TRACE +#ifdef USE_REGIONS +void gc_heap::verify_region_to_generation_map() +{ +#ifdef _DEBUG + uint8_t* local_ephemeral_low = MAX_PTR; + uint8_t* local_ephemeral_high = nullptr; + for (int gen_number = soh_gen0; gen_number < total_generation_count; gen_number++) + { 
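+ // every region on every heap must have both nibbles of its table entry in agreement with heap_segment_gen_num / heap_segment_plan_gen_num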
+#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + generation *gen = hp->generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + { + size_t region_index_start = get_basic_region_index_for_address (get_region_start (region)); + size_t region_index_end = get_basic_region_index_for_address (heap_segment_reserved (region)); + int gen_num = min (gen_number, soh_gen2); + assert (gen_num == heap_segment_gen_num (region)); + int plan_gen_num = heap_segment_plan_gen_num (region); + for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) + { + assert ((map_region_to_generation[region_index] & 0x0f) == gen_num); + assert ((map_region_to_generation[region_index] >> 4) == plan_gen_num); + } + } + } + } +#endif //_DEBUG +} + +// recompute ephemeral range - it may have become too large because of temporary allocation +// and deallocation of regions +void gc_heap::compute_ephemeral_range() +{ + ephemeral_low = MAX_PTR; + ephemeral_high = nullptr; + for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + generation *gen = hp->generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + { + ephemeral_low = min (ephemeral_low, get_region_start (region)); + ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); + } + } + } +} +#endif + void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) { assert (settings.concurrent == FALSE); @@ -25651,31 +25731,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) special_sweep_p = false; region_count = global_region_allocator.get_used_region_count(); grow_mark_list_piece(); - - memset (map_region_to_generation, 0x22, region_count*sizeof(map_region_to_generation[0])); - ephemeral_low = MAX_PTR; - ephemeral_high = nullptr; - for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) - { -#ifdef MULTIPLE_HEAPS - for (int i = 0; i < n_heaps; i++) - { - gc_heap* hp = g_heaps[i]; -#else //MULTIPLE_HEAPS - { - gc_heap* hp = pGenGCHeap; -#endif //MULTIPLE_HEAPS - generation *gen = hp->generation_of (gen_number); - for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) - { - uint8_t* addr = heap_segment_mem (region); - size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; - map_region_to_generation_skewed[index] = 0xf0 | (uint8_t)gen_number; - ephemeral_low = min (ephemeral_low, addr); - ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); - } - } - } + verify_region_to_generation_map(); + compute_ephemeral_range(); #endif //USE_REGIONS GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false); @@ -30061,33 +30118,8 @@ void gc_heap::plan_phase (int condemned_gen_number) #endif //!USE_REGIONS #ifdef USE_REGIONS - memset (map_region_to_generation, 0x22, region_count*sizeof(map_region_to_generation[0])); - ephemeral_low = MAX_PTR; - ephemeral_high = nullptr; - for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) - { - uint8_t table_entry = 
(uint8_t)((gen_number << 4) | gen_number); -#ifdef MULTIPLE_HEAPS - for (int i = 0; i < n_heaps; i++) - { - gc_heap* hp = g_heaps[i]; -#else //MULTIPLE_HEAPS - { - gc_heap* hp = pGenGCHeap; -#endif //MULTIPLE_HEAPS - generation *gen = hp->generation_of (gen_number); - for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) - { - uint8_t* addr = heap_segment_mem (region); - size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; - map_region_to_generation_skewed[index] = table_entry; - ephemeral_low = min (ephemeral_low, addr); - ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); - } - } - } - stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, - map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); + verify_region_to_generation_map (); + compute_ephemeral_range(); #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS @@ -32405,26 +32437,8 @@ void gc_heap::relocate_phase (int condemned_gen_number, #endif //FEATURE_EVENT_TRACE #ifdef USE_REGIONS - for (int gen_number = soh_gen0; gen_number <= soh_gen2; gen_number++) - { -#ifdef MULTIPLE_HEAPS - for (int i = 0; i < n_heaps; i++) - { - gc_heap* hp = g_heaps[i]; -#else //MULTIPLE_HEAPS - { - gc_heap* hp = pGenGCHeap; -#endif //MULTIPLE_HEAPS - generation *gen = hp->generation_of (gen_number); - for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) - { - uint8_t* addr = heap_segment_mem (region); - size_t index = (size_t)addr >> gc_heap::min_segment_size_shr; - int plan_gen = heap_segment_plan_gen_num (region); - map_region_to_generation_skewed[index] = (uint8_t)((plan_gen<<4) | (map_region_to_generation_skewed[index] & 0x0f)); - } - } - } + verify_region_to_generation_map(); + compute_ephemeral_range(); #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS @@ -36925,8 +36939,7 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, if ((child_object < ephemeral_low) || (ephemeral_high <= child_object)) return; - size_t child_region_index = (size_t)child_object >> gc_heap::min_segment_size_shr; - int child_object_gen = map_region_to_generation_skewed[child_region_index] & 0x0f; + int child_object_gen = get_gen_num_for_address (child_object); assert (child_object_gen == get_region_gen_num (child_object)); int saved_child_object_gen = child_object_gen; uint8_t* saved_child_object = child_object; @@ -36939,8 +36952,7 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, if (fn == &gc_heap::relocate_address) { - size_t new_child_region_index = (size_t)*poo >> gc_heap::min_segment_size_shr; - child_object_gen = map_region_to_generation_skewed[new_child_region_index] >> 4; + child_object_gen = get_plan_gen_num_for_address (*poo); assert (child_object_gen == get_region_plan_gen_num (*poo)); } diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index efabf004f15ec6..f745536de46ffd 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1443,11 +1443,10 @@ class gc_heap heap_segment* relocate_advance_to_non_sip (heap_segment* region); PER_HEAP_ISOLATED - int generation_of_addr (uint8_t* addr); + void verify_region_to_generation_map(); PER_HEAP_ISOLATED - int plan_generation_of_addr (uint8_t* addr); - + void compute_ephemeral_range(); #ifdef STRESS_REGIONS PER_HEAP void pin_by_gc (uint8_t* object); From 7e864c7d023101fe4f762e7223b6f4cf08ee4670 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 4 Apr 2022 17:14:39 +0200 Subject: [PATCH 10/23] Commit only 
the part of the map_region_to_generation table that is needed. --- src/coreclr/gc/gc.cpp | 24 ++++++++++++++++++------ src/coreclr/gc/gcpriv.h | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 1e6f85aa98af4f..2f2cf1ad60f93d 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -4045,6 +4045,13 @@ size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end) return sizeof (seg_mapping)*((size_t)(end - from) >> gc_heap::min_segment_size_shr); } +size_t size_region_to_generation_table_of (uint8_t* from, uint8_t* end) +{ + dprintf (1, ("from: %Ix, end: %Ix, size: %Ix", from, end, + sizeof (uint8_t)*(((size_t)(end - from) >> gc_heap::min_segment_size_shr)))); + return sizeof (uint8_t)*((size_t)(end - from) >> gc_heap::min_segment_size_shr); +} + inline size_t seg_mapping_word_of (uint8_t* add) { @@ -8563,6 +8570,9 @@ void gc_heap::get_card_table_element_sizes (uint8_t* start, uint8_t* end, size_t sizes[software_write_watch_table_element] = SoftwareWriteWatch::GetTableByteSize(start, end); } #endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC +#ifdef USE_REGIONS + sizes[region_to_generation_table_element] = size_region_to_generation_table_of (start, end); +#endif //USE_REGIONS sizes[seg_mapping_table_element] = size_seg_mapping_table_of (start, end); #ifdef BACKGROUND_GC if (gc_can_use_concurrent) @@ -8587,6 +8597,9 @@ void gc_heap::get_card_table_element_layout (uint8_t* start, uint8_t* end, size_ #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP sizeof(size_t), // software_write_watch_table_element #endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifdef USE_REGIONS + sizeof (uint8_t), // region_to_generation_table_element +#endif //USE_REGIONS sizeof (uint8_t*), // seg_mapping_table_element #ifdef BACKGROUND_GC // In order to avoid a dependency between commit_mark_array_by_range and this logic, it is easier to make sure @@ -8865,6 +8878,11 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) } #endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC +#ifdef USE_REGIONS + map_region_to_generation = mem + card_table_element_layout[region_to_generation_table_element]; + map_region_to_generation_skewed = map_region_to_generation - size_region_to_generation_table_of (0, g_gc_lowest_address); +#endif //USE_REGIONS + seg_mapping_table = (seg_mapping*)(mem + card_table_element_layout[seg_mapping_table_element]); seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table - size_seg_mapping_table_of (0, (align_lower_segment (g_gc_lowest_address)))); @@ -13205,12 +13223,6 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, if (!allocate_initial_regions(number_of_heaps)) return E_OUTOFMEMORY; - - size_t num_region_units = (g_gc_highest_address - g_gc_lowest_address) >> min_segment_size_shr; - map_region_to_generation = new (nothrow) uint8_t[num_region_units]; - if (map_region_to_generation == nullptr) - return E_OUTOFMEMORY; - map_region_to_generation_skewed = map_region_to_generation - ((size_t)g_gc_lowest_address >> min_segment_size_shr); } else { diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index f745536de46ffd..f3554940fd24f6 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1202,6 +1202,9 @@ enum bookkeeping_element #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP software_write_watch_table_element, #endif +#ifdef USE_REGIONS + region_to_generation_table_element, +#endif //USE_REGIONS 
seg_mapping_table_element, #ifdef BACKGROUND_GC mark_array_element, From 5e7be149435de3df1de2e1304aa40d2a9c0f4a86 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Wed, 6 Apr 2022 16:11:52 +0200 Subject: [PATCH 11/23] Initial version of more precise write barrier helpers for x64 / Linux. --- src/coreclr/vm/amd64/jithelpers_fast.S | 71 ++++++-- .../vm/amd64/jithelpers_fastwritebarriers.S | 159 ++++++++++++++++++ 2 files changed, 220 insertions(+), 10 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 63167ae2ae0d85..6ad69b99875b20 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -138,6 +138,31 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT .balign 16 Exit: REPRET + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + #else // JIT_WriteBarrier_PostGrow64 @@ -330,6 +355,34 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT cmp rcx, [rax] jnb Exit_ByRefWriteBarrier + mov rax, rcx + + PREPARE_EXTERNAL_VAR g_region_shr, rcx + mov cl, [rcx] + test cl, cl + je SkipCheck_ByRefWriteBarrier + + // check if the source is in gen 2 - then it's not an ephemeral pointer + shr rax, cl + PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 + mov r10, [r10] + cmp byte ptr [rax + r10], 0x22 + je Exit_ByRefWriteBarrier + + // check if the destination happens to be in gen 0 + mov rax, rdi + shr rax, cl + cmp byte ptr [rax + r10], 0 + je Exit_ByRefWriteBarrier + SkipCheck_ByRefWriteBarrier: + + // compute card table bit + mov rcx, rdi + mov al, 1 + shr rcx, 8 + and cl, 7 + shl al, cl + // move current rdi value into rcx and then increment the pointers mov rcx, rdi add rsi, 0x8 @@ -337,19 +390,17 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT // Check if we need to update the card table // Calc pCardByte - shr rcx, 0x0B - - PREPARE_EXTERNAL_VAR g_card_table, rax - mov rax, [rax] - - // Check if this card is dirty - cmp byte ptr [rcx + rax], 0xFF + shr rcx, 0xB + PREPARE_EXTERNAL_VAR g_card_table, r10 + mov r10, [r10] - jne UpdateCardTable_ByRefWriteBarrier + // Check if this card table bit is already set + test byte ptr [rcx + r10], al + je SetCardTableBit_ByRefWriteBarrier REPRET - UpdateCardTable_ByRefWriteBarrier: - mov byte ptr [rcx + rax], 0xFF + SetCardTableBit_ByRefWriteBarrier: + lock or byte ptr [rcx + r10], al #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) diff --git a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S index 70d2f2072aa51a..c3560dc4dd212a 100644 --- a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S @@ -213,6 +213,80 @@ LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT #endif + .balign 8 +LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rdi], rsi + + mov r8, rdi + +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration + movabs rax, 0xF0F0F0F0F0F0F0F0 + +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest + shr rdi, 0x16 // compute region index + + // Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rdi + rax], 0 + jne NotGen0_Region64 + REPRET + + NOP_2_BYTE // padding for alignment of constant + + NotGen0_Region64: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + jae NotLow_Region64 + ret + NotLow_Region64: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + jb NotHigh_Region64 + REPRET + NotHigh_Region64: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc + shr rsi, 0x16 // compute region index + mov dl, [rsi + rax] + cmp dl, [rdi + rax] + jb IsOldToYoung_Region64 + REPRET + nop + + IsOldToYoung_Region64: +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + + mov ecx, r8d + shr r8, 0xB + shr ecx, 8 + and ecx, 7 + mov dl, 1 + shl dl, cl + test byte ptr [r8 + rax], dl + je UpdateCardTable_Region64 + REPRET + + UpdateCardTable_Region64: + lock or byte ptr [r8 + rax], dl +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + shr r8, 0x0A + cmp byte ptr [r8 + rax], 0xFF + jne UpdateCardBundleTable_Region64 + REPRET + + UpdateCardBundleTable_Region64: + mov byte ptr [r8 + rax], 0xFF +#endif + ret +LEAF_END_MARKED JIT_WriteBarrier_Region64, _TEXT #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -457,4 +531,89 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT #endif + .balign 8 +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rdi], rsi + + // Update the write watch table if necessary + mov rax, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable + movabs r8, 0xF0F0F0F0F0F0F0F0 + shr rax, 0x0C // SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, r8 + mov r8, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest + shr rdi, 0x16 // compute region index + cmp byte ptr [rax], 0x0 + jne CheckGen0_WriteWatch_Region64 + mov byte ptr [rax], 0xFF + CheckGen0_WriteWatch_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration + mov rax, 0xF0F0F0F0F0F0F0F0 + + // Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rdi + rax], 0 + jne NotGen0_WriteWatch_Region64 + REPRET + + NOP_2_BYTE // padding for alignment of constant + NOP_2_BYTE // padding for alignment of constant + NOP_2_BYTE // padding for alignment of constant + + NotGen0_WriteWatch_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + jae NotLow_WriteWatch_Region64 + ret + NotLow_WriteWatch_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper + mov r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + jb NotHigh_WriteWatch_Region64 + REPRET + NotHigh_WriteWatch_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc + shr rsi, 0x16 // compute region index + mov dl, [rsi + rax] + cmp dl, [rdi + rax] + jb IsOldToYoung_WriteWatch_Region64 + REPRET + nop + + IsOldToYoung_WriteWatch_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable + mov rax, 0xF0F0F0F0F0F0F0F0 + + mov ecx, r8d + shr r8, 0xB + shr ecx, 8 + and ecx, 7 + mov dl, 1 + shl dl, cl + test byte ptr [r8 + rax], dl + je UpdateCardTable_WriteWatch_Region64 + REPRET + + UpdateCardTable_WriteWatch_Region64: + lock or byte ptr [r8 + rax], dl +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable + mov rax, 0xF0F0F0F0F0F0F0F0 + shr r8, 0x0A + cmp byte ptr [r8 + rax], 0xFF + jne UpdateCardBundleTable_WriteWatch_Region64 + REPRET + + UpdateCardBundleTable_WriteWatch_Region64: + mov byte ptr [r8 + rax], 0xFF +#endif + ret +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Region64, _TEXT + #endif From aacd62122f734426fb2a676582f0dbedbe5496fd Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 7 Apr 2022 15:16:19 +0200 Subject: [PATCH 12/23] Attempt to fix OSX code alignment issues following pattern seen in other code. 
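The JIT_WriteBarrier_Region64 helper added in the previous patch encodes the region-aware fast path: skip card marking when the destination region is gen 0, when the stored value lies outside the ephemeral range, or when the store is not an old-to-young pointer according to the region-to-generation table; otherwise set a single card bit with an interlocked OR. The following compile-only C++ sketch mirrors that control flow. The globals stand in for the patchable constants and EXTERN variables of the assembly, the table pointers are assumed to be pre-biased ("skewed") so a shifted address indexes them directly, the relaxed memory ordering is an assumption of the C++ rendering, and the card-bundle update is omitted for brevity.

    #include <atomic>
    #include <cstdint>

    // Stand-ins for the patched constants / extern variables used by the helper.
    uint8_t*              g_region_to_generation_table; // one byte per region: (gen << 4) | gen
    uint8_t               g_region_shr;                 // log2 of the region size
    uint8_t*              g_ephemeral_low;
    uint8_t*              g_ephemeral_high;
    std::atomic<uint8_t>* g_card_table;                 // one card byte per 2 KB of heap

    void write_barrier_region_sketch(uint8_t** dst, uint8_t* ref)
    {
        *dst = ref; // the store itself

        // Storing into a gen 0 region: the destination is already as young as
        // possible, so no card needs to be set.
        uint8_t dst_entry = g_region_to_generation_table[(uintptr_t)dst >> g_region_shr];
        if (dst_entry == 0)
            return;

        // Value outside the ephemeral range: it does not point into gen 0/1.
        if (ref < g_ephemeral_low || g_ephemeral_high <= ref)
            return;

        // Each table entry holds the same generation number in both nibbles, so
        // whole bytes compare like generation numbers. Only old-to-young pointers
        // (value younger than the location it is stored into) need a card.
        uint8_t ref_entry = g_region_to_generation_table[(uintptr_t)ref >> g_region_shr];
        if (ref_entry >= dst_entry)
            return;

        // Old-to-young: set the card bit covering the destination, skipping the
        // interlocked OR when the bit is already set (test + lock or in the asm).
        std::atomic<uint8_t>& card = g_card_table[(uintptr_t)dst >> 11];
        uint8_t mask = (uint8_t)(1u << (((uintptr_t)dst >> 8) & 7));
        if (!(card.load(std::memory_order_relaxed) & mask))
            card.fetch_or(mask, std::memory_order_relaxed);
    }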
--- .../vm/amd64/jithelpers_fastwritebarriers.S | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S index c3560dc4dd212a..7176bc331e76b0 100644 --- a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S @@ -232,7 +232,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - jne NotGen0_Region64 + .byte 0x75, 0x04 + //jne NotGen0_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -241,20 +242,23 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jae NotLow_Region64 + .byte 0x73, 0x01 + // jae NotLow_Region64 ret NotLow_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jb NotHigh_Region64 + .byte 0x72, 0x02 + // jb NotHigh_Region64 REPRET NotHigh_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - jb IsOldToYoung_Region64 + .byte 0x72, 0x03 + // jb IsOldToYoung_Region64 REPRET nop @@ -269,7 +273,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - je UpdateCardTable_Region64 + .byte 0x74, 0x02 + // je UpdateCardTable_Region64 REPRET UpdateCardTable_Region64: @@ -279,7 +284,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable movabs rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - jne UpdateCardBundleTable_Region64 + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_Region64 REPRET UpdateCardBundleTable_Region64: @@ -550,7 +556,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest shr rdi, 0x16 // compute region index cmp byte ptr [rax], 0x0 - jne CheckGen0_WriteWatch_Region64 + .byte 0x75, 0x03 + // jne CheckGen0_WriteWatch_Region64 mov byte ptr [rax], 0xFF CheckGen0_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration @@ -558,7 +565,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - jne NotGen0_WriteWatch_Region64 + .byte 0x75, 0x08 + // jne NotGen0_WriteWatch_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -569,20 +577,23 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jae NotLow_WriteWatch_Region64 + .byte 0x73, 0x01 + // jae NotLow_WriteWatch_Region64 ret NotLow_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper mov r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jb NotHigh_WriteWatch_Region64 + .byte 0x72, 0x02 + // jb NotHigh_WriteWatch_Region64 REPRET NotHigh_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - jb IsOldToYoung_WriteWatch_Region64 + .byte 0x72, 0x03 + // jb 
IsOldToYoung_WriteWatch_Region64 REPRET nop @@ -597,7 +608,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - je UpdateCardTable_WriteWatch_Region64 + .byte 0x74, 0x02 + // je UpdateCardTable_WriteWatch_Region64 REPRET UpdateCardTable_WriteWatch_Region64: @@ -607,7 +619,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable mov rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - jne UpdateCardBundleTable_WriteWatch_Region64 + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_WriteWatch_Region64 REPRET UpdateCardBundleTable_WriteWatch_Region64: From 1c95773047023217dbe791975efb5372af8faf21 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Fri, 8 Apr 2022 10:54:59 +0200 Subject: [PATCH 13/23] Try replacing ugly .byte directives by jcc short. --- .../vm/amd64/jithelpers_fastwritebarriers.S | 39 +++++++------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S index 7176bc331e76b0..b57980b8aa2355 100644 --- a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S @@ -232,8 +232,7 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - .byte 0x75, 0x04 - //jne NotGen0_Region64 + jne short NotGen0_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -242,23 +241,20 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - .byte 0x73, 0x01 - // jae NotLow_Region64 + jae short NotLow_Region64 ret NotLow_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - .byte 0x72, 0x02 - // jb NotHigh_Region64 + jb short NotHigh_Region64 REPRET NotHigh_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - .byte 0x72, 0x03 - // jb IsOldToYoung_Region64 + jb short IsOldToYoung_Region64 REPRET nop @@ -273,8 +269,7 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - .byte 0x74, 0x02 - // je UpdateCardTable_Region64 + je short UpdateCardTable_Region64 REPRET UpdateCardTable_Region64: @@ -284,8 +279,7 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable movabs rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - .byte 0x75, 0x02 - // jne UpdateCardBundleTable_Region64 + jne short UpdateCardBundleTable_Region64 REPRET UpdateCardBundleTable_Region64: @@ -556,8 +550,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest shr rdi, 0x16 // compute region index cmp byte ptr [rax], 0x0 - .byte 0x75, 0x03 - // jne CheckGen0_WriteWatch_Region64 + jne short CheckGen0_WriteWatch_Region64 mov byte ptr [rax], 0xFF CheckGen0_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration @@ -565,8 +558,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - .byte 0x75, 
0x08 - // jne NotGen0_WriteWatch_Region64 + jne short NotGen0_WriteWatch_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -577,23 +569,20 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - .byte 0x73, 0x01 - // jae NotLow_WriteWatch_Region64 + jae short NotLow_WriteWatch_Region64 ret NotLow_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper mov r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - .byte 0x72, 0x02 - // jb NotHigh_WriteWatch_Region64 + jb short NotHigh_WriteWatch_Region64 REPRET NotHigh_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - .byte 0x72, 0x03 - // jb IsOldToYoung_WriteWatch_Region64 + jb short IsOldToYoung_WriteWatch_Region64 REPRET nop @@ -608,8 +597,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - .byte 0x74, 0x02 - // je UpdateCardTable_WriteWatch_Region64 + je short UpdateCardTable_WriteWatch_Region64 REPRET UpdateCardTable_WriteWatch_Region64: @@ -619,8 +607,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable mov rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - .byte 0x75, 0x02 - // jne UpdateCardBundleTable_WriteWatch_Region64 + jne short UpdateCardBundleTable_WriteWatch_Region64 REPRET UpdateCardBundleTable_WriteWatch_Region64: From 766df8f5ca99a78494a565bbe5badd9ef794c110 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Fri, 8 Apr 2022 13:50:46 +0200 Subject: [PATCH 14/23] Undo change trying "jcc short" - doesn't work. Disable more precise write barrier for OSX. 
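The `.byte` pairs that patches 12-14 go back and forth on are hand-encoded short conditional jumps: 0x72, 0x73, 0x74 and 0x75 are the rel8 opcodes for jb, jae, je and jne, and the second byte is a signed 8-bit displacement measured from the end of the two-byte instruction. Spelling them out as bytes pins the instruction length at two bytes regardless of how a given assembler chooses to encode `jcc label`, which appears to matter here because the surrounding movabs constants are patched at runtime and the helpers pad with NOPs for constant alignment; the exact OSX failure mode is not spelled out in the patches. A small decoder, just to make the byte pairs readable (the output format is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Mnemonics for the Jcc rel8 opcodes used in the hand-encoded jumps above.
    static const char* jcc_rel8_name(uint8_t opcode)
    {
        switch (opcode)
        {
            case 0x72: return "jb";
            case 0x73: return "jae";
            case 0x74: return "je";
            case 0x75: return "jne";
            default:   return "?";
        }
    }

    int main()
    {
        // Byte pairs lifted from the barrier helpers above.
        const uint8_t encodings[][2] = { {0x75, 0x04}, {0x73, 0x01}, {0x72, 0x02}, {0x74, 0x02} };
        for (const auto& e : encodings)
        {
            // The displacement is relative to the end of the two-byte instruction.
            printf(".byte 0x%02X, 0x%02X   // %s <next instruction + %d bytes>\n",
                   e[0], e[1], jcc_rel8_name(e[0]), (int)(int8_t)e[1]);
        }
        return 0;
    }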
--- src/coreclr/gc/gc.cpp | 8 ++-- .../vm/amd64/jithelpers_fastwritebarriers.S | 39 ++++++++++++------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 2f2cf1ad60f93d..f2f2670111fe18 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2012,10 +2012,10 @@ void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_h args.is_runtime_suspended = true; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; -#ifdef USE_REGIONS +#if defined(USE_REGIONS) && !defined(TARGET_OSX) args.region_to_generation_table = map_region_to_generation_skewed; args.region_shr = region_shr; -#endif //USE_REGIONS +#endif //USE_REGIONS && !TARGET_OSX GCToEEInterface::StompWriteBarrier(&args); } @@ -2041,10 +2041,10 @@ void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_h args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; -#ifdef USE_REGIONS +#if defined(USE_REGIONS) && !defined(TARGET_OSX) args.region_to_generation_table = map_region_to_generation_skewed; args.region_shr = region_shr; -#endif //USE_REGIONS +#endif //USE_REGIONS && !TARGET_OSX GCToEEInterface::StompWriteBarrier(&args); } diff --git a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S index b57980b8aa2355..7176bc331e76b0 100644 --- a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S @@ -232,7 +232,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - jne short NotGen0_Region64 + .byte 0x75, 0x04 + //jne NotGen0_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -241,20 +242,23 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jae short NotLow_Region64 + .byte 0x73, 0x01 + // jae NotLow_Region64 ret NotLow_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jb short NotHigh_Region64 + .byte 0x72, 0x02 + // jb NotHigh_Region64 REPRET NotHigh_Region64: PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - jb short IsOldToYoung_Region64 + .byte 0x72, 0x03 + // jb IsOldToYoung_Region64 REPRET nop @@ -269,7 +273,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - je short UpdateCardTable_Region64 + .byte 0x74, 0x02 + // je UpdateCardTable_Region64 REPRET UpdateCardTable_Region64: @@ -279,7 +284,8 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable movabs rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - jne short UpdateCardBundleTable_Region64 + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_Region64 REPRET UpdateCardBundleTable_Region64: @@ -550,7 +556,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest shr rdi, 0x16 // compute region index cmp byte ptr [rax], 0x0 - jne short CheckGen0_WriteWatch_Region64 + .byte 0x75, 0x03 + // jne CheckGen0_WriteWatch_Region64 mov byte ptr [rax], 0xFF CheckGen0_WriteWatch_Region64: PATCH_LABEL 
JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration @@ -558,7 +565,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 - jne short NotGen0_WriteWatch_Region64 + .byte 0x75, 0x08 + // jne NotGen0_WriteWatch_Region64 REPRET NOP_2_BYTE // padding for alignment of constant @@ -569,20 +577,23 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jae short NotLow_WriteWatch_Region64 + .byte 0x73, 0x01 + // jae NotLow_WriteWatch_Region64 ret NotLow_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper mov r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 - jb short NotHigh_WriteWatch_Region64 + .byte 0x72, 0x02 + // jb NotHigh_WriteWatch_Region64 REPRET NotHigh_WriteWatch_Region64: PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] - jb short IsOldToYoung_WriteWatch_Region64 + .byte 0x72, 0x03 + // jb IsOldToYoung_WriteWatch_Region64 REPRET nop @@ -597,7 +608,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable mov dl, 1 shl dl, cl test byte ptr [r8 + rax], dl - je short UpdateCardTable_WriteWatch_Region64 + .byte 0x74, 0x02 + // je UpdateCardTable_WriteWatch_Region64 REPRET UpdateCardTable_WriteWatch_Region64: @@ -607,7 +619,8 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable mov rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF - jne short UpdateCardBundleTable_WriteWatch_Region64 + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_WriteWatch_Region64 REPRET UpdateCardBundleTable_WriteWatch_Region64: From 9c31f09410f2a2af573066c0bcaec6e5d48e4f37 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Tue, 19 Apr 2022 12:32:43 +0200 Subject: [PATCH 15/23] Revert code changes in JIT_ByRefWriteBarrier to check whether the failures in OSX are due it. 
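For reference, the card-marking arithmetic that JIT_ByRefWriteBarrier and the other helpers share is fixed by the shift counts visible in the assembly: one card byte covers 2 KB of heap (shr 0x0B), the bitwise variant tracks one bit per 256 bytes within that byte ((address >> 8) & 7), and one card bundle byte covers a further factor of 1024, i.e. 2 MB (an extra shr 0x0A on top of the card shift). A small self-contained illustration with hypothetical helper names; the real tables are accessed through pre-biased pointers, so the raw indices below are simply the contribution of the shifted address:

    #include <cstdint>
    #include <cstdio>

    struct card_location
    {
        uintptr_t card_byte_index;    // one card byte per 2 KB of heap
        uint8_t   card_bit_mask;      // one bit per 256 bytes (bitwise barrier only)
        uintptr_t card_bundle_index;  // one card bundle byte per 2 MB of heap
    };

    // Mirrors the shifts used by the write barrier helpers above.
    static card_location locate_card(uintptr_t address)
    {
        card_location loc;
        loc.card_byte_index   = address >> 11;                        // shr 0x0B
        loc.card_bit_mask     = (uint8_t)(1u << ((address >> 8) & 7));
        loc.card_bundle_index = address >> (11 + 10);                 // extra shr 0x0A
        return loc;
    }

    int main()
    {
        uintptr_t sample = 0x00007f1234567890;  // arbitrary example address
        card_location loc = locate_card(sample);
        printf("card byte 0x%zx, bit mask 0x%02x, card bundle byte 0x%zx\n",
               (size_t)loc.card_byte_index, (unsigned)loc.card_bit_mask,
               (size_t)loc.card_bundle_index);
        return 0;
    }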
--- src/coreclr/vm/amd64/jithelpers_fast.S | 46 ++++++-------------------- 1 file changed, 10 insertions(+), 36 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 6ad69b99875b20..73a7cd4d4c6382 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -355,34 +355,6 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT cmp rcx, [rax] jnb Exit_ByRefWriteBarrier - mov rax, rcx - - PREPARE_EXTERNAL_VAR g_region_shr, rcx - mov cl, [rcx] - test cl, cl - je SkipCheck_ByRefWriteBarrier - - // check if the source is in gen 2 - then it's not an ephemeral pointer - shr rax, cl - PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 - mov r10, [r10] - cmp byte ptr [rax + r10], 0x22 - je Exit_ByRefWriteBarrier - - // check if the destination happens to be in gen 0 - mov rax, rdi - shr rax, cl - cmp byte ptr [rax + r10], 0 - je Exit_ByRefWriteBarrier - SkipCheck_ByRefWriteBarrier: - - // compute card table bit - mov rcx, rdi - mov al, 1 - shr rcx, 8 - and cl, 7 - shl al, cl - // move current rdi value into rcx and then increment the pointers mov rcx, rdi add rsi, 0x8 @@ -390,17 +362,19 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT // Check if we need to update the card table // Calc pCardByte - shr rcx, 0xB - PREPARE_EXTERNAL_VAR g_card_table, r10 - mov r10, [r10] + shr rcx, 0x0B + + PREPARE_EXTERNAL_VAR g_card_table, rax + mov rax, [rax] + + // Check if this card is dirty + cmp byte ptr [rcx + rax], 0xFF - // Check if this card table bit is already set - test byte ptr [rcx + r10], al - je SetCardTableBit_ByRefWriteBarrier + jne UpdateCardTable_ByRefWriteBarrier REPRET - SetCardTableBit_ByRefWriteBarrier: - lock or byte ptr [rcx + r10], al + UpdateCardTable_ByRefWriteBarrier: + mov byte ptr [rcx + rax], 0xFF #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) From 30e1233da53a5b5863c65baa884fddda05473b91 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Sun, 24 Apr 2022 12:41:13 +0200 Subject: [PATCH 16/23] Add a write barrier type for regions that sets a whole byte instead of just a single bit. This will allows us to determine the tradeoff between being more precise in the write barrier, which saves work in card marking, and being faster in the write barrier which causes more work in card marking. 
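Concretely, the two flavours introduced by this patch differ only in their card-update tail, and the choice is wired up through the new GCBitwiseWriteBarrier setting and region_use_bitwise_write_barrier flag below. A side-by-side sketch of the two tails with hypothetical helper names (simplified; `card` is the card byte covering the destination, `mask` the bit for its 256-byte slice, and the relaxed ordering is an assumption of the C++ rendering — the assembly uses a plain load plus `lock or`):

    #include <atomic>
    #include <cstdint>

    // Byte-granular tail: coarser (dirties the whole 2 KB card) but needs only a
    // plain store, so the hot path has no interlocked operation.
    inline void mark_card_byte(uint8_t* card)
    {
        if (*card != 0xFF)
            *card = 0xFF;
    }

    // Bit-granular tail: more precise (one bit per 256 bytes, so card scanning
    // during GC has less to look at) but needs an atomic OR because other threads
    // may be setting different bits of the same card byte concurrently.
    inline void mark_card_bit(std::atomic<uint8_t>* card, uint8_t mask)
    {
        if (!(card->load(std::memory_order_relaxed) & mask))
            card->fetch_or(mask, std::memory_order_relaxed);
    }

That is exactly the tradeoff named in the commit message: more precision in the barrier saves card-marking work later, while the faster byte-setting barrier leaves more work for card marking.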
--- src/coreclr/gc/gc.cpp | 10 +- src/coreclr/gc/gcconfig.h | 1 + src/coreclr/gc/gcinterface.h | 3 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 12 +- .../vm/amd64/JitHelpers_FastWriteBarriers.asm | 191 ++++++++++-- src/coreclr/vm/amd64/jithelpers_fast.S | 63 +++- .../vm/amd64/jithelpers_fastwritebarriers.S | 281 ++++++++++++++---- src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 275 ++++++++++++----- src/coreclr/vm/gcenv.ee.cpp | 2 + src/coreclr/vm/gcheaputilities.cpp | 1 + src/coreclr/vm/gcheaputilities.h | 1 + src/coreclr/vm/jitinterface.h | 8 +- 12 files changed, 678 insertions(+), 170 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 88d82b85c828d6..d752c6acffc46f 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2014,10 +2014,11 @@ void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_h args.is_runtime_suspended = true; args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; -#if defined(USE_REGIONS) && !defined(TARGET_OSX) +#ifdef USE_REGIONS args.region_to_generation_table = map_region_to_generation_skewed; args.region_shr = region_shr; -#endif //USE_REGIONS && !TARGET_OSX + args.region_use_bitwise_write_barrier = GCConfig::GetGCBitwiseWriteBarrier() != 0; +#endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } @@ -2043,10 +2044,11 @@ void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_h args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; -#if defined(USE_REGIONS) && !defined(TARGET_OSX) +#ifdef USE_REGIONS args.region_to_generation_table = map_region_to_generation_skewed; args.region_shr = region_shr; -#endif //USE_REGIONS && !TARGET_OSX + args.region_use_bitwise_write_barrier = GCConfig::GetGCBitwiseWriteBarrier() != 0; +#endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } diff --git a/src/coreclr/gc/gcconfig.h b/src/coreclr/gc/gcconfig.h index 63e2ded8af3b20..7149527a326fe4 100644 --- a/src/coreclr/gc/gcconfig.h +++ b/src/coreclr/gc/gcconfig.h @@ -134,6 +134,7 @@ class GCConfigStringHolder INT_CONFIG (GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", "System.GC.HeapHardLimitPOHPercent", 0, "Specifies the GC heap POH usage as a percentage of the total memory") \ INT_CONFIG (GCEnabledInstructionSets, "GCEnabledInstructionSets", NULL, -1, "Specifies whether GC can use AVX2 or AVX512F - 0 for neither, 1 for AVX2, 3 for AVX512F")\ INT_CONFIG (GCConserveMem, "GCConserveMemory", NULL, 0, "Specifies how hard GC should try to conserve memory - values 0-9") \ + INT_CONFIG (GCBitwiseWriteBarrier, "GCBitwiseWriteBarrier", NULL, 0, "Specifies whether GC should use more precise but slower write barrier") \ // This class is responsible for retreiving configuration information // for how the GC should operate. 
diff --git a/src/coreclr/gc/gcinterface.h b/src/coreclr/gc/gcinterface.h index b96b01b9936eaa..855f78c69758b2 100644 --- a/src/coreclr/gc/gcinterface.h +++ b/src/coreclr/gc/gcinterface.h @@ -114,6 +114,9 @@ struct WriteBarrierParameters // shift count - how many bits to shift right to obtain region index from address uint8_t region_shr; + + // whether to use the more precise but slower write barrier + bool region_use_bitwise_write_barrier; }; struct EtwGCSettingsInfo diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 0a4d3012a206ca..4ea80159c2ea33 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -21,6 +21,7 @@ EXTERN g_lowest_address:QWORD EXTERN g_highest_address:QWORD EXTERN g_card_table:QWORD EXTERN g_region_shr:BYTE +EXTERN g_region_use_bitwise_write_barrier:BYTE EXTERN g_region_to_generation_table:QWORD ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES @@ -355,6 +356,9 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP je Exit SkipCheck: + cmp [g_region_use_bitwise_write_barrier], 0 + je CheckCardTableByte + ; compute card table bit mov rcx, rdi mov al, 1 @@ -379,7 +383,9 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP SetCardTableBit: lock or byte ptr [rcx], al -else + jmp CheckCardBundle +endif +CheckCardTableByte: ; move current rdi value into rcx and then increment the pointers mov rcx, rdi @@ -398,7 +404,9 @@ else UpdateCardTable: mov byte ptr [rcx], 0FFh -endif + + CheckCardBundle: + ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES ; check if we need to update the card bundle table ; restore destination address from rdi - rdi has been incremented by 8 already diff --git a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm index 3ca22469fce5f8..68ab221278442b 100644 --- a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm +++ b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm @@ -196,7 +196,7 @@ endif ret LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT -LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT +LEAF_ENTRY JIT_WriteBarrier_Byte_Region64, _TEXT align 8 ; Do the move into the GC . 
It is correct to take an AV here, the EH code @@ -207,10 +207,10 @@ LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT mov r8, rcx -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionToGeneration mov rax, 0F0F0F0F0F0F0F0F0h -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrDest shr rcx, 16h ; compute region index ; Check whether the region we're storing into is gen 0 - nothing to do in this case @@ -221,19 +221,19 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest NOP_2_BYTE ; padding for alignment of constant NotGen0: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_Lower mov r9, 0F0F0F0F0F0F0F0F0h cmp rdx, r9 jae NotLow ret NotLow: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_Upper mov r9, 0F0F0F0F0F0F0F0F0h cmp rdx, r9 jb NotHigh REPRET NotHigh: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrSrc shr rdx, 16h ; compute region index mov dl, [rdx + rax] cmp dl, [rcx + rax] @@ -242,7 +242,77 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc nop IsOldToYoung: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + + shr r8, 0Bh + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [r8 + rax], 0FFh +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + shr r8, 0Ah +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_CardBundleTable + mov rax, 0F0F0F0F0F0F0F0F0h + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [r8 + rax], 0FFh +endif + ret +LEAF_END_MARKED JIT_WriteBarrier_Byte_Region64, _TEXT + +LEAF_ENTRY JIT_WriteBarrier_Bit_Region64, _TEXT + align 8 + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rcx], rdx + + mov r8, rcx + +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionToGeneration + mov rax, 0F0F0F0F0F0F0F0F0h + +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrDest + shr rcx, 16h ; compute region index + + ; Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rcx + rax], 0 + jne NotGen0 + REPRET + + NOP_2_BYTE ; padding for alignment of constant + + NotGen0: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jae NotLow + ret + NotLow: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_Upper + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jb NotHigh + REPRET + NotHigh: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrSrc + shr rdx, 16h ; compute region index + mov dl, [rdx + rax] + cmp dl, [rcx + rax] + jb isOldToYoung + REPRET + nop + + IsOldToYoung: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_CardTable mov rax, 0F0F0F0F0F0F0F0F0h mov ecx, r8d @@ -257,8 +327,9 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable UpdateCardTable: lock or byte ptr [r8 + rax], dl + ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_CardBundleTable mov rax, 0F0F0F0F0F0F0F0F0h shr r8, 0Ah cmp byte ptr [r8 + rax], 0FFh @@ -269,7 +340,7 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable mov byte ptr [r8 + rax], 0FFh endif ret -LEAF_END_MARKED JIT_WriteBarrier_Region64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_Bit_Region64, _TEXT endif @@ -481,7 +552,87 @@ LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT endif -LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + align 8 + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
+ mov [rcx], rdx + + ; Update the write watch table if necessary + mov rax, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_WriteWatchTable + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, r8 + mov r8, rcx +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrDest + shr rcx, 16h ; compute region index + cmp byte ptr [rax], 0h + jne JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionToGeneration + mov byte ptr [rax], 0FFh + +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionToGeneration + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rcx + rax], 0 + jne NotGen0 + REPRET + + NOP_2_BYTE ; padding for alignment of constant + NOP_2_BYTE ; padding for alignment of constant + NOP_2_BYTE ; padding for alignment of constant + + NotGen0: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Lower + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jae NotLow + ret + NotLow: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Upper + mov r9, 0F0F0F0F0F0F0F0F0h + cmp rdx, r9 + jb NotHigh + REPRET + NotHigh: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrSrc + shr rdx, 16h ; compute region index + mov dl, [rdx + rax] + cmp dl, [rcx + rax] + jb isOldToYoung + REPRET + nop + + IsOldToYoung: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardTable + mov rax, 0F0F0F0F0F0F0F0F0h + + shr r8, 0Bh + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [r8 + rax], 0FFh +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + shr r8, 0Ah +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardBundleTable + mov rax, 0F0F0F0F0F0F0F0F0h + cmp byte ptr [r8 + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [r8 + rax], 0FFh +endif + ret +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT align 8 ; Do the move into the GC . 
It is correct to take an AV here, the EH code @@ -492,18 +643,18 @@ LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT ; Update the write watch table if necessary mov rax, rcx -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_WriteWatchTable mov r8, 0F0F0F0F0F0F0F0F0h shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift add rax, r8 mov r8, rcx -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrDest shr rcx, 16h ; compute region index cmp byte ptr [rax], 0h - jne JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration + jne JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionToGeneration mov byte ptr [rax], 0FFh -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionToGeneration mov rax, 0F0F0F0F0F0F0F0F0h ; Check whether the region we're storing into is gen 0 - nothing to do in this case @@ -516,19 +667,19 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration NOP_2_BYTE ; padding for alignment of constant NotGen0: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Lower mov r9, 0F0F0F0F0F0F0F0F0h cmp rdx, r9 jae NotLow ret NotLow: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Upper mov r9, 0F0F0F0F0F0F0F0F0h cmp rdx, r9 jb NotHigh REPRET NotHigh: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrSrc shr rdx, 16h ; compute region index mov dl, [rdx + rax] cmp dl, [rcx + rax] @@ -537,7 +688,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc nop IsOldToYoung: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardTable mov rax, 0F0F0F0F0F0F0F0F0h mov ecx, r8d @@ -553,7 +704,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable UpdateCardTable: lock or byte ptr [r8 + rax], dl ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardBundleTable mov rax, 0F0F0F0F0F0F0F0F0h shr r8, 0Ah cmp byte ptr [r8 + rax], 0FFh @@ -564,7 +715,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable mov byte ptr [r8 + rax], 0FFh endif ret -LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Region64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT endif diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 73a7cd4d4c6382..6823ea4474fc0d 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -355,6 +355,41 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT cmp rcx, [rax] jnb Exit_ByRefWriteBarrier + mov rax, rcx + + PREPARE_EXTERNAL_VAR g_region_shr, rcx + mov cl, [rcx] + test cl, cl + je SkipCheck_ByRefWriteBarrier + + // check if the source is in gen 2 - then it's not an ephemeral pointer + shr rax, cl + PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 + mov r10, [r10] + cmp byte ptr [rax + r10], 0x22 + 
je Exit_ByRefWriteBarrier + + // check if the destination happens to be in gen 0 + mov rax, rdi + shr rax, cl + cmp byte ptr [rax + r10], 0 + je Exit_ByRefWriteBarrier + SkipCheck_ByRefWriteBarrier: + + PREPARE_EXTERNAL_VAR g_card_table, r10 + mov r10, [r10] + + PREPARE_EXTERNAL_VAR g_region_use_bitwise_write_barrier, rax + cmp byte ptr [rax], 0 + je CheckCardTableByte_ByRefWriteBarrier + + // compute card table bit + mov ecx, edi + mov al, 1 + shr ecx, 8 + and cl, 7 + shl al, cl + // move current rdi value into rcx and then increment the pointers mov rcx, rdi add rsi, 0x8 @@ -362,19 +397,31 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT // Check if we need to update the card table // Calc pCardByte - shr rcx, 0x0B + shr rcx, 0xB + // Check if this card table bit is already set + test byte ptr [rcx + r10], al + je SetCardTableBit_ByRefWriteBarrier + REPRET - PREPARE_EXTERNAL_VAR g_card_table, rax - mov rax, [rax] + SetCardTableBit_ByRefWriteBarrier: + lock or byte ptr [rcx + r10], al - // Check if this card is dirty - cmp byte ptr [rcx + rax], 0xFF + jmp CheckCardBundle_ByRefWriteBarrier + + CheckCardTableByte_ByRefWriteBarrier: + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 0x8 + add rdi, 0x8 - jne UpdateCardTable_ByRefWriteBarrier + shr rcx, 0xB + cmp byte ptr [rcx + r10], 0xFF + jne SetCardTableByte_ByRefWriteBarrier REPRET + SetCardTableByte_ByRefWriteBarrier: + mov byte ptr [rcx + r10], 0xFF - UpdateCardTable_ByRefWriteBarrier: - mov byte ptr [rcx + rax], 0xFF + CheckCardBundle_ByRefWriteBarrier: #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) diff --git a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S index 7176bc331e76b0..f987751bdcb358 100644 --- a/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S +++ b/src/coreclr/vm/amd64/jithelpers_fastwritebarriers.S @@ -5,7 +5,7 @@ #include "unixasmmacros.inc" - .balign 8 + .balign 16 LEAF_ENTRY JIT_WriteBarrier_PreGrow64, _TEXT // Do the move into the GC . It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back @@ -26,7 +26,7 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_Lower #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES .byte 0x72, 0x4B #else - .byte 0x72, 0x23 + .byte 0x72, 0x2b #endif // jb Exit_PreGrow64 @@ -72,7 +72,7 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_CardBundleTable LEAF_END_MARKED JIT_WriteBarrier_PreGrow64, _TEXT - .balign 8 + .balign 16 // See comments for JIT_WriteBarrier_PreGrow (above). LEAF_ENTRY JIT_WriteBarrier_PostGrow64, _TEXT // Do the move into the GC . 
It is correct to take an AV here, the EH code @@ -95,9 +95,9 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_Lower cmp rsi, rax #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - .byte 0x72, 0x53 + .byte 0x72, 0x5b #else - .byte 0x72, 0x33 + .byte 0x72, 0x3b #endif // jb Exit_PostGrow64 @@ -109,9 +109,9 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_Upper cmp rsi, r8 #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - .byte 0x73, 0x43 + .byte 0x73, 0x4b #else - .byte 0x73, 0x23 + .byte 0x73, 0x2b #endif // jae Exit_PostGrow64 @@ -159,7 +159,7 @@ LEAF_END_MARKED JIT_WriteBarrier_PostGrow64, _TEXT #ifdef FEATURE_SVR_GC - .balign 8 + .balign 16 LEAF_ENTRY JIT_WriteBarrier_SVR64, _TEXT // // SVR GC has multiple heaps, so it cannot provide one single @@ -213,8 +213,83 @@ LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT #endif - .balign 8 -LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT + .balign 16 +LEAF_ENTRY JIT_WriteBarrier_Byte_Region64, _TEXT + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + mov r8, rdi + +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionToGeneration + movabs rax, 0xF0F0F0F0F0F0F0F0 + +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrDest + shr rdi, 0x16 // compute region index + + // Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rdi + rax], 0 + .byte 0x75, 0x04 + //jne NotGen0_Byte_Region64 + REPRET + + NOP_2_BYTE // padding for alignment of constant + + NotGen0_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_Lower + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + .byte 0x73, 0x01 + // jae NotLow_Byte_Region64 + ret + NotLow_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_Upper + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + .byte 0x72, 0x02 + // jb NotHigh_Byte_Region64 + REPRET + NotHigh_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrSrc + shr rsi, 0x16 // compute region index + mov dl, [rsi + rax] + cmp dl, [rdi + rax] + .byte 0x72, 0x03 + // jb IsOldToYoung_Byte_Region64 + REPRET + nop + + IsOldToYoung_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_CardTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + + shr r8, 0xB + cmp byte ptr [r8 + rax], 0xFF + .byte 0x75, 0x02 + // jne UpdateCardTable_Byte_Region64 + REPRET + + UpdateCardTable_Byte_Region64: + mov byte ptr [r8 + rax], 0xFF +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + shr r8, 0x0A +PATCH_LABEL JIT_WriteBarrier_Byte_Region64_Patch_Label_CardBundleTable + movabs rax, 0xF0F0F0F0F0F0F0F0 + cmp byte ptr [r8 + rax], 0xFF + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_Byte_Region64 + REPRET + + UpdateCardBundleTable_Byte_Region64: + mov byte ptr [r8 + rax], 0xFF +#endif + ret +LEAF_END_MARKED JIT_WriteBarrier_Byte_Region64, _TEXT + .balign 16 +LEAF_ENTRY JIT_WriteBarrier_Bit_Region64, _TEXT // Do the move into the GC . 
It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back @@ -224,46 +299,46 @@ LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT mov r8, rdi -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionToGeneration movabs rax, 0xF0F0F0F0F0F0F0F0 -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrDest shr rdi, 0x16 // compute region index // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 .byte 0x75, 0x04 - //jne NotGen0_Region64 + //jne NotGen0_Bit_Region64 REPRET NOP_2_BYTE // padding for alignment of constant - NotGen0_Region64: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower + NotGen0_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 .byte 0x73, 0x01 - // jae NotLow_Region64 + // jae NotLow_Bit_Region64 ret - NotLow_Region64: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper + NotLow_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_Upper movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 .byte 0x72, 0x02 - // jb NotHigh_Region64 + // jb NotHigh_Bit_Region64 REPRET - NotHigh_Region64: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc + NotHigh_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] .byte 0x72, 0x03 - // jb IsOldToYoung_Region64 + // jb IsOldToYoung_Bit_Region64 REPRET nop - IsOldToYoung_Region64: -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable + IsOldToYoung_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_CardTable movabs rax, 0xF0F0F0F0F0F0F0F0 mov ecx, r8d @@ -274,29 +349,29 @@ PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable shl dl, cl test byte ptr [r8 + rax], dl .byte 0x74, 0x02 - // je UpdateCardTable_Region64 + // je UpdateCardTable_Bit_Region64 REPRET - UpdateCardTable_Region64: + UpdateCardTable_Bit_Region64: lock or byte ptr [r8 + rax], dl #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable +PATCH_LABEL JIT_WriteBarrier_Bit_Region64_Patch_Label_CardBundleTable movabs rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF .byte 0x75, 0x02 - // jne UpdateCardBundleTable_Region64 + // jne UpdateCardBundleTable_Bit_Region64 REPRET - UpdateCardBundleTable_Region64: + UpdateCardBundleTable_Bit_Region64: mov byte ptr [r8 + rax], 0xFF #endif ret -LEAF_END_MARKED JIT_WriteBarrier_Region64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_Bit_Region64, _TEXT #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - .balign 8 + .balign 16 LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT // Regarding patchable constants: // - 64-bit constants have to be loaded into a register @@ -374,7 +449,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardBundleTable LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PreGrow64, _TEXT - .balign 8 + .balign 16 LEAF_ENTRY JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT // Regarding patchable constants: // - 64-bit constants have to be loaded into a register @@ -410,7 +485,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower cmp rsi, r11 #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - .byte 0x72, 0x55 + .byte 0x72, 0x4d #else 
.byte 0x72, 0x3d #endif @@ -424,7 +499,7 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper cmp rsi, r10 #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - .byte 0x73, 0x43 + .byte 0x73, 0x3b #else .byte 0x73, 0x2b #endif @@ -470,7 +545,7 @@ LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_PostGrow64, _TEXT #ifdef FEATURE_SVR_GC - .balign 8 + .balign 16 LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT // Regarding patchable constants: // - 64-bit constants have to be loaded into a register @@ -537,8 +612,94 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT #endif - .balign 8 -LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT + .balign 16 +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + // Update the write watch table if necessary + mov rax, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_WriteWatchTable + movabs r8, 0xF0F0F0F0F0F0F0F0 + shr rax, 0x0C // SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, r8 + mov r8, rdi +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrDest + shr rdi, 0x16 // compute region index + cmp byte ptr [rax], 0x0 + .byte 0x75, 0x03 + // jne CheckGen0_WriteWatch_Byte_Region64 + mov byte ptr [rax], 0xFF + CheckGen0_WriteWatch_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionToGeneration + mov rax, 0xF0F0F0F0F0F0F0F0 + + // Check whether the region we're storing into is gen 0 - nothing to do in this case + cmp byte ptr [rdi + rax], 0 + .byte 0x75, 0x08 + // jne NotGen0_WriteWatch_Byte_Region64 + REPRET + + NOP_2_BYTE // padding for alignment of constant + NOP_2_BYTE // padding for alignment of constant + NOP_2_BYTE // padding for alignment of constant + + NotGen0_WriteWatch_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Lower + movabs r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + .byte 0x73, 0x01 + // jae NotLow_WriteWatch_Byte_Region64 + ret + NotLow_WriteWatch_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Upper + mov r9, 0xF0F0F0F0F0F0F0F0 + cmp rsi, r9 + .byte 0x72, 0x02 + // jb NotHigh_WriteWatch_Byte_Region64 + REPRET + NotHigh_WriteWatch_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrSrc + shr rsi, 0x16 // compute region index + mov dl, [rsi + rax] + cmp dl, [rdi + rax] + .byte 0x72, 0x03 + // jb IsOldToYoung_WriteWatch_Byte_Region64 + REPRET + nop + + IsOldToYoung_WriteWatch_Byte_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardTable + mov rax, 0xF0F0F0F0F0F0F0F0 + + shr r8, 0xB + cmp byte ptr [r8 + rax], 0xFF + .byte 0x75, 0x02 + // jne UpdateCardTable_WriteWatch_Byte_Region64 + REPRET + + UpdateCardTable_WriteWatch_Byte_Region64: + mov byte ptr [r8 + rax], 0xFF +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + shr r8, 0x0A +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardBundleTable + mov rax, 0xF0F0F0F0F0F0F0F0 + cmp byte ptr [r8 + rax], 0xFF + .byte 0x75, 0x02 + // jne UpdateCardBundleTable_WriteWatch_Byte_Region64 + REPRET + + UpdateCardBundleTable_WriteWatch_Byte_Region64: + mov byte 
ptr [r8 + rax], 0xFF +#endif + ret +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Byte_Region64, _TEXT + .balign 16 +LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT // Do the move into the GC . It is correct to take an AV here, the EH code // figures out that this came from a WriteBarrier and correctly maps it back @@ -548,57 +709,57 @@ LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT // Update the write watch table if necessary mov rax, rdi -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_WriteWatchTable movabs r8, 0xF0F0F0F0F0F0F0F0 shr rax, 0x0C // SoftwareWriteWatch::AddressToTableByteIndexShift add rax, r8 mov r8, rdi -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrDest shr rdi, 0x16 // compute region index cmp byte ptr [rax], 0x0 .byte 0x75, 0x03 - // jne CheckGen0_WriteWatch_Region64 + // jne CheckGen0_WriteWatch_Bit_Region64 mov byte ptr [rax], 0xFF - CheckGen0_WriteWatch_Region64: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration + CheckGen0_WriteWatch_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionToGeneration mov rax, 0xF0F0F0F0F0F0F0F0 // Check whether the region we're storing into is gen 0 - nothing to do in this case cmp byte ptr [rdi + rax], 0 .byte 0x75, 0x08 - // jne NotGen0_WriteWatch_Region64 + // jne NotGen0_WriteWatch_Bit_Region64 REPRET NOP_2_BYTE // padding for alignment of constant NOP_2_BYTE // padding for alignment of constant NOP_2_BYTE // padding for alignment of constant - NotGen0_WriteWatch_Region64: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower + NotGen0_WriteWatch_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Lower movabs r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 .byte 0x73, 0x01 - // jae NotLow_WriteWatch_Region64 + // jae NotLow_WriteWatch_Bit_Region64 ret - NotLow_WriteWatch_Region64: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper + NotLow_WriteWatch_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Upper mov r9, 0xF0F0F0F0F0F0F0F0 cmp rsi, r9 .byte 0x72, 0x02 - // jb NotHigh_WriteWatch_Region64 + // jb NotHigh_WriteWatch_Bit_Region64 REPRET - NotHigh_WriteWatch_Region64: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc + NotHigh_WriteWatch_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrSrc shr rsi, 0x16 // compute region index mov dl, [rsi + rax] cmp dl, [rdi + rax] .byte 0x72, 0x03 - // jb IsOldToYoung_WriteWatch_Region64 + // jb IsOldToYoung_WriteWatch_Bit_Region64 REPRET nop - IsOldToYoung_WriteWatch_Region64: -PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable + IsOldToYoung_WriteWatch_Bit_Region64: +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardTable mov rax, 0xF0F0F0F0F0F0F0F0 mov ecx, r8d @@ -609,24 +770,24 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable shl dl, cl test byte ptr [r8 + rax], dl .byte 0x74, 0x02 - // je UpdateCardTable_WriteWatch_Region64 + // je UpdateCardTable_WriteWatch_Bit_Region64 REPRET - UpdateCardTable_WriteWatch_Region64: + UpdateCardTable_WriteWatch_Bit_Region64: lock or byte ptr [r8 + rax], dl #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -PATCH_LABEL 
JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable +PATCH_LABEL JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardBundleTable mov rax, 0xF0F0F0F0F0F0F0F0 shr r8, 0x0A cmp byte ptr [r8 + rax], 0xFF .byte 0x75, 0x02 - // jne UpdateCardBundleTable_WriteWatch_Region64 + // jne UpdateCardBundleTable_WriteWatch_Bit_Region64 REPRET - UpdateCardBundleTable_WriteWatch_Region64: + UpdateCardBundleTable_WriteWatch_Bit_Region64: mov byte ptr [r8 + rax], 0xFF #endif ret -LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Region64, _TEXT +LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Bit_Region64, _TEXT #endif diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index a33f8dbb4ef02c..a4036c7880a28a 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -50,17 +50,30 @@ EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardBundleTable(); EXTERN_C void JIT_WriteBarrier_SVR64_End(); #endif // FEATURE_SVR_GC -EXTERN_C void JIT_WriteBarrier_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration(); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_CardTable(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_CardTable(); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_Patch_Label_CardBundleTable(); #endif -EXTERN_C void JIT_WriteBarrier_Region64_End(); +EXTERN_C void JIT_WriteBarrier_Byte_Region64_End(); + +EXTERN_C void JIT_WriteBarrier_Bit_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_CardTable(); +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN_C void JIT_WriteBarrier_Bit_Region64_Patch_Label_CardBundleTable(); +#endif +EXTERN_C void JIT_WriteBarrier_Bit_Region64_End(); + #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref); @@ -91,18 +104,33 @@ EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable(); #endif EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End(); #endif // FEATURE_SVR_GC -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64(Object **dst, Object *ref); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration(); 
-EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc(); -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable(); -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable(); -#endif -EXTERN_C void JIT_WriteBarrier_WriteWatch_Region64_End(); + +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardTable(); +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_Patch_Label_CardBundleTable(); +#endif +EXTERN_C void JIT_WriteBarrier_WriteWatch_Byte_Region64_End(); + +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64(Object **dst, Object *ref); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_WriteWatchTable(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionToGeneration(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrDest(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Lower(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_Upper(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_RegionShrSrc(); +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardTable(); +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_Patch_Label_CardBundleTable(); +#endif +EXTERN_C void JIT_WriteBarrier_WriteWatch_Bit_Region64_End(); + #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WriteBarrierManager g_WriteBarrierManager; @@ -176,18 +204,33 @@ void WriteBarrierManager::Validate() #endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES #endif // FEATURE_SVR_GC - PBYTE pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionToGeneration, 2); + PBYTE pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionToGeneration, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardTable, 2); + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); 
_ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardBundleTable, 2); + pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); +#endif + + pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionToGeneration, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif @@ -234,18 +277,33 @@ void WriteBarrierManager::Validate() #endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES #endif // FEATURE_SVR_GC - pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionToGeneration, 2); + pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionToGeneration, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); - pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Lower, 2); - pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Upper, 2); - pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardTable, 2); + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - pCardBundleTableImmediate = 
CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardBundleTable, 2); + pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); +#endif + + pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionToGeneration, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + + pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Lower, 2); + pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Upper, 2); + pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif @@ -269,8 +327,10 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() case WRITE_BARRIER_SVR64: return GetEEFuncEntryPoint(JIT_WriteBarrier_SVR64); #endif // FEATURE_SVR_GC - case WRITE_BARRIER_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_Region64); + case WRITE_BARRIER_BYTE_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_Byte_Region64); + case WRITE_BARRIER_BIT_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_Bit_Region64); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_PreGrow64); @@ -280,8 +340,10 @@ PCODE WriteBarrierManager::GetCurrentWriteBarrierCode() case WRITE_BARRIER_WRITE_WATCH_SVR64: return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_SVR64); #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: - return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Region64); + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Byte_Region64); + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: + return GetEEFuncEntryPoint(JIT_WriteBarrier_WriteWatch_Bit_Region64); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP default: UNREACHABLE_MSG("unexpected m_currentWriteBarrier!"); @@ -305,8 +367,10 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa case WRITE_BARRIER_SVR64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_SVR64); #endif // FEATURE_SVR_GC - case WRITE_BARRIER_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Region64); + case WRITE_BARRIER_BYTE_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Byte_Region64); + case WRITE_BARRIER_BIT_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_Bit_Region64); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_PREGROW64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_PreGrow64); @@ 
-316,8 +380,10 @@ size_t WriteBarrierManager::GetSpecificWriteBarrierSize(WriteBarrierType writeBa case WRITE_BARRIER_WRITE_WATCH_SVR64: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_SVR64); #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: - return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Region64); + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Byte_Region64); + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: + return MARKED_FUNCTION_SIZE(JIT_WriteBarrier_WriteWatch_Bit_Region64); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_BUFFER: return MARKED_FUNCTION_SIZE(JIT_WriteBarrier); @@ -413,13 +479,35 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, } #endif // FEATURE_SVR_GC - case WRITE_BARRIER_REGIONS64: - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardTable, 2); + case WRITE_BARRIER_BYTE_REGIONS64: + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); +#endif + break; + + case WRITE_BARRIER_BIT_REGIONS64: + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); @@ -430,7 +518,7 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Region64, Patch_Label_CardBundleTable, 2); + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -492,14 +580,14 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, } #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: - m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_WriteWatchTable, 2); - m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionToGeneration, 2); - m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionShrDest, 3); - m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_RegionShrSrc, 3); - m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Lower, 2); - m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_Upper, 2); - m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardTable, 2); + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + 
m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_WriteWatchTable, 2); + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); @@ -511,7 +599,31 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Region64, Patch_Label_CardBundleTable, 2); + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); +#endif + break; + + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: + m_pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_WriteWatchTable, 2); + m_pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionToGeneration, 2); + m_pRegionShrDest = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionShrDest, 3); + m_pRegionShrSrc = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionShrSrc, 3); + m_pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Lower, 2); + m_pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Upper, 2); + m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); + + // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -551,14 +663,16 @@ void WriteBarrierManager::Initialize() #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64)); #endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_REGIONS64)); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BYTE_REGIONS64)); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BIT_REGIONS64)); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64)); _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64)); #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64)); #endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_REGIONS64)); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64)); + _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64)); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #if !defined(CODECOVERAGE) @@ -566,7 +680,7 @@ void WriteBarrierManager::Initialize() #endif } -bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType) +bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, bool bUseBitwiseWriteBarrier, WriteBarrierType* pNewWriteBarrierType) { // Init code for the JIT_WriteBarrier assembly routine. Since it will be bashed everytime the GC Heap // changes size, we want to do most of the work just once. 
@@ -590,7 +704,7 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W #endif if (g_region_shr != 0) { - writeBarrierType = WRITE_BARRIER_REGIONS64; + writeBarrierType = bUseBitwiseWriteBarrier ? WRITE_BARRIER_BIT_REGIONS64: WRITE_BARRIER_BYTE_REGIONS64; } else { @@ -613,7 +727,8 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W break; #endif // FEATURE_SVR_GC - case WRITE_BARRIER_REGIONS64: + case WRITE_BARRIER_BYTE_REGIONS64: + case WRITE_BARRIER_BIT_REGIONS64: break; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -631,7 +746,8 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W case WRITE_BARRIER_WRITE_WATCH_SVR64: break; #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: break; #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -648,7 +764,7 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, W int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) { WriteBarrierType newType; - if (NeedDifferentWriteBarrier(false, &newType)) + if (NeedDifferentWriteBarrier(false, g_region_use_bitwise_write_barrier, &newType)) { return ChangeWriteBarrierTo(newType, isRuntimeSuspended); } @@ -664,10 +780,12 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) switch (m_currentWriteBarrier) { case WRITE_BARRIER_POSTGROW64: - case WRITE_BARRIER_REGIONS64: + case WRITE_BARRIER_BYTE_REGIONS64: + case WRITE_BARRIER_BIT_REGIONS64: #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP case WRITE_BARRIER_WRITE_WATCH_POSTGROW64: - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP { // Change immediate if different from new g_ephermeral_high. @@ -717,7 +835,7 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus // we need to switch to the WriteBarrier_PostGrow function for good. 
WriteBarrierType newType; - if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, &newType)) + if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, g_region_use_bitwise_write_barrier, &newType)) { return ChangeWriteBarrierTo(newType, isRuntimeSuspended); } @@ -738,7 +856,8 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #ifdef FEATURE_SVR_GC case WRITE_BARRIER_WRITE_WATCH_SVR64: #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); @@ -754,8 +873,10 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus switch (m_currentWriteBarrier) { - case WRITE_BARRIER_REGIONS64: - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: + case WRITE_BARRIER_BYTE_REGIONS64: + case WRITE_BARRIER_BIT_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: if (*(UINT64*)m_pRegionToGenTableImmediate != (size_t)g_region_to_generation_table) { ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pRegionToGenTableImmediate, sizeof(UINT64)); @@ -823,8 +944,12 @@ int WriteBarrierManager::SwitchToWriteWatchBarrier(bool isRuntimeSuspended) break; #endif // FEATURE_SVR_GC - case WRITE_BARRIER_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_REGIONS64; + case WRITE_BARRIER_BYTE_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64; + break; + + case WRITE_BARRIER_BIT_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64; break; default: @@ -857,8 +982,12 @@ int WriteBarrierManager::SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended) break; #endif // FEATURE_SVR_GC - case WRITE_BARRIER_WRITE_WATCH_REGIONS64: - newWriteBarrierType = WRITE_BARRIER_REGIONS64; + case WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_BYTE_REGIONS64; + break; + + case WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64: + newWriteBarrierType = WRITE_BARRIER_BIT_REGIONS64; break; default: diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 8011e2f0b3e52a..3bdd4bf279ecbc 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -1004,6 +1004,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) g_ephemeral_high = args->ephemeral_high; g_region_to_generation_table = args->region_to_generation_table; g_region_shr = args->region_shr; + g_region_use_bitwise_write_barrier = args->region_use_bitwise_write_barrier; stompWBCompleteActions |= ::StompWriteBarrierEphemeral(args->is_runtime_suspended); break; case WriteBarrierOp::Initialize: @@ -1030,6 +1031,7 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) g_highest_address = args->highest_address; g_region_to_generation_table = args->region_to_generation_table; g_region_shr = args->region_shr; + g_region_use_bitwise_write_barrier = args->region_use_bitwise_write_barrier; stompWBCompleteActions |= ::StompWriteBarrierResize(true, false); // StompWriteBarrierResize does not necessarily bash g_ephemeral_low diff --git a/src/coreclr/vm/gcheaputilities.cpp b/src/coreclr/vm/gcheaputilities.cpp index af53ff669c4fd0..4b36281b675099 100644 --- a/src/coreclr/vm/gcheaputilities.cpp +++ b/src/coreclr/vm/gcheaputilities.cpp @@ -19,6 +19,7 @@ 
uint8_t* g_ephemeral_low = (uint8_t*)1; uint8_t* g_ephemeral_high = (uint8_t*)~0; uint8_t* g_region_to_generation_table = nullptr; uint8_t g_region_shr = 0; +bool g_region_use_bitwise_write_barrier = false; #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES uint32_t* g_card_bundle_table = nullptr; diff --git a/src/coreclr/vm/gcheaputilities.h b/src/coreclr/vm/gcheaputilities.h index 8d1e301b93e7f0..a5afa7c90f98d6 100644 --- a/src/coreclr/vm/gcheaputilities.h +++ b/src/coreclr/vm/gcheaputilities.h @@ -31,6 +31,7 @@ extern "C" uint8_t* g_ephemeral_low; extern "C" uint8_t* g_ephemeral_high; extern "C" uint8_t* g_region_to_generation_table; extern "C" uint8_t g_region_shr; +extern "C" bool g_region_use_bitwise_write_barrier; #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 852067d3421d60..07f4a242946b09 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -262,14 +262,16 @@ class WriteBarrierManager #ifdef FEATURE_SVR_GC WRITE_BARRIER_SVR64, #endif // FEATURE_SVR_GC - WRITE_BARRIER_REGIONS64, + WRITE_BARRIER_BYTE_REGIONS64, + WRITE_BARRIER_BIT_REGIONS64, #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WRITE_BARRIER_WRITE_WATCH_PREGROW64, WRITE_BARRIER_WRITE_WATCH_POSTGROW64, #ifdef FEATURE_SVR_GC WRITE_BARRIER_WRITE_WATCH_SVR64, #endif // FEATURE_SVR_GC - WRITE_BARRIER_WRITE_WATCH_REGIONS64, + WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64, + WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64, #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP WRITE_BARRIER_BUFFER }; @@ -291,7 +293,7 @@ class WriteBarrierManager PBYTE CalculatePatchLocation(LPVOID base, LPVOID label, int offset); PCODE GetCurrentWriteBarrierCode(); int ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, bool isRuntimeSuspended); - bool NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, WriteBarrierType* pNewWriteBarrierType); + bool NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, bool bUseBitwiseWriteBarrier, WriteBarrierType* pNewWriteBarrierType); private: void Validate(); From 5c2e9f3ac9fc4aef346f7b758cb4000210ce169d Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Fri, 13 May 2022 07:34:04 +0200 Subject: [PATCH 17/23] Add narrowing cast to fix build issue, remove temporary hack to force release write barrier in debug builds. 
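The narrowing cast referred to here fixes a build break in set_region_plan_gen_num: the packed expression (plan_gen_num << 4) | (entry & 0x0f) is evaluated as int in C++, so assigning it back into a uint8_t table slot is an implicit narrowing conversion that some compilers treat as an error. A minimal sketch of the pattern, assuming a standalone helper (pack_plan_gen is an illustrative name, not part of the patch):

    #include <cstdint>

    // Illustrative only: store the planned generation in the high nibble of a
    // region entry while keeping the current generation in the low nibble.
    static inline uint8_t pack_plan_gen (uint8_t entry, int plan_gen_num)
    {
        // (plan_gen_num << 4) | ... is computed as int; the explicit (uint8_t)
        // cast avoids the implicit narrowing that broke the build.
        return (uint8_t)((plan_gen_num << 4) | (entry & 0x0f));
    }

The actual change applies the same cast inline at the assignment in gc.cpp, as shown in the diff below.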
--- src/coreclr/gc/gc.cpp | 2 +- src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 174458192b0f7c..820b99a605fd2d 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -11412,7 +11412,7 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { assert (plan_gen_num <= max_generation); - map_region_to_generation[region_index] = (plan_gen_num << 4) | (map_region_to_generation[region_index] & 0x0f); + map_region_to_generation[region_index] = (uint8_t)((plan_gen_num << 4) | (map_region_to_generation[region_index] & 0x0f)); } } diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index a4036c7880a28a..1d5a801a9817d2 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -698,9 +698,9 @@ bool WriteBarrierManager::NeedDifferentWriteBarrier(bool bReqUpperBoundsCheck, b case WRITE_BARRIER_UNINITIALIZED: #ifdef _DEBUG // Use the default slow write barrier some of the time in debug builds because of of contains some good asserts - //if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DbgRandomOnExe(0.5)) { - // break; - //} + if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DbgRandomOnExe(0.5)) { + break; + } #endif if (g_region_shr != 0) { From cd5cb8c68c1443216119334c93018f54bc6a725e Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 25 Jul 2022 14:33:18 +0200 Subject: [PATCH 18/23] Generalize the lookup via the map_region_to_generation table to also contain flags to indicate whether a region is sweep-in-plan, and whether it has been demoted. Generalize the config setting to change the write barrier to allow reverting to the SVR type write barrier as well. Bug fixes concerning setting the ephemeral limits in the write barrier, and where to compute the ephemeral limits within the GC. Use the lookup via the map_region_to_generation table in the mark phase as well. 
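As a rough sketch of the encoding described here: after this patch, each byte in the region-to-generation table carries the region's current generation, its planned generation, and two flag bits (sweep-in-plan and demoted), so both the write barrier and the GC can answer these questions with a single byte load. The bit positions below are assumptions, chosen only to be consistent with the RI_GEN_MASK, RI_SIP, RI_DEMOTED, RI_PLAN_GEN_SHR and RI_PLAN_GEN_MASK names used in the gc.cpp hunks that follow; the authoritative definition is the region_info enum this patch adds to gcpriv.h, which is not reproduced in full here.

    #include <cstdint>

    // Hypothetical layout, for illustration only; the real values live in gcpriv.h.
    enum region_info : uint8_t
    {
        RI_GEN_MASK      = 0x03, // low bits: current generation (0..2)
        RI_SIP           = 0x04, // region was swept in plan
        RI_DEMOTED       = 0x08, // region was demoted during planning
        RI_PLAN_GEN_SHR  = 6,    // shift to extract the planned generation
        RI_PLAN_GEN_MASK = 0xC0, // high bits: planned generation (0..2)
    };

    // Decoding helpers mirroring the accesses in the diff (names are illustrative).
    static inline int  region_gen        (uint8_t entry) { return entry & RI_GEN_MASK; }
    static inline int  region_plan_gen   (uint8_t entry) { return entry >> RI_PLAN_GEN_SHR; }
    static inline bool region_is_sip     (uint8_t entry) { return (entry & RI_SIP) != 0; }
    static inline bool region_is_demoted (uint8_t entry) { return (entry & RI_DEMOTED) != 0; }

Under a layout like this, a gen 0 region with no flags and plan gen 0 encodes as zero, which is what lets the assembly barriers keep their single "cmp byte ptr [...], 0" fast path for stores into gen 0.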
--- src/coreclr/gc/gc.cpp | 334 +++++++++++++++-------- src/coreclr/gc/gcconfig.h | 10 +- src/coreclr/gc/gcpriv.h | 52 +++- src/coreclr/vm/amd64/JitHelpers_Fast.asm | 2 +- src/coreclr/vm/amd64/jithelpers_fast.S | 2 +- 5 files changed, 277 insertions(+), 123 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 138b42b6c57922..eb174147114f14 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -2003,9 +2003,43 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b GCToEEInterface::StompWriteBarrier(&args); } +#ifdef USE_REGIONS +void region_write_barrier_settings (WriteBarrierParameters* args, + gc_heap::region_info* map_region_to_generation_skewed, + uint8_t region_shr) +{ + switch (GCConfig::GetGCWriteBarrier()) + { + default: + case GCConfig::WRITE_BARRIER_DEFAULT: + case GCConfig::WRITE_BARRIER_REGION_BIT: + // bitwise region write barrier is the default now + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + args->region_use_bitwise_write_barrier = true; + break; + + case GCConfig::WRITE_BARRIER_REGION_BYTE: + // bytewise region write barrier + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + assert (args->region_use_bitwise_write_barrier == false); + break; + + case GCConfig::WRITE_BARRIER_SERVER: + // server write barrier + // args should have been zero initialized + assert (args->region_use_bitwise_write_barrier == false); + assert (args->region_to_generation_table == nullptr); + assert (args->region_shr == 0); + break; + } +} +#endif //USE_REGIONS + void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high #ifdef USE_REGIONS - , uint8_t* map_region_to_generation_skewed + , gc_heap::region_info* map_region_to_generation_skewed , uint8_t region_shr #endif //USE_REGIONS ) @@ -2018,16 +2052,14 @@ void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_h args.ephemeral_low = ephemeral_low; args.ephemeral_high = ephemeral_high; #ifdef USE_REGIONS - args.region_to_generation_table = map_region_to_generation_skewed; - args.region_shr = region_shr; - args.region_use_bitwise_write_barrier = GCConfig::GetGCBitwiseWriteBarrier() != 0; + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); #endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); } void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high #ifdef USE_REGIONS - , uint8_t* map_region_to_generation_skewed + , gc_heap::region_info* map_region_to_generation_skewed , uint8_t region_shr #endif //USE_REGIONS ) @@ -2048,9 +2080,7 @@ void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_h args.ephemeral_high = ephemeral_high; #ifdef USE_REGIONS - args.region_to_generation_table = map_region_to_generation_skewed; - args.region_shr = region_shr; - args.region_use_bitwise_write_barrier = GCConfig::GetGCBitwiseWriteBarrier() != 0; + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); #endif //USE_REGIONS GCToEEInterface::StompWriteBarrier(&args); @@ -2310,8 +2340,8 @@ region_allocator global_region_allocator; uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; size_t gc_heap::region_count = 0; -uint8_t* gc_heap::map_region_to_generation = nullptr; -uint8_t* gc_heap::map_region_to_generation_skewed = nullptr; +gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; 
+gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; #endif //USE_REGIONS @@ -2352,8 +2382,12 @@ VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; #endif //BACKGROUND_GC #ifdef USE_REGIONS -uint8_t* gc_heap::ephemeral_low; -uint8_t* gc_heap::ephemeral_high; +#ifdef MULTIPLE_HEAPS +uint8_t* gc_heap::gc_low; +uint8_t* gc_heap::gc_high; +#endif //MULTIPLE_HEAPS +VOLATILE(uint8_t*) gc_heap::ephemeral_low; +VOLATILE(uint8_t*) gc_heap::ephemeral_high; #endif //USE_REGIONS #ifndef MULTIPLE_HEAPS @@ -3529,28 +3563,18 @@ sorted_table::clear() #ifdef USE_REGIONS inline -size_t get_basic_region_index_for_address (uint8_t* address) -{ - size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; - return (basic_region_index - ((size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr)); -} - -inline int get_gen_num_for_address (uint8_t *address) +size_t get_skewed_basic_region_index_for_address (uint8_t* address) { assert ((g_gc_lowest_address <= address) && (address < g_gc_highest_address)); - size_t region_index = (size_t)address >> gc_heap::min_segment_size_shr; - int gen_num = gc_heap::map_region_to_generation_skewed[region_index] & 0x0f; - assert ((0 <= gen_num) && (gen_num <= 2)); - return gen_num; + size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; + return skewed_basic_region_index; } -inline int get_plan_gen_num_for_address (uint8_t *address) +inline +size_t get_basic_region_index_for_address (uint8_t* address) { - assert ((g_gc_lowest_address <= address) && (address < g_gc_highest_address)); - size_t region_index = (size_t)address >> gc_heap::min_segment_size_shr; - int plan_gen_num = gc_heap::map_region_to_generation_skewed[region_index] >> 4; - assert ((0 <= plan_gen_num) && (plan_gen_num <= 2)); - return plan_gen_num; + size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address); + return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address)); } // Go from a random address to its region info. 
The random address could be @@ -7820,27 +7844,10 @@ bool gc_heap::should_check_brick_for_reloc (uint8_t* o) { assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); - int condemned_gen = settings.condemned_generation; - if (condemned_gen < max_generation) - { - heap_segment* region = region_of (o); - int gen = get_region_gen_num (region); - if ((gen > condemned_gen) || (heap_segment_swept_in_plan (region))) - { - if (heap_segment_swept_in_plan (region)) - { - dprintf (4444, ("-Rsip %Ix", o)); - } - - return false; - } - } - else if (heap_segment_swept_in_plan (region_of (o))) - { - return false; - } + size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (o); - return true; + // return true if the region is not SIP and the generation is <= condemned generation + return (map_region_to_generation_skewed[skewed_basic_region_index] & (RI_SIP|RI_GEN_MASK)) <= settings.condemned_generation; } #endif //USE_REGIONS @@ -8886,7 +8893,7 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) #endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && BACKGROUND_GC #ifdef USE_REGIONS - map_region_to_generation = mem + card_table_element_layout[region_to_generation_table_element]; + map_region_to_generation = (region_info*)(mem + card_table_element_layout[region_to_generation_table_element]); map_region_to_generation_skewed = map_region_to_generation - size_region_to_generation_table_of (0, g_gc_lowest_address); #endif //USE_REGIONS @@ -11313,17 +11320,28 @@ int gc_heap::get_region_gen_num (heap_segment* region) int gc_heap::get_region_gen_num (uint8_t* obj) { - return heap_segment_gen_num (region_of (obj)); + size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (obj); + int gen_num = map_region_to_generation_skewed[skewed_basic_region_index] & gc_heap::RI_GEN_MASK; + assert ((soh_gen0 <= gen_num) && (gen_num <= soh_gen2)); + assert (gen_num == heap_segment_gen_num (region_of (obj))); + return gen_num; } int gc_heap::get_region_plan_gen_num (uint8_t* obj) { - return heap_segment_plan_gen_num (region_of (obj)); + size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (obj); + int plan_gen_num = map_region_to_generation_skewed[skewed_basic_region_index] >> gc_heap::RI_PLAN_GEN_SHR; + assert ((soh_gen0 <= plan_gen_num) && (plan_gen_num <= soh_gen2)); + assert (plan_gen_num == heap_segment_plan_gen_num (region_of (obj))); + return plan_gen_num; } bool gc_heap::is_region_demoted (uint8_t* obj) { - return heap_segment_demoted_p (region_of (obj)); + size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (obj); + bool demoted_p = (map_region_to_generation_skewed[skewed_basic_region_index] & gc_heap::RI_DEMOTED) != 0; + assert (demoted_p == heap_segment_demoted_p (region_of (obj))); + return demoted_p; } inline @@ -11338,7 +11356,7 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) size_t region_index_start = get_basic_region_index_for_address (region_start); size_t region_index_end = get_basic_region_index_for_address (region_end); - uint8_t entry = (uint8_t)((gen_num << 4) | gen_num); + region_info entry = (region_info)((gen_num << RI_PLAN_GEN_SHR) | gen_num); for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { assert (gen_num <= max_generation); @@ -11346,35 +11364,61 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) } if (gen_num <= soh_gen1) { - bool success_low = false; - bool success_high = false; - bool 
ephemeral_change = false; - while (!(success_low && success_high)) + if ((region_start < ephemeral_low) || (ephemeral_high < region_end)) { - uint8_t* current_ephemeral_low = ephemeral_low; - if (current_ephemeral_low <= region_start) - success_low = true; - else + static GCSpinLock write_barrier_spin_lock; + + while (true) { - success_low = (Interlocked::CompareExchangePointer (&ephemeral_low, region_start, current_ephemeral_low) == current_ephemeral_low); - if (success_low) - ephemeral_change = true; + if (Interlocked::CompareExchange(&write_barrier_spin_lock.lock, 0, -1) < 0) + break; + + if ((ephemeral_low <= region_start) && (region_end <= ephemeral_high)) + return; + + while (write_barrier_spin_lock.lock >= 0) + { + YieldProcessor(); // indicate to the processor that we are spinning + } } +#ifdef _DEBUG + write_barrier_spin_lock.holding_thread = GCToEEInterface::GetThread(); +#endif //_DEBUG - uint8_t* current_ephemeral_high = ephemeral_high; - if (current_ephemeral_high >= region_end) - success_high = true; + if ((region_start < ephemeral_low) || (ephemeral_high < region_end)) + { + uint8_t* new_ephemeral_low = min (region_start, ephemeral_low); + uint8_t* new_ephemeral_high = max (region_end, ephemeral_high); + + dprintf (REGIONS_LOG, ("about to set ephemeral_low = %Ix ephemeral_high = %Ix", new_ephemeral_low, new_ephemeral_high)); + + stomp_write_barrier_ephemeral (new_ephemeral_low, new_ephemeral_high, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); + + // we should only *decrease* ephemeral_low and only *increase* ephemeral_high + if (ephemeral_low < new_ephemeral_low) + GCToOSInterface::DebugBreak (); + if (new_ephemeral_high < ephemeral_high) + GCToOSInterface::DebugBreak (); + + // only set the globals *after* we have updated the write barrier + ephemeral_low = new_ephemeral_low; + ephemeral_high = new_ephemeral_high; + + dprintf (REGIONS_LOG, ("set ephemeral_low = %Ix ephemeral_high = %Ix", new_ephemeral_low, new_ephemeral_high)); + } else { - success_high = (Interlocked::CompareExchangePointer (&ephemeral_high, region_end, current_ephemeral_high) == current_ephemeral_high); - if (success_high) - ephemeral_change = true; + dprintf (REGIONS_LOG, ("leaving lock - no need to update ephemeral range [%Ix,%Ix[ for region [%Ix,%Ix]", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, region_start, region_end)); } +#ifdef _DEBUG + write_barrier_spin_lock.holding_thread = (Thread*)-1; +#endif //_DEBUG + write_barrier_spin_lock.lock = -1; } - if (ephemeral_change) + else { - stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, - map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); + dprintf (REGIONS_LOG, ("no need to update ephemeral range [%Ix,%Ix[ for region [%Ix,%Ix]", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, region_start, region_end)); } } } @@ -11390,6 +11434,7 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) gen_num, plan_gen_num, supposed_plan_gen_num, ((plan_gen_num < supposed_plan_gen_num) ? 
"DEMOTED" : "ND"))); + region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR); if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0)) { if (!settings.demotion) @@ -11398,6 +11443,7 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) } get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); region->flags |= heap_segment_flags_demoted; + region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED); } else { @@ -11414,7 +11460,7 @@ void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num) for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { assert (plan_gen_num <= max_generation); - map_region_to_generation[region_index] = (uint8_t)((plan_gen_num << 4) | (map_region_to_generation[region_index] & 0x0f)); + map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED))); } } @@ -11426,6 +11472,42 @@ void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_nu set_region_plan_gen_num (region, plan_gen_num); } } + +void gc_heap::set_region_sweep_in_plan (heap_segment*region) +{ + heap_segment_swept_in_plan (region) = true; + + // this should be a basic region + assert (get_region_size (region) == global_region_allocator.get_region_alignment()); + + uint8_t* region_start = get_region_start (region); + size_t region_index = get_basic_region_index_for_address (region_start); + map_region_to_generation[region_index] = (region_info)(map_region_to_generation[region_index] | RI_SIP); +} + +void gc_heap::clear_region_sweep_in_plan (heap_segment*region) +{ + heap_segment_swept_in_plan (region) = false; + + // this should be a basic region + assert (get_region_size (region) == global_region_allocator.get_region_alignment()); + + uint8_t* region_start = get_region_start (region); + size_t region_index = get_basic_region_index_for_address (region_start); + map_region_to_generation[region_index] = (region_info)(map_region_to_generation[region_index] & ~RI_SIP); +} + +void gc_heap::clear_region_demoted (heap_segment* region) +{ + region->flags &= ~heap_segment_flags_demoted; + + // this should be a basic region + assert (get_region_size (region) == global_region_allocator.get_region_alignment()); + + uint8_t* region_start = get_region_start (region); + size_t region_index = get_basic_region_index_for_address (region_start); + map_region_to_generation[region_index] = (region_info)(map_region_to_generation[region_index] & ~RI_DEMOTED); +} #endif //USE_REGIONS int gc_heap::get_plan_gen_num (int gen_number) @@ -21206,6 +21288,10 @@ void gc_heap::gc1() } #ifdef USE_REGIONS distribute_free_regions(); + verify_region_to_generation_map (); + compute_gc_and_ephemeral_range (settings.condemned_generation); + stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); #endif //USE_REGIONS #ifdef FEATURE_LOH_COMPACTION @@ -21270,6 +21356,10 @@ void gc_heap::gc1() { #ifdef USE_REGIONS distribute_free_regions(); + verify_region_to_generation_map (); + compute_gc_and_ephemeral_range (settings.condemned_generation); + stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, + map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); if (settings.condemned_generation == max_generation) { // age and print all kinds of free regions @@ -23209,30 +23299,40 @@ 
inline bool is_in_heap_range (uint8_t* o) return (o != nullptr); #endif //FEATURE_BASICFREEZE } + +inline bool gc_heap::is_in_gc_range (uint8_t* o) +{ +#ifdef FEATURE_BASICFREEZE + // we may have frozen objects in read only segments + // outside of the reserved address range of the gc heap + assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || + (o == nullptr) || (ro_segment_lookup (o) != nullptr)); +#else //FEATURE_BASICFREEZE + // without frozen objects, every non-null pointer must be + // within the heap + assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); +#endif //FEATURE_BASICFREEZE + return ((gc_low <= o) && (o < gc_high)); +} #endif //USE_REGIONS inline BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_gen) { #ifdef USE_REGIONS - assert (low == 0); - assert (high == 0); - if (is_in_heap_range (o)) + if ((o >= low) && (o < high)) { - BOOL already_marked = marked (o); - if (already_marked) - return FALSE; - if (condemned_gen == max_generation) + if (condemned_gen != max_generation && get_region_gen_num (o) > condemned_gen) { - set_marked (o); - return TRUE; + return FALSE; } - int gen = get_region_gen_num (o); - if (gen <= condemned_gen) + BOOL already_marked = marked (o); + if (already_marked) { - set_marked (o); - return TRUE; + return FALSE; } + set_marked(o); + return TRUE; } return FALSE; #else //USE_REGIONS @@ -24243,7 +24343,7 @@ inline void gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL) { #ifdef USE_REGIONS - if (is_in_heap_range (o) && is_in_condemned_gc (o)) + if (is_in_gc_range (o) && is_in_condemned_gc (o)) { mark_object_simple (&o THREAD_NUMBER_ARG); } @@ -25655,10 +25755,15 @@ void gc_heap::verify_region_to_generation_map() int gen_num = min (gen_number, soh_gen2); assert (gen_num == heap_segment_gen_num (region)); int plan_gen_num = heap_segment_plan_gen_num (region); + bool is_demoted = (region->flags & heap_segment_flags_demoted) != 0; + bool is_sweep_in_plan = heap_segment_swept_in_plan (region); for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) { - assert ((map_region_to_generation[region_index] & 0x0f) == gen_num); - assert ((map_region_to_generation[region_index] >> 4) == plan_gen_num); + region_info region_info_bits = map_region_to_generation[region_index]; + assert ((region_info_bits & RI_GEN_MASK) == gen_num); + assert ((region_info_bits >> RI_PLAN_GEN_SHR) == plan_gen_num); + assert (((region_info_bits & RI_SIP) != 0) == is_sweep_in_plan); + assert (((region_info_bits & RI_DEMOTED) != 0) == is_demoted); } } } @@ -25668,10 +25773,17 @@ void gc_heap::verify_region_to_generation_map() // recompute ephemeral range - it may have become too large because of temporary allocation // and deallocation of regions -void gc_heap::compute_ephemeral_range() +void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number) { ephemeral_low = MAX_PTR; ephemeral_high = nullptr; + gc_low = MAX_PTR; + gc_high = nullptr; + if (condemned_gen_number >= soh_gen2) + { + gc_low = g_gc_lowest_address; + gc_high = g_gc_highest_address; + } for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) { #ifdef MULTIPLE_HEAPS @@ -25687,11 +25799,17 @@ void gc_heap::compute_ephemeral_range() { ephemeral_low = min (ephemeral_low, get_region_start (region)); ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); + if (gen_number <= condemned_gen_number) + { + gc_low = min (gc_low, get_region_start (region)); + gc_high = max (gc_high, 
heap_segment_reserved (region)); + } } } } + dprintf (2, ("ephemeral_low = %Ix, ephemeral_high = %Ix, gc_low = %Ix, gc_high = %Ix", (uint8_t*)ephemeral_low, (uint8_t*)ephemeral_high, gc_low, gc_high)); } -#endif +#endif //USE_REGIONS void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) { @@ -25827,7 +25945,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) region_count = global_region_allocator.get_used_region_count(); grow_mark_list_piece(); verify_region_to_generation_map(); - compute_ephemeral_range(); + compute_gc_and_ephemeral_range (condemned_gen_number); #endif //USE_REGIONS GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false); @@ -30231,11 +30349,6 @@ void gc_heap::plan_phase (int condemned_gen_number) } #endif //!USE_REGIONS -#ifdef USE_REGIONS - verify_region_to_generation_map (); - compute_ephemeral_range(); -#endif //USE_REGIONS - #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting after Promotion granted")); @@ -30539,8 +30652,8 @@ heap_segment* gc_heap::find_first_valid_region (heap_segment* region, bool compa } // Take this opportunity to make sure all the regions left with flags only for this GC are reset. - heap_segment_swept_in_plan (current_region) = false; - current_region->flags &= ~heap_segment_flags_demoted; + clear_region_sweep_in_plan (current_region); + clear_region_demoted (current_region); return current_region; } @@ -30953,7 +31066,7 @@ void gc_heap::sweep_region_in_plan (heap_segment* region, uint8_t**& mark_list_next, uint8_t** mark_list_index) { - heap_segment_swept_in_plan (region) = true; + set_region_sweep_in_plan (region); region->init_free_list(); @@ -31084,9 +31197,9 @@ inline void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) { uint8_t* child_object = *pval; - if (!is_in_heap_range (child_object)) + if (!is_in_gc_range (child_object)) return; - if (!child_object) return; + assert (child_object != nullptr); int child_object_plan_gen = get_region_plan_gen_num (child_object); if (child_object_plan_gen < parent_gen_num) @@ -31630,7 +31743,7 @@ void gc_heap::relocate_address (uint8_t** pold_address THREAD_NUMBER_DCL) { uint8_t* old_address = *pold_address; #ifdef USE_REGIONS - if (!is_in_heap_range (old_address) || !should_check_brick_for_reloc (old_address)) + if (!is_in_gc_range (old_address) || !should_check_brick_for_reloc (old_address)) { return; } @@ -31770,7 +31883,7 @@ gc_heap::check_demotion_helper (uint8_t** pval, uint8_t* parent_obj) { #ifdef USE_REGIONS uint8_t* child_object = *pval; - if (!is_in_heap_range (child_object)) + if (!is_in_gc_range (child_object)) return; int child_object_plan_gen = get_region_plan_gen_num (child_object); bool child_obj_demoted_p = is_region_demoted (child_object); @@ -32603,7 +32716,6 @@ void gc_heap::relocate_phase (int condemned_gen_number, #ifdef USE_REGIONS verify_region_to_generation_map(); - compute_ephemeral_range(); #endif //USE_REGIONS #ifdef MULTIPLE_HEAPS @@ -37109,8 +37221,7 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, if ((child_object < ephemeral_low) || (ephemeral_high <= child_object)) return; - int child_object_gen = get_gen_num_for_address (child_object); - assert (child_object_gen == get_region_gen_num (child_object)); + int child_object_gen = get_region_gen_num (child_object); int saved_child_object_gen = child_object_gen; uint8_t* saved_child_object = child_object; @@ -37122,8 +37233,7 
@@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, if (fn == &gc_heap::relocate_address) { - child_object_gen = get_plan_gen_num_for_address (*poo); - assert (child_object_gen == get_region_plan_gen_num (*poo)); + child_object_gen = get_region_plan_gen_num (*poo); } if (child_object_gen < current_gen) @@ -44588,7 +44698,7 @@ bool GCHeap::IsPromoted(Object* object) else { #ifdef USE_REGIONS - return (is_in_heap_range (o) ? (gc_heap::is_in_condemned_gc (o) ? gc_heap::is_mark_set (o) : true) : true); + return (gc_heap::is_in_gc_range (o) ? (gc_heap::is_in_condemned_gc (o) ? gc_heap::is_mark_set (o) : true) : true); #else gc_heap* hp = gc_heap::heap_of (o); return (!((o < hp->gc_high) && (o >= hp->gc_low)) diff --git a/src/coreclr/gc/gcconfig.h b/src/coreclr/gc/gcconfig.h index ab59f6a687aab5..e3666f96e8e061 100644 --- a/src/coreclr/gc/gcconfig.h +++ b/src/coreclr/gc/gcconfig.h @@ -134,7 +134,7 @@ class GCConfigStringHolder INT_CONFIG (GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", "System.GC.HeapHardLimitPOHPercent", 0, "Specifies the GC heap POH usage as a percentage of the total memory") \ INT_CONFIG (GCEnabledInstructionSets, "GCEnabledInstructionSets", NULL, -1, "Specifies whether GC can use AVX2 or AVX512F - 0 for neither, 1 for AVX2, 3 for AVX512F")\ INT_CONFIG (GCConserveMem, "GCConserveMemory", NULL, 0, "Specifies how hard GC should try to conserve memory - values 0-9") \ - INT_CONFIG (GCBitwiseWriteBarrier, "GCBitwiseWriteBarrier", NULL, 0, "Specifies whether GC should use more precise but slower write barrier") \ + INT_CONFIG (GCWriteBarrier, "GCWriteBarrier", NULL, 0, "Specifies whether GC should use more precise but slower write barrier") \ // This class is responsible for retreiving configuration information // for how the GC should operate. @@ -174,6 +174,14 @@ enum HeapVerifyFlags { HEAPVERIFY_DEEP_ON_COMPACT = 0x80 // Performs deep object verfication only on compacting GCs. }; +enum WriteBarrierFlavor +{ + WRITE_BARRIER_DEFAULT = 0, + WRITE_BARRIER_REGION_BIT = 1, + WRITE_BARRIER_REGION_BYTE = 2, + WRITE_BARRIER_SERVER = 3, +}; + // Initializes the GCConfig subsystem. Must be called before accessing any // configuration information. static void Initialize(); diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index fccae48fc3a7c5..7257d9e9f26d7b 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -52,7 +52,7 @@ inline void FATAL_GC_ERROR() // This means any empty regions can be freely used for any generation. For // Server GC we will balance regions between heaps. 
// For now disable regions for StandAlone GC, NativeAOT and MacOS builds -#if defined (HOST_64BIT) && !defined (BUILD_AS_STANDALONE) && !defined(__APPLE__) && !defined(FEATURE_NATIVEAOT) +#if defined (HOST_64BIT) && !defined(__APPLE__) && !defined(FEATURE_NATIVEAOT) #define USE_REGIONS #endif //HOST_64BIT && BUILD_AS_STANDALONE @@ -1377,6 +1377,12 @@ class gc_heap PER_HEAP void set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num); PER_HEAP + void set_region_sweep_in_plan (heap_segment* region); + PER_HEAP + void clear_region_sweep_in_plan (heap_segment* region); + PER_HEAP + void clear_region_demoted (heap_segment* region); + PER_HEAP void decide_on_demotion_pin_surv (heap_segment* region); PER_HEAP void skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num); @@ -1449,7 +1455,7 @@ class gc_heap void verify_region_to_generation_map(); PER_HEAP_ISOLATED - void compute_ephemeral_range(); + void compute_gc_and_ephemeral_range (int condemned_gen_number); #ifdef STRESS_REGIONS PER_HEAP void pin_by_gc (uint8_t* object); @@ -3084,6 +3090,8 @@ class gc_heap #endif //BACKGROUND_GC #ifdef USE_REGIONS + PER_HEAP_ISOLATED + bool is_in_gc_range (uint8_t* o); // o is guaranteed to be in the heap range. PER_HEAP_ISOLATED bool is_in_condemned_gc (uint8_t* o); @@ -3679,12 +3687,34 @@ class gc_heap // there are actually two generation numbers per entry: // - the region's current generation // - the region's planned generation, i.e. after the GC + // and there are flags + // - whether the region is sweep in plan + // - and whether the region is demoted + enum region_info : uint8_t + { + // lowest 2 bits are current generation number + RI_GEN_0 = 0x0, + RI_GEN_1 = 0x1, + RI_GEN_2 = 0x2, + RI_GEN_MASK = 0x3, + + // we have 4 bits available for flags, of which 2 are used + RI_SIP = 0x4, + RI_DEMOTED = 0x8, + + // top 2 bits are planned generation number + RI_PLAN_GEN_SHR = 0x6, // how much to shift the value right to obtain plan gen + RI_PLAN_GEN_0 = 0x00, + RI_PLAN_GEN_1 = 0x40, + RI_PLAN_GEN_2 = 0x80, + RI_PLAN_GEN_MASK= 0xC0, + }; PER_HEAP_ISOLATED - uint8_t* map_region_to_generation; + region_info* map_region_to_generation; // same table as above, but skewed so that we can index // directly with address >> min_segment_size_shr PER_HEAP_ISOLATED - uint8_t* map_region_to_generation_skewed; + region_info* map_region_to_generation_skewed; #endif //USE_REGIONS #define max_oom_history_count 4 @@ -3737,10 +3767,10 @@ class gc_heap #ifdef USE_REGIONS PER_HEAP_ISOLATED - uint8_t* ephemeral_low; //lowest ephemeral address + VOLATILE(uint8_t*) ephemeral_low; //lowest ephemeral address PER_HEAP_ISOLATED - uint8_t* ephemeral_high; //highest ephemeral address + VOLATILE(uint8_t*) ephemeral_high; //highest ephemeral address #else //!USE_REGIONS PER_HEAP uint8_t* ephemeral_low; //lowest ephemeral address @@ -4102,13 +4132,19 @@ class gc_heap PER_HEAP uint64_t time_bgc_last; -//#ifndef USE_REGIONS +#ifdef USE_REGIONS + PER_HEAP_ISOLATED + uint8_t* gc_low; // low end of the lowest region being condemned + + PER_HEAP_ISOLATED + uint8_t* gc_high; // high end of the highest region being condemned +#else // USE_REGIONS PER_HEAP uint8_t* gc_low; // lowest address being condemned PER_HEAP uint8_t* gc_high; // highest address being condemned -//#endif //USE_REGIONS +#endif //USE_REGIONS PER_HEAP size_t mark_stack_tos; diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 4ea80159c2ea33..481b8c0cdb0402 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm 
+++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -345,7 +345,7 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP ; check if the source is in gen 2 - then it's not an ephemeral pointer shr rax, cl add rax, [g_region_to_generation_table] - cmp byte ptr [rax], 22h + cmp byte ptr [rax], 82h je Exit ; check if the destination happens to be in gen 0 diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 6823ea4474fc0d..10519dab408ce2 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -366,7 +366,7 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT shr rax, cl PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 mov r10, [r10] - cmp byte ptr [rax + r10], 0x22 + cmp byte ptr [rax + r10], 0x82 je Exit_ByRefWriteBarrier // check if the destination happens to be in gen 0 From 653db9c950f38aa2e32c5d0877719c721e0fd1e0 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 25 Jul 2022 14:40:31 +0200 Subject: [PATCH 19/23] Revert hack to enable regions for clrc. --- src/coreclr/gc/gcpriv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 7257d9e9f26d7b..f650d4a03230b4 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -52,7 +52,7 @@ inline void FATAL_GC_ERROR() // This means any empty regions can be freely used for any generation. For // Server GC we will balance regions between heaps. // For now disable regions for StandAlone GC, NativeAOT and MacOS builds -#if defined (HOST_64BIT) && !defined(__APPLE__) && !defined(FEATURE_NATIVEAOT) +#if defined (HOST_64BIT) && !defined (BUILD_AS_STANDALONE) && !defined(__APPLE__) && !defined(FEATURE_NATIVEAOT) #define USE_REGIONS #endif //HOST_64BIT && BUILD_AS_STANDALONE From 2823b223d6c23e836da9089a0759f153107949da Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Mon, 1 Aug 2022 15:13:20 +0200 Subject: [PATCH 20/23] Fix logic error in check_demotion_helper(_sip): child_object is already relocated and thus shouldn't be tested against gc_low/gc_high. --- src/coreclr/gc/gc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index eb174147114f14..b56736b2509ec8 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -31197,7 +31197,7 @@ inline void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) { uint8_t* child_object = *pval; - if (!is_in_gc_range (child_object)) + if (!is_in_heap_range (child_object)) return; assert (child_object != nullptr); int child_object_plan_gen = get_region_plan_gen_num (child_object); @@ -31883,7 +31883,7 @@ gc_heap::check_demotion_helper (uint8_t** pval, uint8_t* parent_obj) { #ifdef USE_REGIONS uint8_t* child_object = *pval; - if (!is_in_gc_range (child_object)) + if (!is_in_heap_range (child_object)) return; int child_object_plan_gen = get_region_plan_gen_num (child_object); bool child_obj_demoted_p = is_region_demoted (child_object); From ecf265be127ee125609d3a3e4cb6520193655338 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Wed, 3 Aug 2022 15:37:40 +0200 Subject: [PATCH 21/23] Use conservative values for the ephemeral range so the write barrier doesn't need to be updated between GCs. Removed file name argument to _ASSERTE_ALL_BUILDS macro. 
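The conservative range works because the lower/upper bounds are only a cheap filter in the region write barriers - the byte read from the region-to-generation table is what actually decides whether a card gets dirtied, and that table stays precise. For reference, a simplified C++ model of what the byte-region barrier does per store (the static names below are stand-ins for the patched immediates - Patch_Label_Lower/Upper, Patch_Label_RegionToGeneration, the 0x16 region shift - and the exact sequence of checks in the assembly is abbreviated):

    #include <cstdint>
    #include <cstddef>

    static uint8_t* barrier_lower_bound;          // stomped from ephemeral_low
    static uint8_t* barrier_upper_bound;          // stomped from ephemeral_high
    static uint8_t* region_to_generation_table;   // map_region_to_generation_skewed
    static uint8_t  region_shr;                   // min_segment_size_shr (0x16)

    static void set_card_for_address_sketch (uint8_t** dst)
    {
        (void)dst;  // card / card bundle marking elided in this sketch
    }

    void write_barrier_byte_region_sketch (uint8_t** dst, uint8_t* ref)
    {
        *dst = ref;                                           // the store itself

        // cheap range filter on the stored reference
        if ((ref < barrier_lower_bound) || (barrier_upper_bound <= ref))
            return;

        // source is in a region that is gen2 now and planned to stay gen2
        // (0x82 == RI_PLAN_GEN_2 | RI_GEN_2) - it cannot be an ephemeral pointer
        if (region_to_generation_table[(size_t)ref >> region_shr] == 0x82)
            return;

        // destination itself sits in a gen0 region - it gets scanned anyway
        if ((region_to_generation_table[(size_t)dst >> region_shr] & 0x3) == 0)
            return;

        set_card_for_address_sketch (dst);                    // otherwise dirty the card
    }

With the bounds pinned to the reserved range, a free region that is handed to gen0 between GCs only needs its table entries updated; the stomp_write_barrier_ephemeral path in set_region_gen_num no longer has to run outside a GC.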
--- src/coreclr/gc/gc.cpp | 62 ++++++++---- src/coreclr/gc/gcpriv.h | 2 +- src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 108 ++++++++++----------- 3 files changed, 98 insertions(+), 74 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index dd92c238419ec4..ebebab1967e12c 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -21378,7 +21378,7 @@ void gc_heap::gc1() #ifdef USE_REGIONS distribute_free_regions(); verify_region_to_generation_map (); - compute_gc_and_ephemeral_range (settings.condemned_generation); + compute_gc_and_ephemeral_range (settings.condemned_generation, true); stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); #endif //USE_REGIONS @@ -21446,7 +21446,7 @@ void gc_heap::gc1() #ifdef USE_REGIONS distribute_free_regions(); verify_region_to_generation_map (); - compute_gc_and_ephemeral_range (settings.condemned_generation); + compute_gc_and_ephemeral_range (settings.condemned_generation, true); stomp_write_barrier_ephemeral (ephemeral_low, ephemeral_high, map_region_to_generation_skewed, (uint8_t)min_segment_size_shr); if (settings.condemned_generation == max_generation) @@ -25886,36 +25886,56 @@ void gc_heap::verify_region_to_generation_map() // recompute ephemeral range - it may have become too large because of temporary allocation // and deallocation of regions -void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number) +void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end_of_gc_p) { ephemeral_low = MAX_PTR; ephemeral_high = nullptr; gc_low = MAX_PTR; gc_high = nullptr; - if (condemned_gen_number >= soh_gen2) + if (condemned_gen_number >= soh_gen2 || end_of_gc_p) { gc_low = g_gc_lowest_address; gc_high = g_gc_highest_address; } - for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) + if (end_of_gc_p) { -#ifdef MULTIPLE_HEAPS - for (int i = 0; i < n_heaps; i++) + // scan our address space for a region that is either free + // or in an ephemeral generation + uint8_t* addr = g_gc_lowest_address; + while (true) { - gc_heap* hp = g_heaps[i]; -#else //MULTIPLE_HEAPS + heap_segment* region = get_region_info (addr); + if (is_free_region (region)) + break; + if (heap_segment_gen_num (region) <= soh_gen1) + break; + addr += ((size_t)1) << min_segment_size_shr; + } + ephemeral_low = addr; + ephemeral_high = g_gc_highest_address; + } + else + { + for (int gen_number = soh_gen0; gen_number <= soh_gen1; gen_number++) { - gc_heap* hp = pGenGCHeap; -#endif //MULTIPLE_HEAPS - generation *gen = hp->generation_of (gen_number); - for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) + #ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + #else //MULTIPLE_HEAPS { - ephemeral_low = min (ephemeral_low, get_region_start (region)); - ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); - if (gen_number <= condemned_gen_number) + gc_heap* hp = pGenGCHeap; + #endif //MULTIPLE_HEAPS + generation *gen = hp->generation_of (gen_number); + for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) { - gc_low = min (gc_low, get_region_start (region)); - gc_high = max (gc_high, heap_segment_reserved (region)); + ephemeral_low = min (ephemeral_low, get_region_start (region)); + ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); + if (gen_number <= 
condemned_gen_number) + { + gc_low = min (gc_low, get_region_start (region)); + gc_high = max (gc_high, heap_segment_reserved (region)); + } } } } @@ -26058,7 +26078,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) region_count = global_region_allocator.get_used_region_count(); grow_mark_list_piece(); verify_region_to_generation_map(); - compute_gc_and_ephemeral_range (condemned_gen_number); + compute_gc_and_ephemeral_range (condemned_gen_number, false); #endif //USE_REGIONS GCToEEInterface::BeforeGcScanRoots(condemned_gen_number, /* is_bgc */ false, /* is_concurrent */ false); @@ -30527,6 +30547,10 @@ void gc_heap::plan_phase (int condemned_gen_number) } #endif //!USE_REGIONS +#ifdef USE_REGIONS + verify_region_to_generation_map (); +#endif //USE_REGIONS + #ifdef MULTIPLE_HEAPS //join all threads to make sure they are synchronized dprintf(3, ("Restarting after Promotion granted")); diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 967510e26eb8b3..4c56fd886b6cc3 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -1459,7 +1459,7 @@ class gc_heap void verify_region_to_generation_map(); PER_HEAP_ISOLATED - void compute_gc_and_ephemeral_range (int condemned_gen_number); + void compute_gc_and_ephemeral_range (int condemned_gen_number, bool end_of_gc_p); #ifdef STRESS_REGIONS PER_HEAP void pin_by_gc (uint8_t* object); diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 4e93c64306fb3c..08d5e9493f6c22 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -205,33 +205,33 @@ void WriteBarrierManager::Validate() #endif // FEATURE_SVR_GC PBYTE pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Lower, 2); pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_Upper, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + 
_ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Lower, 2); pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_Upper, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -278,33 +278,33 @@ void WriteBarrierManager::Validate() #endif // FEATURE_SVR_GC pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Lower, 2); pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_Upper, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif pRegionToGenTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_RegionToGeneration, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pRegionToGenTableImmediate) & 0x7) == 0); pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Lower, 2); pUpperBoundImmediate = 
CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_Upper, 2); pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pLowerBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pUpperBoundImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardTableImmediate) & 0x7) == 0); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); + _ASSERTE_ALL_BUILDS((reinterpret_cast(pCardBundleTableImmediate) & 0x7) == 0); #endif #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -489,16 +489,16 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -511,16 +511,16 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -591,17 +591,17 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Byte_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -615,17 +615,17 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, m_pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardTable, 2); // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0). 
- _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrDest); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0x16 == *(UINT8 *)m_pRegionShrSrc); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pRegionToGenTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrDest); + _ASSERTE_ALL_BUILDS( 0x16 == *(UINT8 *)m_pRegionShrSrc); #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_Bit_Region64, Patch_Label_CardBundleTable, 2); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); + _ASSERTE_ALL_BUILDS(0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate); #endif break; @@ -664,16 +664,16 @@ void WriteBarrierManager::Initialize() #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_SVR64)); #endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BYTE_REGIONS64)); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BIT_REGIONS64)); + _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BYTE_REGIONS64)); + _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_BIT_REGIONS64)); #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_PREGROW64)); _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_POSTGROW64)); #ifdef FEATURE_SVR_GC _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_SVR64)); #endif // FEATURE_SVR_GC - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64)); - _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64)); + _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BYTE_REGIONS64)); + _ASSERTE_ALL_BUILDS(cbWriteBarrierBuffer >= GetSpecificWriteBarrierSize(WRITE_BARRIER_WRITE_WATCH_BIT_REGIONS64)); #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP #if !defined(CODECOVERAGE) From 88b3313e3293a14a23db0e562c1bfe6c962dab1b 
Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Wed, 3 Aug 2022 16:35:52 +0200 Subject: [PATCH 22/23] Fix Linux build issue with Volatile. --- src/coreclr/gc/gc.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index ebebab1967e12c..a81caadcd35d3b 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -11459,8 +11459,8 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) if ((region_start < ephemeral_low) || (ephemeral_high < region_end)) { - uint8_t* new_ephemeral_low = min (region_start, ephemeral_low); - uint8_t* new_ephemeral_high = max (region_end, ephemeral_high); + uint8_t* new_ephemeral_low = min (region_start, ephemeral_low.Load()); + uint8_t* new_ephemeral_high = max (region_end, ephemeral_high.Load()); dprintf (REGIONS_LOG, ("about to set ephemeral_low = %Ix ephemeral_high = %Ix", new_ephemeral_low, new_ephemeral_high)); @@ -25929,8 +25929,8 @@ void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end generation *gen = hp->generation_of (gen_number); for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) { - ephemeral_low = min (ephemeral_low, get_region_start (region)); - ephemeral_high = max (ephemeral_high, heap_segment_reserved (region)); + ephemeral_low = min (ephemeral_low.Load(), get_region_start (region)); + ephemeral_high = max (ephemeral_high.Load(), heap_segment_reserved (region)); if (gen_number <= condemned_gen_number) { gc_low = min (gc_low, get_region_start (region)); From a840b9bc1ad17335160368a770004e3928b2d359 Mon Sep 17 00:00:00 2001 From: Peter Sollich Date: Thu, 4 Aug 2022 10:16:37 +0200 Subject: [PATCH 23/23] Fix build issue on Windows - had overlooked the fact that on Windows, Volatile expands to T volatile. Use fixed ephemeral bounds for now, but keep more sophisticated code for setting ephemeral_low around. 
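Spelled out, the two ways of picking the end-of-GC bounds look roughly like this (a condensed sketch of the hunk below, reusing the gc.cpp helpers it calls, so it is a fragment rather than a standalone program; the precise variant stays in the source under #else):

    // (a) simple and safe: cover the whole reserved range, so no region that
    //     changes generation between GCs can fall outside the barrier's window
    ephemeral_low  = g_gc_lowest_address;
    ephemeral_high = g_gc_highest_address;

    // (b) more precise: walk basic regions from the bottom of the reserved
    //     range and stop at the first one that is free or already gen0/gen1;
    //     everything below that address is an occupied gen2/uoh region and
    //     cannot become ephemeral before the next GC recomputes the range
    uint8_t* addr = g_gc_lowest_address;
    while (true)
    {
        heap_segment* region = get_region_info (addr);
        if (is_free_region (region) || (heap_segment_gen_num (region) <= soh_gen1))
            break;
        addr += ((size_t)1) << min_segment_size_shr;
    }
    ephemeral_low  = addr;
    ephemeral_high = g_gc_highest_address;

Variant (a) is the one enabled for now: the barrier's range filter then passes more stores, but the 0x82 check on the region byte still rejects gen2 sources, and the bounds can never go stale while the app is running.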
--- src/coreclr/gc/gc.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index a81caadcd35d3b..9cf8e16b9e6f41 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -11459,8 +11459,8 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) if ((region_start < ephemeral_low) || (ephemeral_high < region_end)) { - uint8_t* new_ephemeral_low = min (region_start, ephemeral_low.Load()); - uint8_t* new_ephemeral_high = max (region_end, ephemeral_high.Load()); + uint8_t* new_ephemeral_low = min (region_start, (uint8_t*)ephemeral_low); + uint8_t* new_ephemeral_high = max (region_end, (uint8_t*)ephemeral_high); dprintf (REGIONS_LOG, ("about to set ephemeral_low = %Ix ephemeral_high = %Ix", new_ephemeral_low, new_ephemeral_high)); @@ -25899,6 +25899,12 @@ void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end } if (end_of_gc_p) { +#if 1 + // simple and safe value + ephemeral_low = g_gc_lowest_address; +#else + // conservative value - should still avoid changing + // ephemeral bounds in the write barrier while app is running // scan our address space for a region that is either free // or in an ephemeral generation uint8_t* addr = g_gc_lowest_address; @@ -25912,6 +25918,7 @@ void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end addr += ((size_t)1) << min_segment_size_shr; } ephemeral_low = addr; +#endif ephemeral_high = g_gc_highest_address; } else @@ -25929,8 +25936,8 @@ void gc_heap::compute_gc_and_ephemeral_range (int condemned_gen_number, bool end generation *gen = hp->generation_of (gen_number); for (heap_segment *region = generation_start_segment (gen); region != nullptr; region = heap_segment_next (region)) { - ephemeral_low = min (ephemeral_low.Load(), get_region_start (region)); - ephemeral_high = max (ephemeral_high.Load(), heap_segment_reserved (region)); + ephemeral_low = min ((uint8_t*)ephemeral_low, get_region_start (region)); + ephemeral_high = max ((uint8_t*)ephemeral_high, heap_segment_reserved (region)); if (gen_number <= condemned_gen_number) { gc_low = min (gc_low, get_region_start (region));