From 0a70d2a7b0d8613c4b3d14d56a2f444505c13877 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 29 Jan 2025 17:26:55 -0500 Subject: [PATCH 01/20] Reorder block list only once after 3-opt --- src/coreclr/jit/compiler.cpp | 25 +----- src/coreclr/jit/compiler.h | 10 +-- src/coreclr/jit/fgopt.cpp | 168 +++++++++++++++++++++-------------- src/coreclr/jit/jiteh.cpp | 57 +++--------- 4 files changed, 120 insertions(+), 140 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 33dda8c734ca1b..2cb2d0fd61d30d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5233,30 +5233,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // if (JitConfig.JitDoReversePostOrderLayout()) { - auto lateLayoutPhase = [this] { - // Skip preliminary reordering passes to create more work for 3-opt layout - if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10)) - { - m_dfsTree = fgComputeDfs(); - } - else - { - fgDoReversePostOrderLayout(); - fgMoveColdBlocks(); - } - - fgSearchImprovedLayout(); - fgInvalidateDfsTree(); - - if (compHndBBtabCount != 0) - { - fgRebuildEHRegions(); - } - - return PhaseStatus::MODIFIED_EVERYTHING; - }; - - DoPhase(this, PHASE_OPTIMIZE_LAYOUT, lateLayoutPhase); + DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout); } // Now that the flowgraph is finalized, run post-layout optimizations. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4a639c7e639f9e..84d865dc374cd4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3048,7 +3048,7 @@ class Compiler void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast); - void fgRebuildEHRegions(); + void fgFindEHRegionEnds(); void fgSkipRmvdBlocks(EHblkDsc* handlerTab); @@ -6315,7 +6315,7 @@ class Compiler bool fgReorderBlocks(bool useProfile); void fgDoReversePostOrderLayout(); void fgMoveColdBlocks(); - void fgSearchImprovedLayout(); + PhaseStatus fgSearchImprovedLayout(); class ThreeOptLayout { @@ -6341,11 +6341,11 @@ class Compiler void AddNonFallthroughPreds(unsigned blockPos); bool RunGreedyThreeOptPass(unsigned startPos, unsigned endPos); - bool RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock); + void RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock); public: - ThreeOptLayout(Compiler* comp); - void Run(); + ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks); + bool Run(); }; template diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 244d8bcd9efc1f..86e816b3a7fdea 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4454,6 +4454,7 @@ bool Compiler::fgReorderBlocks(bool useProfile) template void Compiler::fgMoveHotJumps() { + return; #ifdef DEBUG if (verbose) { @@ -4924,13 +4925,19 @@ void Compiler::fgMoveColdBlocks() // // Parameters: // comp - The Compiler instance +// hotBlocks - An array of the blocks worth reordering +// numHotBlocks - The number of blocks in 'hotBlocks' // -Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp) +// Notes: +// To save an allocation, we will reuse the DFS tree's underlying array for 'tempOrder'. +// This means we will trash the DFS tree. 
+// +Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks) : compiler(comp) , cutPoints(comp->getAllocator(CMK_FlowEdge), &ThreeOptLayout::EdgeCmp) - , blockOrder(nullptr) - , tempOrder(nullptr) - , numCandidateBlocks(0) + , blockOrder(hotBlocks) + , tempOrder(comp->m_dfsTree->GetPostOrder()) + , numCandidateBlocks(numHotBlocks) , currEHRegion(0) { } @@ -5228,49 +5235,21 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) // we're interested in reordering. // We skip reordering handler regions for now, as these are assumed to be cold. // -void Compiler::ThreeOptLayout::Run() +// Returns: +// True if any blocks were moved +// +bool Compiler::ThreeOptLayout::Run() { - // Since we moved all cold blocks to the end of the method already, - // we should have a span of hot blocks to consider reordering at the beginning of the method - // (unless none of the blocks are cold relative to the rest of the method, - // in which case we will reorder the whole main method body). - BasicBlock* const finalBlock = (compiler->fgFirstColdBlock != nullptr) ? compiler->fgFirstColdBlock->Prev() - : compiler->fgLastBBInMainFunction(); - - // Reset cold section pointer, in case we decide to do hot/cold splitting later - compiler->fgFirstColdBlock = nullptr; + assert(numCandidateBlocks > 0); - // We better have an end block for the hot section, and it better not be the start of a call-finally pair. - assert(finalBlock != nullptr); - assert(!finalBlock->isBBCallFinallyPair()); - - // For methods with fewer than three candidate blocks, we cannot partition anything - if (finalBlock->IsFirst() || finalBlock->Prev()->IsFirst()) + // Initialize ordinals, and find EH region ends + for (unsigned i = 0; i < numCandidateBlocks; i++) { - JITDUMP("Not enough blocks to partition anything. Skipping 3-opt.\n"); - return; - } - - // Get an upper bound on the number of hot blocks without walking the whole block list. - // We will only consider blocks reachable via normal flow. - const unsigned numBlocksUpperBound = compiler->m_dfsTree->GetPostOrderCount(); - assert(numBlocksUpperBound != 0); - blockOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound * 2]; - tempOrder = (blockOrder + numBlocksUpperBound); - - // Initialize the current block order - for (BasicBlock* const block : compiler->Blocks(compiler->fgFirstBB, finalBlock)) - { - if (!compiler->m_dfsTree->Contains(block)) - { - continue; - } - - assert(numCandidateBlocks < numBlocksUpperBound); - blockOrder[numCandidateBlocks] = tempOrder[numCandidateBlocks] = block; + BasicBlock* const block = blockOrder[i]; + tempOrder[i] = block; // Repurpose 'bbPostorderNum' for the block's ordinal - block->bbPostorderNum = numCandidateBlocks++; + block->bbPostorderNum = i; // While walking the span of blocks to reorder, // remember where each try region ends within this span. 
@@ -5283,7 +5262,6 @@ void Compiler::ThreeOptLayout::Run() } // Reorder try regions first - bool modified = false; for (EHblkDsc* const HBtab : EHClauses(compiler)) { // If multiple region indices map to the same region, @@ -5298,32 +5276,55 @@ void Compiler::ThreeOptLayout::Run() if ((tryBeg->bbPostorderNum < numCandidateBlocks) && (blockOrder[tryBeg->bbPostorderNum] == tryBeg)) { JITDUMP("Running 3-opt for try region #%d\n", (currEHRegion - 1)); - modified |= RunThreeOptPass(tryBeg, HBtab->ebdTryLast); + RunThreeOptPass(tryBeg, HBtab->ebdTryLast); } } // Finally, reorder the main method body currEHRegion = 0; JITDUMP("Running 3-opt for main method body\n"); - modified |= RunThreeOptPass(compiler->fgFirstBB, blockOrder[numCandidateBlocks - 1]); + RunThreeOptPass(blockOrder[0], blockOrder[numCandidateBlocks - 1]); - if (modified) + bool modified = false; + for (unsigned i = 1; i < numCandidateBlocks; i++) { - for (unsigned i = 1; i < numCandidateBlocks; i++) + BasicBlock* const block = blockOrder[i - 1]; + BasicBlock* const next = blockOrder[i]; + + if (block->NextIs(next)) { - BasicBlock* const block = blockOrder[i - 1]; - BasicBlock* const next = blockOrder[i]; + continue; + } - // Only reorder within EH regions to maintain contiguity. - // TODO: Allow moving blocks in different regions when 'next' is the region entry. - // This would allow us to move entire regions up/down because of the contiguity requirement. - if (!block->NextIs(next) && BasicBlock::sameEHRegion(block, next)) - { - compiler->fgUnlinkBlock(next); - compiler->fgInsertBBafter(block, next); - } + // Only reorder within EH regions to maintain contiguity. + if (!BasicBlock::sameEHRegion(block, next)) + { + continue; + } + + // Don't break up call-finally pairs + if (block->isBBCallFinallyPair() || next->isBBCallFinallyPairTail()) + { + continue; + } + + modified = true; + + // Move call-finally pairs in tandem + if (next->isBBCallFinallyPair()) + { + BasicBlock* const callFinallyTail = next->Next(); + compiler->fgUnlinkRange(next, callFinallyTail); + compiler->fgMoveBlocksAfter(next, callFinallyTail, block); + } + else + { + compiler->fgUnlinkBlock(next); + compiler->fgInsertBBafter(block, next); } } + + return modified; } //----------------------------------------------------------------------------- @@ -5535,10 +5536,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned // startBlock - The first block of the range to reorder // endBlock - The last block (inclusive) of the range to reorder // -// Returns: -// True if we reordered anything, false otherwise -// -bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock) +void Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock) { assert(startBlock != nullptr); assert(endBlock != nullptr); @@ -5551,7 +5549,7 @@ bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBloc if (numBlocks < 3) { JITDUMP("Not enough blocks to partition anything. 
Skipping reordering.\n"); - return false; + return; } JITDUMP("Initial layout cost: %f\n", GetLayoutCost(startPos, endPos)); @@ -5567,18 +5565,19 @@ bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBloc { JITDUMP("No changes made.\n"); } - - return modified; } //----------------------------------------------------------------------------- -// fgSearchImprovedLayout: Try to improve upon RPO-based layout with the 3-opt method: +// fgSearchImprovedLayout: Try to improve upon a RPO-based layout with the 3-opt method: // - Identify a range of hot blocks to reorder within // - Partition this set into three segments: S1 - S2 - S3 // - Evaluate cost of swapped layout: S1 - S3 - S2 // - If the cost improves, keep this layout // -void Compiler::fgSearchImprovedLayout() +// Returns: +// Suitable phase status +// +PhaseStatus Compiler::fgSearchImprovedLayout() { #ifdef DEBUG if (verbose) @@ -5591,8 +5590,41 @@ void Compiler::fgSearchImprovedLayout() } #endif // DEBUG - ThreeOptLayout layoutRunner(this); - layoutRunner.Run(); + // Before running 3-opt, compute a loop-aware RPO to get a sensible starting layout. + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; + + // When walking the RPO-based layout, only add hot blocks to the initial layout. + // We don't want to waste time running 3-opt on cold blocks. + unsigned numHotBlocks = 0; + auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { + if (!block->isBBWeightCold(this)) + { + initialLayout[numHotBlocks++] = block; + } + }; + + fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); + bool modified = false; + + if (numHotBlocks > 0) + { + ThreeOptLayout layoutRunner(this, initialLayout, numHotBlocks); + modified = layoutRunner.Run(); + + if (compHndBBtabCount > 0) + { + fgFindEHRegionEnds(); + } + } + else + { + JITDUMP("No hot blocks found. Skipping block reordering.\n"); + } + + fgInvalidateDfsTree(); + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------- diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 60c830aad8d592..b16fcab9d89790 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1276,10 +1276,9 @@ void Compiler::fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast) } //------------------------------------------------------------- -// fgRebuildEHRegions: After reordering blocks, make EH regions contiguous -// while maintaining relative block order, and update each region's end pointer. +// fgFindEHRegionEnds: Walk the block list, and set each try/handler region's end block. // -void Compiler::fgRebuildEHRegions() +void Compiler::fgFindEHRegionEnds() { assert(compHndBBtabCount != 0); unsigned unsetTryEnds = compHndBBtabCount; @@ -1292,46 +1291,6 @@ void Compiler::fgRebuildEHRegions() HBtab->ebdHndLast = nullptr; } - // Walk the main method body, and move try blocks to re-establish contiguity. - for (BasicBlock *block = fgFirstBB, *next; block != fgFirstFuncletBB; block = next) - { - next = block->Next(); - EHblkDsc* HBtab = ehGetBlockTryDsc(block); - if (HBtab != nullptr) - { - // Move this block up to the previous block in the same try region. 
- BasicBlock* const insertionPoint = HBtab->ebdTryLast; - if ((insertionPoint != nullptr) && !insertionPoint->NextIs(block)) - { - assert(block != HBtab->ebdTryLast); - fgUnlinkBlock(block); - fgInsertBBafter(HBtab->ebdTryLast, block); - } - - // Update this try region's (and all parent try regions') end pointer with the last block visited - for (unsigned XTnum = block->getTryIndex(); XTnum != EHblkDsc::NO_ENCLOSING_INDEX; - XTnum = ehGetEnclosingTryIndex(XTnum)) - { - HBtab = ehGetDsc(XTnum); - if (HBtab->ebdTryLast == nullptr) - { - assert(HBtab->ebdTryBeg == block); - assert(unsetTryEnds != 0); - unsetTryEnds--; - HBtab->ebdTryLast = block; - } - else if (HBtab->ebdTryLast->NextIs(block)) - { - HBtab->ebdTryLast = block; - } - } - } - } - - // The above logic rebuilt the try regions in the main method body. - // Now, resolve the regions in the funclet section, if there is one. - assert((unsetTryEnds == 0) || (fgFirstFuncletBB != nullptr)); - // Updates the try region's (and all of its parent regions') end block to 'block,' // if the try region's end block hasn't been updated yet. auto setTryEnd = [this, &unsetTryEnds](BasicBlock* block) { @@ -1372,6 +1331,18 @@ void Compiler::fgRebuildEHRegions() } }; + // Iterate backwards through the main method body, and update each try region's end block + for (BasicBlock* block = fgLastBBInMainFunction(); (unsetTryEnds != 0) && (block != nullptr); block = block->Prev()) + { + if (block->hasTryIndex()) + { + setTryEnd(block); + } + } + + // If we don't have a funclet section, then all of the try regions should have been updated above + assert((unsetTryEnds == 0) || (fgFirstFuncletBB != nullptr)); + // If we have a funclet section, update the ends of any try regions nested in funclets for (BasicBlock* block = fgLastBB; (unsetTryEnds != 0) && (block != fgLastBBInMainFunction()); block = block->Prev()) From 25389c94cd57234c892477392f94c0d9fb852997 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 29 Jan 2025 19:20:12 -0500 Subject: [PATCH 02/20] Fix block insertion --- src/coreclr/jit/fgopt.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 86e816b3a7fdea..c51b568f862642 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4454,7 +4454,6 @@ bool Compiler::fgReorderBlocks(bool useProfile) template void Compiler::fgMoveHotJumps() { - return; #ifdef DEBUG if (verbose) { @@ -5290,6 +5289,7 @@ bool Compiler::ThreeOptLayout::Run() { BasicBlock* const block = blockOrder[i - 1]; BasicBlock* const next = blockOrder[i]; + assert(block != next); if (block->NextIs(next)) { @@ -5314,8 +5314,11 @@ bool Compiler::ThreeOptLayout::Run() if (next->isBBCallFinallyPair()) { BasicBlock* const callFinallyTail = next->Next(); - compiler->fgUnlinkRange(next, callFinallyTail); - compiler->fgMoveBlocksAfter(next, callFinallyTail, block); + if (callFinallyTail != block) + { + compiler->fgUnlinkRange(next, callFinallyTail); + compiler->fgMoveBlocksAfter(next, callFinallyTail, block); + } } else { From df253174645bf78479d11dad734db315697fae89 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 30 Jan 2025 10:12:36 -0500 Subject: [PATCH 03/20] Run 3-opt once for all regions --- src/coreclr/jit/fgopt.cpp | 43 ++++++++------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index c51b568f862642..8281d9307def7a 100644 --- 
a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5131,7 +5131,7 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge) BasicBlock* const dstBlk = edge->getDestinationBlock(); // Ignore cross-region branches - if ((srcBlk->bbTryIndex != currEHRegion) || (dstBlk->bbTryIndex != currEHRegion)) + if (!BasicBlock::sameTryRegion(srcBlk, dstBlk)) { return; } @@ -5249,39 +5249,8 @@ bool Compiler::ThreeOptLayout::Run() // Repurpose 'bbPostorderNum' for the block's ordinal block->bbPostorderNum = i; - - // While walking the span of blocks to reorder, - // remember where each try region ends within this span. - // We'll use this information to run 3-opt per region. - EHblkDsc* const HBtab = compiler->ehGetBlockTryDsc(block); - if (HBtab != nullptr) - { - HBtab->ebdTryLast = block; - } - } - - // Reorder try regions first - for (EHblkDsc* const HBtab : EHClauses(compiler)) - { - // If multiple region indices map to the same region, - // make sure we reorder its blocks only once - BasicBlock* const tryBeg = HBtab->ebdTryBeg; - if (tryBeg->getTryIndex() != currEHRegion++) - { - continue; - } - - // Only reorder try regions within the candidate span of blocks - if ((tryBeg->bbPostorderNum < numCandidateBlocks) && (blockOrder[tryBeg->bbPostorderNum] == tryBeg)) - { - JITDUMP("Running 3-opt for try region #%d\n", (currEHRegion - 1)); - RunThreeOptPass(tryBeg, HBtab->ebdTryLast); - } } - // Finally, reorder the main method body - currEHRegion = 0; - JITDUMP("Running 3-opt for main method body\n"); RunThreeOptPass(blockOrder[0], blockOrder[numCandidateBlocks - 1]); bool modified = false; @@ -5302,7 +5271,13 @@ bool Compiler::ThreeOptLayout::Run() continue; } - // Don't break up call-finally pairs + // Don't move the entry of an EH region. + if (compiler->bbIsTryBeg(next) || compiler->bbIsHandlerBeg(next)) + { + continue; + } + + // Don't break up call-finally pairs. if (block->isBBCallFinallyPair() || next->isBBCallFinallyPairTail()) { continue; @@ -5310,7 +5285,7 @@ bool Compiler::ThreeOptLayout::Run() modified = true; - // Move call-finally pairs in tandem + // Move call-finally pairs in tandem. if (next->isBBCallFinallyPair()) { BasicBlock* const callFinallyTail = next->Next(); From 46539d554d1082b4c60f27d29b72c6058ffb7a6a Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 30 Jan 2025 10:27:21 -0500 Subject: [PATCH 04/20] Remove some EH checks --- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/fgopt.cpp | 21 --------------------- 2 files changed, 22 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 84d865dc374cd4..1ac1505eb323f6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6326,7 +6326,6 @@ class Compiler BasicBlock** blockOrder; BasicBlock** tempOrder; unsigned numCandidateBlocks; - unsigned currEHRegion; #ifdef DEBUG weight_t GetLayoutCost(unsigned startPos, unsigned endPos); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8281d9307def7a..97e710e484b73a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4937,7 +4937,6 @@ Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, , blockOrder(hotBlocks) , tempOrder(comp->m_dfsTree->GetPostOrder()) , numCandidateBlocks(numHotBlocks) - , currEHRegion(0) { } @@ -5144,14 +5143,6 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge) return; } - // For backward jumps, we will consider partitioning before 'srcBlk'. 
- // If 'srcBlk' is a BBJ_CALLFINALLYRET, this partition will split up a call-finally pair. - // Thus, don't consider edges out of BBJ_CALLFINALLYRET blocks. - if (srcBlk->KindIs(BBJ_CALLFINALLYRET)) - { - return; - } - const unsigned srcPos = srcBlk->bbPostorderNum; const unsigned dstPos = dstBlk->bbPostorderNum; assert(srcPos < compiler->m_dfsTree->GetPostOrderCount()); @@ -5439,18 +5430,6 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned BasicBlock* const s3Block = blockOrder[position]; BasicBlock* const s3BlockPrev = blockOrder[position - 1]; - // Don't consider any cut points that would break up call-finally pairs - if (s3Block->KindIs(BBJ_CALLFINALLYRET)) - { - continue; - } - - // Don't consider any cut points that would disturb other EH regions - if (!BasicBlock::sameEHRegion(s2Block, s3Block)) - { - continue; - } - // Compute the cost delta of this partition const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block); const weight_t newCost = From 84600ec3326f2caa8f86a79419f5aca566316101 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 30 Jan 2025 10:34:32 -0500 Subject: [PATCH 05/20] Remove unused layout methods --- src/coreclr/jit/compiler.h | 5 - src/coreclr/jit/fgopt.cpp | 439 ------------------------------------- src/coreclr/jit/lsra.cpp | 12 - 3 files changed, 456 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1ac1505eb323f6..c412b281fb267c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6313,8 +6313,6 @@ class Compiler bool fgComputeMissingBlockWeights(); bool fgReorderBlocks(bool useProfile); - void fgDoReversePostOrderLayout(); - void fgMoveColdBlocks(); PhaseStatus fgSearchImprovedLayout(); class ThreeOptLayout @@ -6347,9 +6345,6 @@ class Compiler bool Run(); }; - template - void fgMoveHotJumps(); - bool fgFuncletsAreCold(); PhaseStatus fgDetermineFirstColdBlock(); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 97e710e484b73a..3b37745e871242 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4445,445 +4445,6 @@ bool Compiler::fgReorderBlocks(bool useProfile) #pragma warning(pop) #endif -//----------------------------------------------------------------------------- -// fgMoveHotJumps: Try to move jumps to fall into their successors, if the jump is sufficiently hot. -// -// Template parameters: -// hasEH - If true, method has EH regions, so check that we don't try to move blocks in different regions -// -template -void Compiler::fgMoveHotJumps() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgMoveHotJumps()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - assert(m_dfsTree != nullptr); - BitVecTraits traits(m_dfsTree->PostOrderTraits()); - BitVec visitedBlocks = BitVecOps::MakeEmpty(&traits); - - // If we have a funclet region, don't bother reordering anything in it. 
- // - BasicBlock* next; - for (BasicBlock* block = fgFirstBB; block != fgFirstFuncletBB; block = next) - { - next = block->Next(); - if (!m_dfsTree->Contains(block)) - { - continue; - } - - BitVecOps::AddElemD(&traits, visitedBlocks, block->bbPostorderNum); - - // Don't bother trying to move cold blocks - // - if (block->isBBWeightCold(this)) - { - continue; - } - - FlowEdge* targetEdge; - FlowEdge* unlikelyEdge; - - if (block->KindIs(BBJ_ALWAYS)) - { - targetEdge = block->GetTargetEdge(); - unlikelyEdge = nullptr; - } - else if (block->KindIs(BBJ_COND)) - { - // Consider conditional block's most likely branch for moving - // - if (block->GetTrueEdge()->getLikelihood() > 0.5) - { - targetEdge = block->GetTrueEdge(); - unlikelyEdge = block->GetFalseEdge(); - } - else - { - targetEdge = block->GetFalseEdge(); - unlikelyEdge = block->GetTrueEdge(); - } - - // If we aren't sure which successor is hotter, and we already fall into one of them, - // do nothing - if ((unlikelyEdge->getLikelihood() == 0.5) && block->NextIs(unlikelyEdge->getDestinationBlock())) - { - continue; - } - } - else - { - // Don't consider other block kinds - // - continue; - } - - BasicBlock* target = targetEdge->getDestinationBlock(); - bool isBackwardJump = BitVecOps::IsMember(&traits, visitedBlocks, target->bbPostorderNum); - assert(m_dfsTree->Contains(target)); - - if (isBackwardJump) - { - // We don't want to change the first block, so if block is a backward jump to the first block, - // don't try moving block before it. - // - if (target->IsFirst()) - { - continue; - } - - if (block->KindIs(BBJ_COND)) - { - // This could be a loop exit, so don't bother moving this block up. - // Instead, try moving the unlikely target up to create fallthrough. - // - targetEdge = unlikelyEdge; - target = targetEdge->getDestinationBlock(); - isBackwardJump = BitVecOps::IsMember(&traits, visitedBlocks, target->bbPostorderNum); - assert(m_dfsTree->Contains(target)); - - if (isBackwardJump) - { - continue; - } - } - // Check for single-block loop case - // - else if (block == target) - { - continue; - } - } - - // Check if block already falls into target - // - if (block->NextIs(target)) - { - continue; - } - - if (target->isBBWeightCold(this)) - { - // If target is block's most-likely successor, and block is not rarely-run, - // perhaps the profile data is misleading, and we need to run profile repair? - // - continue; - } - - if (hasEH) - { - // Don't move blocks in different EH regions - // - if (!BasicBlock::sameEHRegion(block, target)) - { - continue; - } - - if (isBackwardJump) - { - // block and target are in the same try/handler regions, and target is behind block, - // so block cannot possibly be the start of the region. - // - assert(!bbIsTryBeg(block) && !bbIsHandlerBeg(block)); - - // Don't change the entry block of an EH region - // - if (bbIsTryBeg(target) || bbIsHandlerBeg(target)) - { - continue; - } - } - else - { - // block and target are in the same try/handler regions, and block is behind target, - // so target cannot possibly be the start of the region. 
- // - assert(!bbIsTryBeg(target) && !bbIsHandlerBeg(target)); - } - } - - // If moving block will break up existing fallthrough behavior into target, make sure it's worth it - // - FlowEdge* const fallthroughEdge = fgGetPredForBlock(target, target->Prev()); - if ((fallthroughEdge != nullptr) && (fallthroughEdge->getLikelyWeight() >= targetEdge->getLikelyWeight())) - { - continue; - } - - if (isBackwardJump) - { - // Move block to before target - // - fgUnlinkBlock(block); - fgInsertBBbefore(target, block); - } - else if (hasEH && target->isBBCallFinallyPair()) - { - // target is a call-finally pair, so move the pair up to block - // - fgUnlinkRange(target, target->Next()); - fgMoveBlocksAfter(target, target->Next(), block); - next = target->Next(); - } - else - { - // Move target up to block - // - fgUnlinkBlock(target); - fgInsertBBafter(block, target); - next = target; - } - } -} - -//----------------------------------------------------------------------------- -// fgDoReversePostOrderLayout: Reorder blocks using a greedy RPO traversal, -// taking care to keep loop bodies compact. -// -void Compiler::fgDoReversePostOrderLayout() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgDoReversePostOrderLayout()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - // Compute DFS of all blocks in the method, using profile data to determine the order successors are visited in. - // - m_dfsTree = fgComputeDfs(); - - // If LSRA didn't create any new blocks, we can reuse its loop-aware RPO traversal, - // which is cached in Compiler::fgBBs. - // If the cache isn't available, we need to recompute the loop-aware RPO. - // - BasicBlock** rpoSequence = fgBBs; - - if (rpoSequence == nullptr) - { - rpoSequence = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; - FlowGraphNaturalLoops* const loops = FlowGraphNaturalLoops::Find(m_dfsTree); - unsigned index = 0; - auto addToSequence = [rpoSequence, &index](BasicBlock* block) { - rpoSequence[index++] = block; - }; - - fgVisitBlocksInLoopAwareRPO(m_dfsTree, loops, addToSequence); - } - - // Fast path: We don't have any EH regions, so just reorder the blocks - // - if (compHndBBtabCount == 0) - { - for (unsigned i = 1; i < m_dfsTree->GetPostOrderCount(); i++) - { - BasicBlock* const block = rpoSequence[i - 1]; - BasicBlock* const blockToMove = rpoSequence[i]; - - if (!block->NextIs(blockToMove)) - { - fgUnlinkBlock(blockToMove); - fgInsertBBafter(block, blockToMove); - } - } - - fgMoveHotJumps(); - - return; - } - - // The RPO will break up call-finally pairs, so save them before re-ordering - // - struct CallFinallyPair - { - BasicBlock* callFinally; - BasicBlock* callFinallyRet; - - // Constructor provided so we can call ArrayStack::Emplace - // - CallFinallyPair(BasicBlock* first, BasicBlock* second) - : callFinally(first) - , callFinallyRet(second) - { - } - }; - - ArrayStack callFinallyPairs(getAllocator()); - - for (EHblkDsc* const HBtab : EHClauses(this)) - { - if (HBtab->HasFinallyHandler()) - { - for (BasicBlock* const pred : HBtab->ebdHndBeg->PredBlocks()) - { - assert(pred->KindIs(BBJ_CALLFINALLY)); - if (pred->isBBCallFinallyPair()) - { - callFinallyPairs.Emplace(pred, pred->Next()); - } - } - } - } - - // Reorder blocks - // - for (unsigned i = 1; i < m_dfsTree->GetPostOrderCount(); i++) - { - BasicBlock* const block = rpoSequence[i - 1]; - BasicBlock* const blockToMove = rpoSequence[i]; - - // Only reorder blocks within the same EH region -- we 
don't want to make them non-contiguous - // - if (BasicBlock::sameEHRegion(block, blockToMove)) - { - // Don't reorder EH regions with filter handlers -- we want the filter to come first - // - if (block->hasHndIndex() && ehGetDsc(block->getHndIndex())->HasFilter()) - { - continue; - } - - if (!block->NextIs(blockToMove)) - { - fgUnlinkBlock(blockToMove); - fgInsertBBafter(block, blockToMove); - } - } - } - - // Fix up call-finally pairs - // - for (int i = 0; i < callFinallyPairs.Height(); i++) - { - const CallFinallyPair& pair = callFinallyPairs.BottomRef(i); - fgUnlinkBlock(pair.callFinallyRet); - fgInsertBBafter(pair.callFinally, pair.callFinallyRet); - } - - fgMoveHotJumps(); -} - -//----------------------------------------------------------------------------- -// fgMoveColdBlocks: Move rarely-run blocks to the end of their respective regions. -// -// Notes: -// Exception handlers are assumed to be cold, so we won't move blocks within them. -// On platforms that don't use funclets, we should use Compiler::fgRelocateEHRegions to move cold handlers. -// Note that Compiler::fgMoveColdBlocks will break up EH regions to facilitate intermediate transformations. -// To reestablish contiguity of EH regions, callers need to follow this with Compiler::fgRebuildEHRegions. -// -void Compiler::fgMoveColdBlocks() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgMoveColdBlocks()\n"); - - printf("\nInitial BasicBlocks"); - fgDispBasicBlocks(verboseTrees); - printf("\n"); - } -#endif // DEBUG - - auto moveBlock = [this](BasicBlock* block, BasicBlock* insertionPoint) { - assert(block != insertionPoint); - // Don't move handler blocks. - // Also, leave try entries behind as a breadcrumb for where to reinsert try blocks. - if (!bbIsTryBeg(block) && !block->hasHndIndex()) - { - if (block->isBBCallFinallyPair()) - { - BasicBlock* const callFinallyRet = block->Next(); - if (callFinallyRet != insertionPoint) - { - fgUnlinkRange(block, callFinallyRet); - fgMoveBlocksAfter(block, callFinallyRet, insertionPoint); - } - } - else - { - fgUnlinkBlock(block); - fgInsertBBafter(insertionPoint, block); - } - } - }; - - BasicBlock* lastMainBB = fgLastBBInMainFunction(); - if (lastMainBB->IsFirst()) - { - return; - } - - // Search the main method body for rarely-run blocks to move - // - for (BasicBlock *block = lastMainBB->Prev(), *prev; !block->IsFirst(); block = prev) - { - prev = block->Prev(); - - // We only want to move cold blocks. - // Also, don't move block if it is the end of a call-finally pair, - // as we want to keep these pairs contiguous - // (if we encounter the beginning of a pair, we'll move the whole pair). - // - if (!block->isBBWeightCold(this) || block->isBBCallFinallyPairTail()) - { - continue; - } - - moveBlock(block, lastMainBB); - } - - // We have moved all cold main blocks before lastMainBB to after lastMainBB. - // If lastMainBB itself is cold, move it to the end of the method to restore its relative ordering. - // But first, we can't move just the tail of a call-finally pair, - // so point lastMainBB to the pair's head, if necessary. 
- // - if (lastMainBB->isBBCallFinallyPairTail()) - { - lastMainBB = lastMainBB->Prev(); - } - - BasicBlock* lastHotBB = nullptr; - if (lastMainBB->isBBWeightCold(this)) - { - // lastMainBB is cold, so the block behind it (if there is one) is the last hot block - // - lastHotBB = lastMainBB->Prev(); - - // Move lastMainBB - // - BasicBlock* const newLastMainBB = fgLastBBInMainFunction(); - if (lastMainBB != newLastMainBB) - { - moveBlock(lastMainBB, newLastMainBB); - } - } - else - { - // lastMainBB isn't cold, so it (or its call-finally pair tail) the last hot block - // - lastHotBB = lastMainBB->isBBCallFinallyPair() ? lastMainBB->Next() : lastMainBB; - } - - // Save the beginning of the cold section for later. - // If lastHotBB is null, there isn't a hot section, - // so there's no point in differentiating between sections for layout purposes. - // - fgFirstColdBlock = (lastHotBB == nullptr) ? nullptr : lastHotBB->Next(); -} - //----------------------------------------------------------------------------- // Compiler::ThreeOptLayout::EdgeCmp: Comparator for the 'cutPoints' priority queue. // If 'left' has a bigger edge weight than 'right', 3-opt will consider it first. diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c764a7259fba38..4d4460b9dea947 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1309,18 +1309,6 @@ PhaseStatus LinearScan::doLinearScan() compiler->compLSRADone = true; - // If edge resolution didn't create new blocks, - // cache the block sequence so it can be used as an initial layout during block reordering. - if (compiler->fgBBcount == bbSeqCount) - { - compiler->fgBBs = blockSequence; - } - else - { - assert(compiler->fgBBcount > bbSeqCount); - compiler->fgBBs = nullptr; - } - return PhaseStatus::MODIFIED_EVERYTHING; } From 289134afdd96ab7b54d4ec0eb85e592106c7e913 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 30 Jan 2025 11:28:04 -0500 Subject: [PATCH 06/20] Relax EH invariants; better stress mode --- src/coreclr/jit/fgopt.cpp | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 3b37745e871242..6b8b5608159c6b 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4691,15 +4691,7 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge) BasicBlock* const dstBlk = edge->getDestinationBlock(); // Ignore cross-region branches - if (!BasicBlock::sameTryRegion(srcBlk, dstBlk)) - { - return; - } - - // Don't waste time reordering within handler regions. - // Note that if a finally region is sufficiently hot, - // we should have cloned it into the main method body already. - if (srcBlk->hasHndIndex() || dstBlk->hasHndIndex()) + if (!BasicBlock::sameEHRegion(srcBlk, dstBlk)) { return; } @@ -4829,6 +4821,12 @@ bool Compiler::ThreeOptLayout::Run() continue; } + // Don't reorder filter regions. + if (block->hasHndIndex() && compiler->ehGetDsc(block->getHndIndex())->HasFilter()) + { + continue; + } + // Don't break up call-finally pairs. if (block->isBBCallFinallyPair() || next->isBBCallFinallyPairTail()) { @@ -5123,9 +5121,20 @@ PhaseStatus Compiler::fgSearchImprovedLayout() } }; - fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); - bool modified = false; + // Stress 3-opt by giving it the post-order traversal as its initial layout, + // but keep the method entry block at the beginning. 
+ if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10)) + { + numHotBlocks = m_dfsTree->GetPostOrderCount(); + memcpy(initialLayout, m_dfsTree->GetPostOrder(), sizeof(BasicBlock*) * numHotBlocks); + std::swap(initialLayout[0], initialLayout[numHotBlocks - 1]); + } + else + { + fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); + } + bool modified = false; if (numHotBlocks > 0) { ThreeOptLayout layoutRunner(this, initialLayout, numHotBlocks); From 94d4f84db01ecc995217a0df1632519e4a19f49b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 30 Jan 2025 12:42:48 -0500 Subject: [PATCH 07/20] Place cold blocks in RPO --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgopt.cpp | 34 ++++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c412b281fb267c..3c295832b91937 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6341,7 +6341,7 @@ class Compiler void RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock); public: - ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks); + ThreeOptLayout(Compiler* comp, BasicBlock** initialLayout, unsigned numHotBlocks); bool Run(); }; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 6b8b5608159c6b..e5bd149f8c29ea 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4485,17 +4485,17 @@ bool Compiler::fgReorderBlocks(bool useProfile) // // Parameters: // comp - The Compiler instance -// hotBlocks - An array of the blocks worth reordering -// numHotBlocks - The number of blocks in 'hotBlocks' +// initialLayout - An array of the blocks to be reordered +// numHotBlocks - The number of hot blocks at the beginning of 'initialLayout' // // Notes: // To save an allocation, we will reuse the DFS tree's underlying array for 'tempOrder'. // This means we will trash the DFS tree. 
// -Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks) +Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** initialLayout, unsigned numHotBlocks) : compiler(comp) , cutPoints(comp->getAllocator(CMK_FlowEdge), &ThreeOptLayout::EdgeCmp) - , blockOrder(hotBlocks) + , blockOrder(initialLayout) , tempOrder(comp->m_dfsTree->GetPostOrder()) , numCandidateBlocks(numHotBlocks) { @@ -4785,7 +4785,7 @@ bool Compiler::ThreeOptLayout::Run() { assert(numCandidateBlocks > 0); - // Initialize ordinals, and find EH region ends + // Initialize ordinals for the hot blocks for (unsigned i = 0; i < numCandidateBlocks; i++) { BasicBlock* const block = blockOrder[i]; @@ -4795,10 +4795,16 @@ bool Compiler::ThreeOptLayout::Run() block->bbPostorderNum = i; } + // Copy cold block layout over to 'tempOrder' so we don't lose it + const unsigned numColdBlocks = compiler->m_dfsTree->GetPostOrderCount() - numCandidateBlocks; + assert((numCandidateBlocks + numColdBlocks) == compiler->m_dfsTree->GetPostOrderCount()); + memcpy(tempOrder + numCandidateBlocks, blockOrder + numCandidateBlocks, sizeof(BasicBlock*) * numColdBlocks); + RunThreeOptPass(blockOrder[0], blockOrder[numCandidateBlocks - 1]); + // Reorder the block list bool modified = false; - for (unsigned i = 1; i < numCandidateBlocks; i++) + for (unsigned i = 1; i < compiler->m_dfsTree->GetPostOrderCount(); i++) { BasicBlock* const block = blockOrder[i - 1]; BasicBlock* const next = blockOrder[i]; @@ -5111,11 +5117,11 @@ PhaseStatus Compiler::fgSearchImprovedLayout() m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; - // When walking the RPO-based layout, only add hot blocks to the initial layout. + // When walking the RPO-based layout, compact the hot blocks, and remember the end of the hot section. // We don't want to waste time running 3-opt on cold blocks. unsigned numHotBlocks = 0; auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { - if (!block->isBBWeightCold(this)) + if (!block->isBBWeightCold(this) || block->IsFirst()) { initialLayout[numHotBlocks++] = block; } @@ -5132,6 +5138,18 @@ PhaseStatus Compiler::fgSearchImprovedLayout() else { fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); + + // Add the cold blocks to the end of the initial layout in RPO. 
+ unsigned nextColdIndex = numHotBlocks; + for (unsigned i = m_dfsTree->GetPostOrderCount(); nextColdIndex < m_dfsTree->GetPostOrderCount(); i--) + { + assert(i != 0); + BasicBlock* const block = m_dfsTree->GetPostOrder(i - 1); + if (block->isBBWeightCold(this) && !block->IsFirst()) + { + initialLayout[nextColdIndex++] = block; + } + } } bool modified = false; From a2774e0936ec0cac2fb3b8f06c4a0788e9f22e2f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 5 Feb 2025 16:32:25 -0500 Subject: [PATCH 08/20] Clean up 3-opt driver a bit --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgopt.cpp | 38 +++++++++++++++----------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6a7298cf3d2373..6e40f8e4709207 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6368,7 +6368,7 @@ class Compiler void AddNonFallthroughPreds(unsigned blockPos); bool RunGreedyThreeOptPass(unsigned startPos, unsigned endPos); - bool RunThreeOptPass(); + bool RunThreeOpt(); public: ThreeOptLayout(Compiler* comp); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8c8376694281f6..4c60646e5c729d 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5244,13 +5244,6 @@ void Compiler::ThreeOptLayout::Run() assert(finalBlock != nullptr); assert(!finalBlock->isBBCallFinallyPair()); - // For methods with fewer than three candidate blocks, we cannot partition anything - if (finalBlock->IsFirst() || finalBlock->Prev()->IsFirst()) - { - JITDUMP("Not enough blocks to partition anything. Skipping 3-opt.\n"); - return; - } - // Get an upper bound on the number of hot blocks without walking the whole block list. // We will only consider blocks reachable via normal flow. const unsigned numBlocksUpperBound = compiler->m_dfsTree->GetPostOrderCount(); @@ -5267,13 +5260,20 @@ void Compiler::ThreeOptLayout::Run() } assert(numCandidateBlocks < numBlocksUpperBound); - blockOrder[numCandidateBlocks] = tempOrder[numCandidateBlocks] = block; + blockOrder[numCandidateBlocks] = block; // Repurpose 'bbPostorderNum' for the block's ordinal block->bbPostorderNum = numCandidateBlocks++; } - const bool modified = RunThreeOptPass(); + // For methods with fewer than three candidate blocks, we cannot partition anything + if (numCandidateBlocks < 3) + { + JITDUMP("Not enough blocks to partition anything. Skipping reordering.\n"); + return; + } + + const bool modified = RunThreeOpt(); if (modified) { @@ -5502,31 +5502,23 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned } //----------------------------------------------------------------------------- -// Compiler::ThreeOptLayout::RunThreeOptPass: Runs 3-opt on the candidate span of blocks. +// Compiler::ThreeOptLayout::RunThreeOpt: Runs 3-opt on the candidate span of blocks. // // Returns: // True if we reordered anything, false otherwise // -bool Compiler::ThreeOptLayout::RunThreeOptPass() +bool Compiler::ThreeOptLayout::RunThreeOpt() { - const unsigned startPos = 0; - const unsigned endPos = numCandidateBlocks - 1; - const unsigned numBlocks = (endPos - startPos + 1); - assert(startPos <= endPos); - - if (numBlocks < 3) - { - JITDUMP("Not enough blocks to partition anything. 
Skipping reordering.\n"); - return false; - } + // We better have enough blocks to create partitions + assert(numCandidateBlocks > 2); + const unsigned startPos = 0; + const unsigned endPos = numCandidateBlocks - 1; JITDUMP("Initial layout cost: %f\n", GetLayoutCost(startPos, endPos)); const bool modified = RunGreedyThreeOptPass(startPos, endPos); - // Write back to 'tempOrder' so changes to this region aren't lost next time we swap 'tempOrder' and 'blockOrder' if (modified) { - memcpy(tempOrder + startPos, blockOrder + startPos, sizeof(BasicBlock*) * numBlocks); JITDUMP("Final layout cost: %f\n", GetLayoutCost(startPos, endPos)); } else From 1404f80af27c848373f6e22bcfbf91211fd4f298 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 6 Feb 2025 17:19:28 -0500 Subject: [PATCH 09/20] Cold code motion working --- src/coreclr/jit/compiler.hpp | 17 +++- src/coreclr/jit/fgopt.cpp | 159 +++++++++++++++++++++++------------ src/coreclr/jit/lsra.cpp | 11 +-- 3 files changed, 120 insertions(+), 67 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index ccb2584ac92550..b4c5f02ece0499 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5054,10 +5054,21 @@ void Compiler::fgVisitBlocksInLoopAwareRPO(FlowGraphDfsTree* dfsTree, FlowGraphN LoopAwareVisitor visitor(dfsTree, loops, func); - for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) + if (loops->NumLoops() == 0) { - BasicBlock* const block = dfsTree->GetPostOrder(i - 1); - visitor.VisitBlock(block); + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) + { + BasicBlock* const block = dfsTree->GetPostOrder(i - 1); + func(block); + } + } + else + { + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) + { + BasicBlock* const block = dfsTree->GetPostOrder(i - 1); + visitor.VisitBlock(block); + } } } diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index d9bb3e05c79bb0..b8a9b38670c29a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4800,86 +4800,129 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) // bool Compiler::ThreeOptLayout::Run() { - // For methods with fewer than three candidate blocks, we cannot partition anything - if (numCandidateBlocks < 3) - { - JITDUMP("Not enough blocks to partition anything. 
Skipping reordering.\n"); - return false; - } + assert(numCandidateBlocks > 0); // Initialize ordinals for the hot blocks for (unsigned i = 0; i < numCandidateBlocks; i++) { BasicBlock* const block = blockOrder[i]; - tempOrder[i] = block; + assert(!block->isBBWeightCold(compiler)); + assert(!block->hasHndIndex()); // Repurpose 'bbPostorderNum' for the block's ordinal block->bbPostorderNum = i; } - // Copy cold block layout over to 'tempOrder' so we don't lose it - const unsigned numColdBlocks = compiler->m_dfsTree->GetPostOrderCount() - numCandidateBlocks; - assert((numCandidateBlocks + numColdBlocks) == compiler->m_dfsTree->GetPostOrderCount()); - memcpy(tempOrder + numCandidateBlocks, blockOrder + numCandidateBlocks, sizeof(BasicBlock*) * numColdBlocks); - RunThreeOpt(); + BasicBlock** const lastHotBlocks = new (compiler, CMK_BasicBlock) BasicBlock*[compiler->compHndBBtabCount + 1]; + lastHotBlocks[0] = compiler->fgFirstBB; + + for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + lastHotBlocks[HBtab->ebdTryBeg->bbTryIndex] = HBtab->ebdTryBeg; + } + // Reorder the block list bool modified = false; - for (unsigned i = 1; i < compiler->m_dfsTree->GetPostOrderCount(); i++) + for (unsigned i = 1; i < numCandidateBlocks; i++) { BasicBlock* const block = blockOrder[i - 1]; BasicBlock* const next = blockOrder[i]; - assert(block != next); - - if (block->NextIs(next)) - { - continue; - } + lastHotBlocks[block->bbTryIndex] = block; // Only reorder within EH regions to maintain contiguity. - if (!BasicBlock::sameEHRegion(block, next)) + if (compiler->bbIsTryBeg(next)) { continue; } - // Don't move the entry of an EH region. - if (compiler->bbIsTryBeg(next) || compiler->bbIsHandlerBeg(next)) - { - continue; - } + BasicBlock* insertionPoint = lastHotBlocks[next->bbTryIndex]; + assert((block == insertionPoint) || !BasicBlock::sameTryRegion(block, next)); - // Don't reorder filter regions. - if (block->hasHndIndex() && compiler->ehGetDsc(block->getHndIndex())->HasFilter()) + if (insertionPoint->NextIs(next)) { continue; } - // Don't break up call-finally pairs. - if (block->isBBCallFinallyPair() || next->isBBCallFinallyPairTail()) + if (insertionPoint->isBBCallFinallyPair()) { - continue; + insertionPoint = insertionPoint->Next(); + assert(insertionPoint != next); } // Move call-finally pairs in tandem. 
if (next->isBBCallFinallyPair()) { BasicBlock* const callFinallyTail = next->Next(); - if (callFinallyTail != block) + if (callFinallyTail != insertionPoint) { compiler->fgUnlinkRange(next, callFinallyTail); - compiler->fgMoveBlocksAfter(next, callFinallyTail, block); + compiler->fgMoveBlocksAfter(next, callFinallyTail, insertionPoint); + modified = true; + } + } + else if (next->isBBCallFinallyPairTail()) + { + BasicBlock* const callFinally = next->Prev(); + if (callFinally != insertionPoint) + { + compiler->fgUnlinkRange(callFinally, next); + compiler->fgMoveBlocksAfter(callFinally, next, insertionPoint); modified = true; } } else { compiler->fgUnlinkBlock(next); - compiler->fgInsertBBafter(block, next); + compiler->fgInsertBBafter(insertionPoint, next); modified = true; } } + lastHotBlocks[blockOrder[numCandidateBlocks - 1]->bbTryIndex] = blockOrder[numCandidateBlocks - 1]; + + if (compiler->compHndBBtabCount > 0) + { + if (modified) + { + compiler->fgFindEHRegionEnds(); + } + + // bool findEHRegions = false; + // for (EHblkDsc* const HBtab : EHClauses(compiler)) + // { + // BasicBlock* const tryBeg = HBtab->ebdTryBeg; + // if ((tryBeg->bbPostorderNum >= numCandidateBlocks) || (blockOrder[tryBeg->bbPostorderNum] != tryBeg) || tryBeg->IsFirst()) + // { + // continue; + // } + + // BasicBlock* const tryLast = HBtab->ebdTryLast; + // BasicBlock* insertionPoint = blockOrder[tryBeg->bbPostorderNum - 1]; + // unsigned parentIndex = insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + + // // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? + // if (parentIndex != HBtab->ebdEnclosingTryIndex) + // { + // parentIndex = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) ? (HBtab->ebdEnclosingTryIndex + 1) : 0; + // insertionPoint = lastHotBlocks[parentIndex]; + // } + + // compiler->fgUnlinkRange(tryBeg, tryLast); + // compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); + // modified = true; + + // // If this try region has a parent region, we will need to find EH region ends again. + // findEHRegions = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX); + // } + + // if (findEHRegions) + // { + // compiler->fgFindEHRegionEnds(); + // } + } + return modified; } @@ -5084,8 +5127,13 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned // void Compiler::ThreeOptLayout::RunThreeOpt() { - // We better have enough blocks to create partitions - assert(numCandidateBlocks > 2); + // For methods with fewer than three candidate blocks, we cannot partition anything + if (numCandidateBlocks < 3) + { + JITDUMP("Not enough blocks to partition anything. Skipping reordering.\n"); + return; + } + const unsigned startPos = 0; const unsigned endPos = numCandidateBlocks - 1; @@ -5125,16 +5173,24 @@ PhaseStatus Compiler::fgSearchImprovedLayout() } #endif // DEBUG - // Before running 3-opt, compute a loop-aware RPO to get a sensible starting layout. - m_dfsTree = fgComputeDfs(); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + // Before running 3-opt, compute a loop-aware RPO (if not already available) to get a sensible starting layout. 
+ if (m_dfsTree == nullptr) + { + m_dfsTree = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } + else + { + assert(m_loops != nullptr); + } + BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; // When walking the RPO-based layout, compact the hot blocks, and remember the end of the hot section. // We don't want to waste time running 3-opt on cold blocks. unsigned numHotBlocks = 0; auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { - if (!block->isBBWeightCold(this) || block->IsFirst()) + if (!block->isBBWeightCold(this) && !block->hasHndIndex()) { initialLayout[numHotBlocks++] = block; } @@ -5151,26 +5207,17 @@ PhaseStatus Compiler::fgSearchImprovedLayout() else { fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence); - - // Add the cold blocks to the end of the initial layout in RPO. - unsigned nextColdIndex = numHotBlocks; - for (unsigned i = m_dfsTree->GetPostOrderCount(); nextColdIndex < m_dfsTree->GetPostOrderCount(); i--) - { - assert(i != 0); - BasicBlock* const block = m_dfsTree->GetPostOrder(i - 1); - if (block->isBBWeightCold(this) && !block->IsFirst()) - { - initialLayout[nextColdIndex++] = block; - } - } } - ThreeOptLayout layoutRunner(this, initialLayout, numHotBlocks); - const bool modified = layoutRunner.Run(); - - if (modified && (compHndBBtabCount > 0)) + bool modified = false; + if (numHotBlocks > 0) + { + ThreeOptLayout layoutRunner(this, initialLayout, numHotBlocks); + modified = layoutRunner.Run(); + } + else { - fgFindEHRegionEnds(); + JITDUMP("No hot blocks found. Skipping reordering.\n"); } fgInvalidateDfsTree(); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 0f7397012f2d14..bb198a6d99890d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -952,7 +952,7 @@ void LinearScan::setBlockSequence() FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree; blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; - if (compiler->opts.OptimizationEnabled() && dfsTree->HasCycle()) + if (compiler->opts.OptimizationEnabled()) { // Ensure loop bodies are compact in the visitation order. compiler->m_loops = FlowGraphNaturalLoops::Find(dfsTree); @@ -1313,15 +1313,10 @@ PhaseStatus LinearScan::doLinearScan() compiler->compLSRADone = true; // If edge resolution didn't create new blocks, - // cache the block sequence so it can be used as an initial layout during block reordering. - if (compiler->fgBBcount == bbSeqCount) - { - compiler->fgBBs = blockSequence; - } - else + // we can reuse the current flowgraph annotations during block layout. 
+ if (compiler->fgBBcount != bbSeqCount) { assert(compiler->fgBBcount > bbSeqCount); - compiler->fgBBs = nullptr; compiler->fgInvalidateDfsTree(); } From 26359c46b6a70130d9f2f5e22fa71ebf4c2f5e0b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 6 Feb 2025 19:00:27 -0500 Subject: [PATCH 10/20] Move try regions --- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/fgopt.cpp | 302 +++++++++++++++++++++++-------------- 2 files changed, 191 insertions(+), 113 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 82e3d4bcc3f6dc..f966460a6f444e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6377,6 +6377,8 @@ class Compiler void RunThreeOpt(); + bool ReorderBlockList(); + public: ThreeOptLayout(Compiler* comp, BasicBlock** initialLayout, unsigned numHotBlocks); bool Run(); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index b8a9b38670c29a..87e11a7e46660f 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4806,7 +4806,8 @@ bool Compiler::ThreeOptLayout::Run() for (unsigned i = 0; i < numCandidateBlocks; i++) { BasicBlock* const block = blockOrder[i]; - assert(!block->isBBWeightCold(compiler)); + + // 3-opt does not expect to be given handler blocks to reorder assert(!block->hasHndIndex()); // Repurpose 'bbPostorderNum' for the block's ordinal @@ -4815,115 +4816,7 @@ bool Compiler::ThreeOptLayout::Run() RunThreeOpt(); - BasicBlock** const lastHotBlocks = new (compiler, CMK_BasicBlock) BasicBlock*[compiler->compHndBBtabCount + 1]; - lastHotBlocks[0] = compiler->fgFirstBB; - - for (EHblkDsc* const HBtab : EHClauses(compiler)) - { - lastHotBlocks[HBtab->ebdTryBeg->bbTryIndex] = HBtab->ebdTryBeg; - } - - // Reorder the block list - bool modified = false; - for (unsigned i = 1; i < numCandidateBlocks; i++) - { - BasicBlock* const block = blockOrder[i - 1]; - BasicBlock* const next = blockOrder[i]; - lastHotBlocks[block->bbTryIndex] = block; - - // Only reorder within EH regions to maintain contiguity. - if (compiler->bbIsTryBeg(next)) - { - continue; - } - - BasicBlock* insertionPoint = lastHotBlocks[next->bbTryIndex]; - assert((block == insertionPoint) || !BasicBlock::sameTryRegion(block, next)); - - if (insertionPoint->NextIs(next)) - { - continue; - } - - if (insertionPoint->isBBCallFinallyPair()) - { - insertionPoint = insertionPoint->Next(); - assert(insertionPoint != next); - } - - // Move call-finally pairs in tandem. 
- if (next->isBBCallFinallyPair()) - { - BasicBlock* const callFinallyTail = next->Next(); - if (callFinallyTail != insertionPoint) - { - compiler->fgUnlinkRange(next, callFinallyTail); - compiler->fgMoveBlocksAfter(next, callFinallyTail, insertionPoint); - modified = true; - } - } - else if (next->isBBCallFinallyPairTail()) - { - BasicBlock* const callFinally = next->Prev(); - if (callFinally != insertionPoint) - { - compiler->fgUnlinkRange(callFinally, next); - compiler->fgMoveBlocksAfter(callFinally, next, insertionPoint); - modified = true; - } - } - else - { - compiler->fgUnlinkBlock(next); - compiler->fgInsertBBafter(insertionPoint, next); - modified = true; - } - } - - lastHotBlocks[blockOrder[numCandidateBlocks - 1]->bbTryIndex] = blockOrder[numCandidateBlocks - 1]; - - if (compiler->compHndBBtabCount > 0) - { - if (modified) - { - compiler->fgFindEHRegionEnds(); - } - - // bool findEHRegions = false; - // for (EHblkDsc* const HBtab : EHClauses(compiler)) - // { - // BasicBlock* const tryBeg = HBtab->ebdTryBeg; - // if ((tryBeg->bbPostorderNum >= numCandidateBlocks) || (blockOrder[tryBeg->bbPostorderNum] != tryBeg) || tryBeg->IsFirst()) - // { - // continue; - // } - - // BasicBlock* const tryLast = HBtab->ebdTryLast; - // BasicBlock* insertionPoint = blockOrder[tryBeg->bbPostorderNum - 1]; - // unsigned parentIndex = insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; - - // // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? - // if (parentIndex != HBtab->ebdEnclosingTryIndex) - // { - // parentIndex = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) ? (HBtab->ebdEnclosingTryIndex + 1) : 0; - // insertionPoint = lastHotBlocks[parentIndex]; - // } - - // compiler->fgUnlinkRange(tryBeg, tryLast); - // compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); - // modified = true; - - // // If this try region has a parent region, we will need to find EH region ends again. - // findEHRegions = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX); - // } - - // if (findEHRegions) - // { - // compiler->fgFindEHRegionEnds(); - // } - } - - return modified; + return ReorderBlockList(); } //----------------------------------------------------------------------------- @@ -5150,6 +5043,180 @@ void Compiler::ThreeOptLayout::RunThreeOpt() } } +//----------------------------------------------------------------------------- +// Compiler::ThreeOptLayout::ReorderBlockList: Reorders blocks within their regions +// using the order 3-opt came up with. +// If the method has try regions, this will also move them to try to create fallthrough into their entries. +// +// Returns: +// True if any blocks were moved +// +bool Compiler::ThreeOptLayout::ReorderBlockList() +{ + // As we reorder blocks, remember the last candidate block we found in each region. + // In case we cannot place two blocks next to each other because they are in different regions, + // we will instead place the latter block after the last one we saw in its region. + // This ensures cold blocks sink to the end of their respective regions. + // This will also push nested regions further down the method, but we will move them later, anyway. 
+ BasicBlock** const lastHotBlocks = new (compiler, CMK_BasicBlock) BasicBlock* [compiler->compHndBBtabCount + 1] {}; + lastHotBlocks[0] = compiler->fgFirstBB; + + for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + lastHotBlocks[HBtab->ebdTryBeg->bbTryIndex] = HBtab->ebdTryBeg; + } + + // Reorder the block list + bool modified = false; + for (unsigned i = 1; i < numCandidateBlocks; i++) + { + BasicBlock* const block = blockOrder[i - 1]; + BasicBlock* const next = blockOrder[i]; + lastHotBlocks[block->bbTryIndex] = block; + + // Don't move try region entries yet. + // We will move entire try regions after. + if (compiler->bbIsTryBeg(next)) + { + continue; + } + + // If 'block' and 'next' are in the same region, we should have no problem creating fallthrough. + // If they aren't, then we will pick the last hot block we saw in the same region as 'next' to insert after. + BasicBlock* insertionPoint = lastHotBlocks[next->bbTryIndex]; + assert(insertionPoint != nullptr); + assert((block == insertionPoint) || !BasicBlock::sameTryRegion(block, next)); + + // Nothing to do if we already fall through. + if (insertionPoint->NextIs(next)) + { + continue; + } + + // Don't break up call-finally pairs. + if (insertionPoint->isBBCallFinallyPair()) + { + insertionPoint = insertionPoint->Next(); + assert(insertionPoint != next); + } + + // Move call-finally pairs in tandem. + if (next->isBBCallFinallyPair()) + { + BasicBlock* const callFinallyTail = next->Next(); + if (callFinallyTail != insertionPoint) + { + compiler->fgUnlinkRange(next, callFinallyTail); + compiler->fgMoveBlocksAfter(next, callFinallyTail, insertionPoint); + modified = true; + } + } + else if (next->isBBCallFinallyPairTail()) + { + BasicBlock* const callFinally = next->Prev(); + if (callFinally != insertionPoint) + { + compiler->fgUnlinkRange(callFinally, next); + compiler->fgMoveBlocksAfter(callFinally, next, insertionPoint); + modified = true; + } + } + else + { + compiler->fgUnlinkBlock(next); + compiler->fgInsertBBafter(insertionPoint, next); + modified = true; + } + } + + lastHotBlocks[blockOrder[numCandidateBlocks - 1]->bbTryIndex] = blockOrder[numCandidateBlocks - 1]; + + if (compiler->compHndBBtabCount > 0) + { + // If we reordered within any try regions, make sure the EH table is up-to-date. + if (modified) + { + compiler->fgFindEHRegionEnds(); + } + + // We only ordered blocks within regions above. + // Now, move entire try regions up to their ideal predecessors, if possible. + for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + // If this try region isn't in the candidate span of blocks, don't consider it. + // Also, if this try region's entry is also the method entry, don't move it. + BasicBlock* const tryBeg = HBtab->ebdTryBeg; + if ((tryBeg->bbPostorderNum >= numCandidateBlocks) || (blockOrder[tryBeg->bbPostorderNum] != tryBeg) || + tryBeg->IsFirst()) + { + continue; + } + + // We will try using 3-opt's chosen predecessor for the try region. + BasicBlock* const tryBegPrev = tryBeg->Prev(); + BasicBlock* const tryLast = HBtab->ebdTryLast; + BasicBlock* insertionPoint = blockOrder[tryBeg->bbPostorderNum - 1]; + unsigned parentIndex = + insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + + // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? + if (parentIndex != HBtab->ebdEnclosingTryIndex) + { + // We cannot. Instead, get the last hot block in the parent region. 
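The lookup that follows converts between two index spaces: ebdEnclosingTryIndex is a 0-based EH table index (with EHblkDsc::NO_ENCLOSING_INDEX meaning "no enclosing try"), while 'lastHotBlocks' is keyed like bbTryIndex, which is 1-based with slot 0 reserved for blocks outside any try region. A hypothetical helper (not part of the patch) that makes the conversion explicit:

unsigned EhTableIndexToTryIndexKey(unsigned ehTableIndex)
{
    // bbTryIndex-style keys are 1-based; 0 means "not inside any try region".
    return (ehTableIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : (ehTableIndex + 1);
}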
+ parentIndex = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) + ? (HBtab->ebdEnclosingTryIndex + 1) + : 0; + insertionPoint = lastHotBlocks[parentIndex]; + + // If the parent of this try region is the same as it, it won't have an entry in 'lastHotBlocks'. + // Tolerate this. + if (insertionPoint == nullptr) + { + continue; + } + } + + // Don't break up call-finally pairs. + if (insertionPoint->isBBCallFinallyPair()) + { + insertionPoint = insertionPoint->Next(); + } + + // Nothing to do if we already fall through. + if (insertionPoint->NextIs(tryBeg)) + { + continue; + } + + compiler->fgUnlinkRange(tryBeg, tryLast); + compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); + modified = true; + + // Update the parent regions' end blocks. + for (unsigned tryIndex = compiler->ehGetEnclosingTryIndex(tryBeg->getTryIndex()); + tryIndex != EHblkDsc::NO_ENCLOSING_INDEX; tryIndex = compiler->ehGetEnclosingTryIndex(tryIndex)) + { + EHblkDsc* const parentTab = compiler->ehGetDsc(tryIndex); + if (parentTab->ebdTryLast == tryLast) + { + // Tolerate parent regions that are identical to the child region. + if (parentTab->ebdTryBeg != tryBeg) + { + parentTab->ebdTryLast = tryBegPrev; + } + } + else + { + // No need to continue searching if the parent region's end block differs from the child region's. + break; + } + } + } + } + + return modified; +} + //----------------------------------------------------------------------------- // fgSearchImprovedLayout: Try to improve upon a RPO-based layout with the 3-opt method: // - Identify a range of hot blocks to reorder within @@ -5187,7 +5254,7 @@ PhaseStatus Compiler::fgSearchImprovedLayout() BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; // When walking the RPO-based layout, compact the hot blocks, and remember the end of the hot section. - // We don't want to waste time running 3-opt on cold blocks. + // We don't want to waste time running 3-opt on cold blocks, or on handler sections. unsigned numHotBlocks = 0; auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { if (!block->isBBWeightCold(this) && !block->hasHndIndex()) @@ -5200,8 +5267,15 @@ PhaseStatus Compiler::fgSearchImprovedLayout() // but keep the method entry block at the beginning. if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10)) { - numHotBlocks = m_dfsTree->GetPostOrderCount(); - memcpy(initialLayout, m_dfsTree->GetPostOrder(), sizeof(BasicBlock*) * numHotBlocks); + for (unsigned i = 0; i < m_dfsTree->GetPostOrderCount(); i++) + { + BasicBlock* const block = m_dfsTree->GetPostOrder(i); + if (!block->hasHndIndex()) + { + initialLayout[numHotBlocks++] = block; + } + } + std::swap(initialLayout[0], initialLayout[numHotBlocks - 1]); } else @@ -5220,6 +5294,8 @@ PhaseStatus Compiler::fgSearchImprovedLayout() JITDUMP("No hot blocks found. Skipping reordering.\n"); } + // 3-opt will mess with post-order numbers regardless of whether it modifies anything, + // so we always need to invalidate the flowgraph annotations after. fgInvalidateDfsTree(); return modified ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From f062f0bf5e50598988de1c2cdb8f7a7ac57b0314 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 6 Feb 2025 21:36:25 -0500 Subject: [PATCH 11/20] Ensure method entry is considered hot by 3-opt --- src/coreclr/jit/fgopt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 87e11a7e46660f..e2328da9ab42b2 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5257,7 +5257,8 @@ PhaseStatus Compiler::fgSearchImprovedLayout() // We don't want to waste time running 3-opt on cold blocks, or on handler sections. unsigned numHotBlocks = 0; auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) { - if (!block->isBBWeightCold(this) && !block->hasHndIndex()) + // The first block really shouldn't be cold, but if it is, ensure it's still placed first. + if (!block->hasHndIndex() && (!block->isBBWeightCold(this) || block->IsFirst())) { initialLayout[numHotBlocks++] = block; } From ce79655eb41607960e0c3ce1b9fb715a3e9956f2 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 7 Feb 2025 16:58:46 -0500 Subject: [PATCH 12/20] Always invalidate DFS tree after layout --- src/coreclr/jit/compiler.cpp | 7 +++++++ src/coreclr/jit/fgopt.cpp | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3613e9a6274b08..ab8f0dcbd16595 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5227,6 +5227,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout); } + // 3-opt will mess with post-order numbers regardless of whether it modifies anything, + // so we always need to invalidate the flowgraph annotations after. + // If we didn't run 3-opt, we might still have a profile-aware DFS tree computed during LSRA available. + // This tree's presence can trigger asserts if pre/postorder numbers are recomputed, + // so invalidate the tree in both cases. + fgInvalidateDfsTree(); + // Now that the flowgraph is finalized, run post-layout optimizations. // DoPhase(this, PHASE_OPTIMIZE_POST_LAYOUT, &Compiler::optOptimizePostLayout); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index e2328da9ab42b2..9ce4d65bc440e6 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5295,9 +5295,6 @@ PhaseStatus Compiler::fgSearchImprovedLayout() JITDUMP("No hot blocks found. Skipping reordering.\n"); } - // 3-opt will mess with post-order numbers regardless of whether it modifies anything, - // so we always need to invalidate the flowgraph annotations after. - fgInvalidateDfsTree(); return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From 2520f7aba8a64da7d419ec195d608c935f651b3d Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 7 Feb 2025 17:31:43 -0500 Subject: [PATCH 13/20] Revert "Always invalidate DFS tree after layout" This reverts commit ce79655eb41607960e0c3ce1b9fb715a3e9956f2. 
--- src/coreclr/jit/compiler.cpp | 7 ------- src/coreclr/jit/fgopt.cpp | 3 +++ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ab8f0dcbd16595..3613e9a6274b08 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5227,13 +5227,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout); } - // 3-opt will mess with post-order numbers regardless of whether it modifies anything, - // so we always need to invalidate the flowgraph annotations after. - // If we didn't run 3-opt, we might still have a profile-aware DFS tree computed during LSRA available. - // This tree's presence can trigger asserts if pre/postorder numbers are recomputed, - // so invalidate the tree in both cases. - fgInvalidateDfsTree(); - // Now that the flowgraph is finalized, run post-layout optimizations. // DoPhase(this, PHASE_OPTIMIZE_POST_LAYOUT, &Compiler::optOptimizePostLayout); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 9ce4d65bc440e6..e2328da9ab42b2 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5295,6 +5295,9 @@ PhaseStatus Compiler::fgSearchImprovedLayout() JITDUMP("No hot blocks found. Skipping reordering.\n"); } + // 3-opt will mess with post-order numbers regardless of whether it modifies anything, + // so we always need to invalidate the flowgraph annotations after. + fgInvalidateDfsTree(); return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } From e54dbb52bec81709ff068bb38ce0cc3b6eb30aea Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 7 Feb 2025 17:32:31 -0500 Subject: [PATCH 14/20] Fix DFS tree invalidation logic --- src/coreclr/jit/compiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3613e9a6274b08..6a203bdd3880d0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5226,6 +5226,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl { DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout); } + else + { + // If we didn't run 3-opt, we might still have a profile-aware DFS tree computed during LSRA available. + // This tree's presence can trigger asserts if pre/postorder numbers are recomputed, + // so invalidate the tree either way. + fgInvalidateDfsTree(); + } // Now that the flowgraph is finalized, run post-layout optimizations. 
// From f08a75694f41285c9980ce7f3e58bf47de2dabee Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Feb 2025 01:04:23 -0500 Subject: [PATCH 15/20] Style; use bbPreorderNum for ordinal --- src/coreclr/jit/fgopt.cpp | 201 ++++++++++++++++++-------------------- 1 file changed, 97 insertions(+), 104 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 7a05d523d0d592..dd081beee8ea9e 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4659,7 +4659,7 @@ void Compiler::ThreeOptLayout::SwapPartitions( // Update the ordinals for the blocks we moved for (unsigned i = s2Start; i <= s4End; i++) { - blockOrder[i]->bbPostorderNum = i; + blockOrder[i]->bbPreorderNum = i; } #ifdef DEBUG @@ -4720,8 +4720,8 @@ bool Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge) return false; } - const unsigned srcPos = srcBlk->bbPostorderNum; - const unsigned dstPos = dstBlk->bbPostorderNum; + const unsigned srcPos = srcBlk->bbPreorderNum; + const unsigned dstPos = dstBlk->bbPreorderNum; assert(srcPos < compiler->m_dfsTree->GetPostOrderCount()); assert(dstPos < compiler->m_dfsTree->GetPostOrderCount()); @@ -4812,19 +4812,6 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos) bool Compiler::ThreeOptLayout::Run() { assert(numCandidateBlocks > 0); - - // Initialize ordinals for the hot blocks - for (unsigned i = 0; i < numCandidateBlocks; i++) - { - BasicBlock* const block = blockOrder[i]; - - // 3-opt does not expect to be given handler blocks to reorder - assert(!block->hasHndIndex()); - - // Repurpose 'bbPostorderNum' for the block's ordinal - block->bbPostorderNum = i; - } - RunThreeOpt(); return ReorderBlockList(); } @@ -4873,8 +4860,8 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned BasicBlock* const srcBlk = candidateEdge->getSourceBlock(); BasicBlock* const dstBlk = candidateEdge->getDestinationBlock(); - const unsigned srcPos = srcBlk->bbPostorderNum; - const unsigned dstPos = dstBlk->bbPostorderNum; + const unsigned srcPos = srcBlk->bbPreorderNum; + const unsigned dstPos = dstBlk->bbPreorderNum; // This edge better be between blocks in the current region assert((srcPos >= startPos) && (srcPos <= endPos)); @@ -4998,7 +4985,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned SwapPartitions(startPos, s2Start, s3Start, s3End, endPos); // Ensure this move created fallthrough from 'srcBlk' to 'dstBlk' - assert((srcBlk->bbPostorderNum + 1) == dstBlk->bbPostorderNum); + assert((srcBlk->bbPreorderNum + 1) == dstBlk->bbPreorderNum); // At every cut point is an opportunity to consider more candidate edges. // To the left of each cut point, consider successor edges that don't fall through. @@ -5138,88 +5125,89 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() } } + if (compiler->compHndBBtabCount == 0) + { + return modified; + } + lastHotBlocks[blockOrder[numCandidateBlocks - 1]->bbTryIndex] = blockOrder[numCandidateBlocks - 1]; - if (compiler->compHndBBtabCount > 0) + // If we reordered within any try regions, make sure the EH table is up-to-date. + if (modified) { - // If we reordered within any try regions, make sure the EH table is up-to-date. - if (modified) + compiler->fgFindEHRegionEnds(); + } + + // We only ordered blocks within regions above. + // Now, move entire try regions up to their ideal predecessors, if possible. 
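At this point the function has a clear two-phase shape: reorder blocks inside each try region first, then move whole try regions behind the predecessors 3-opt chose for their entries. A condensed outline under that reading (the two phase helpers are invented names; fgFindEHRegionEnds is the real fix-up, renamed to fgFindTryRegionEnds later in this series):

bool ReorderBlockListOutline(Compiler* compiler)
{
    // Phase 1: splice blocks into 3-opt's order without breaking try-region contiguity.
    bool modified = ReorderWithinTryRegions();

    if (compiler->compHndBBtabCount == 0)
    {
        return modified;
    }

    // The EH table's end pointers must be correct before whole regions are moved.
    if (modified)
    {
        compiler->fgFindEHRegionEnds();
    }

    // Phase 2: move each whole try region up behind 3-opt's chosen predecessor,
    // when doing so preserves EH nesting.
    modified |= MoveTryRegionsToChosenPredecessors();
    return modified;
}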
+ for (EHblkDsc* const HBtab : EHClauses(compiler)) + { + // If this try region isn't in the candidate span of blocks, don't consider it. + // Also, if this try region's entry is also the method entry, don't move it. + BasicBlock* const tryBeg = HBtab->ebdTryBeg; + if ((tryBeg->bbPreorderNum >= numCandidateBlocks) || (blockOrder[tryBeg->bbPreorderNum] != tryBeg) || + tryBeg->IsFirst()) { - compiler->fgFindEHRegionEnds(); + continue; } - // We only ordered blocks within regions above. - // Now, move entire try regions up to their ideal predecessors, if possible. - for (EHblkDsc* const HBtab : EHClauses(compiler)) - { - // If this try region isn't in the candidate span of blocks, don't consider it. - // Also, if this try region's entry is also the method entry, don't move it. - BasicBlock* const tryBeg = HBtab->ebdTryBeg; - if ((tryBeg->bbPostorderNum >= numCandidateBlocks) || (blockOrder[tryBeg->bbPostorderNum] != tryBeg) || - tryBeg->IsFirst()) - { - continue; - } + // We will try using 3-opt's chosen predecessor for the try region. + BasicBlock* const tryBegPrev = tryBeg->Prev(); + BasicBlock* const tryLast = HBtab->ebdTryLast; + BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; + unsigned parentIndex = + insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; - // We will try using 3-opt's chosen predecessor for the try region. - BasicBlock* const tryBegPrev = tryBeg->Prev(); - BasicBlock* const tryLast = HBtab->ebdTryLast; - BasicBlock* insertionPoint = blockOrder[tryBeg->bbPostorderNum - 1]; - unsigned parentIndex = - insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; + // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? + if (parentIndex != HBtab->ebdEnclosingTryIndex) + { + // We cannot. Instead, get the last hot block in the parent region. + parentIndex = + (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) ? (HBtab->ebdEnclosingTryIndex + 1) : 0; + insertionPoint = lastHotBlocks[parentIndex]; - // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? - if (parentIndex != HBtab->ebdEnclosingTryIndex) + // If the parent of this try region is the same as it, it won't have an entry in 'lastHotBlocks'. + // Tolerate this. + if (insertionPoint == nullptr) { - // We cannot. Instead, get the last hot block in the parent region. - parentIndex = (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) - ? (HBtab->ebdEnclosingTryIndex + 1) - : 0; - insertionPoint = lastHotBlocks[parentIndex]; - - // If the parent of this try region is the same as it, it won't have an entry in 'lastHotBlocks'. - // Tolerate this. - if (insertionPoint == nullptr) - { - continue; - } + continue; } + } - // Don't break up call-finally pairs. - if (insertionPoint->isBBCallFinallyPair()) - { - insertionPoint = insertionPoint->Next(); - } + // Don't break up call-finally pairs. + if (insertionPoint->isBBCallFinallyPair()) + { + insertionPoint = insertionPoint->Next(); + } - // Nothing to do if we already fall through. - if (insertionPoint->NextIs(tryBeg)) - { - continue; - } + // Nothing to do if we already fall through. 
+ if (insertionPoint->NextIs(tryBeg)) + { + continue; + } - compiler->fgUnlinkRange(tryBeg, tryLast); - compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); - modified = true; + compiler->fgUnlinkRange(tryBeg, tryLast); + compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); + modified = true; - // Update the parent regions' end blocks. - for (unsigned tryIndex = compiler->ehGetEnclosingTryIndex(tryBeg->getTryIndex()); - tryIndex != EHblkDsc::NO_ENCLOSING_INDEX; tryIndex = compiler->ehGetEnclosingTryIndex(tryIndex)) + // Update the parent regions' end blocks. + for (unsigned tryIndex = compiler->ehGetEnclosingTryIndex(tryBeg->getTryIndex()); + tryIndex != EHblkDsc::NO_ENCLOSING_INDEX; tryIndex = compiler->ehGetEnclosingTryIndex(tryIndex)) + { + EHblkDsc* const parentTab = compiler->ehGetDsc(tryIndex); + if (parentTab->ebdTryLast == tryLast) { - EHblkDsc* const parentTab = compiler->ehGetDsc(tryIndex); - if (parentTab->ebdTryLast == tryLast) + // Tolerate parent regions that are identical to the child region. + if (parentTab->ebdTryBeg != tryBeg) { - // Tolerate parent regions that are identical to the child region. - if (parentTab->ebdTryBeg != tryBeg) - { - parentTab->ebdTryLast = tryBegPrev; - } - } - else - { - // No need to continue searching if the parent region's end block differs from the child region's. - break; + parentTab->ebdTryLast = tryBegPrev; } } + else + { + // No need to continue searching if the parent region's end block differs from the child region's. + break; + } } } @@ -5234,10 +5222,14 @@ void Compiler::ThreeOptLayout::CompactHotJumps() { JITDUMP("Compacting hot jumps\n"); - auto isBackwardJump = [INDEBUG(this)](BasicBlock* block, BasicBlock* target) { - assert((block->bbPostorderNum < numCandidateBlocks) && (blockOrder[block->bbPostorderNum] == block)); - assert((target->bbPostorderNum < numCandidateBlocks) && (blockOrder[target->bbPostorderNum] == target)); - return block->bbPostorderNum >= target->bbPostorderNum; + auto isCandidateBlock = [this](BasicBlock* block) { + return (block->bbPreorderNum < numCandidateBlocks) && (blockOrder[block->bbPreorderNum] == block); + }; + + auto isBackwardJump = [&](BasicBlock* block, BasicBlock* target) { + assert(isCandidateBlock(block)); + assert(isCandidateBlock(target)); + return block->bbPreorderNum >= target->bbPreorderNum; }; for (unsigned i = 0; i < numCandidateBlocks; i++) @@ -5268,7 +5260,8 @@ void Compiler::ThreeOptLayout::CompactHotJumps() // If we aren't sure which successor is hotter, and we already fall into one of them, // do nothing. BasicBlock* const unlikelyTarget = unlikelyEdge->getDestinationBlock(); - if ((unlikelyEdge->getLikelihood() == 0.5) && (unlikelyTarget->bbPostorderNum == (i + 1))) + if ((unlikelyEdge->getLikelihood() == 0.5) && isCandidateBlock(unlikelyTarget) && + (unlikelyTarget->bbPreorderNum == (i + 1))) { continue; } @@ -5300,7 +5293,7 @@ void Compiler::ThreeOptLayout::CompactHotJumps() BasicBlock* const target = edge->getDestinationBlock(); const unsigned srcPos = i; - const unsigned dstPos = target->bbPostorderNum; + const unsigned dstPos = target->bbPreorderNum; // We don't need to do anything if this edge already falls through. if ((srcPos + 1) == dstPos) @@ -5331,18 +5324,18 @@ void Compiler::ThreeOptLayout::CompactHotJumps() { BasicBlock* const blockToMove = blockOrder[pos]; blockOrder[pos + offset] = blockOrder[pos]; - blockToMove->bbPostorderNum += offset; + blockToMove->bbPreorderNum += offset; } // Now, insert 'target' in the space after 'block'. 
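The insertion here must preserve the invariant blockOrder[block->bbPreorderNum] == block, so every block slid over to make room has its ordinal rewritten along the way (the backward-jump case further down does the mirror-image shift). A standalone toy of the same array manipulation (the vectors and the name MoveTargetAfterSource are invented; the real code also drags a call-finally tail along when the target is a pair head):

#include <cassert>
#include <vector>

// 'order' maps position -> block id; 'ordinal' maps block id -> position.
void MoveTargetAfterSource(std::vector<int>& order, std::vector<size_t>& ordinal, size_t srcPos, size_t dstPos)
{
    assert(dstPos > srcPos + 1); // otherwise the target already falls through from the source
    const int target = order[dstPos];

    // Shift the blocks between the source and the target one slot later, fixing their ordinals.
    for (size_t pos = dstPos; pos > srcPos + 1; pos--)
    {
        order[pos]          = order[pos - 1];
        ordinal[order[pos]] = pos;
    }

    // Drop the target into the slot right after its source.
    order[srcPos + 1] = target;
    ordinal[target]   = srcPos + 1;
}

For example, with order = {0, 1, 2, 3} and matching ordinals, MoveTargetAfterSource(order, ordinal, 0, 3) yields order = {0, 3, 1, 2}, and every ordinal still points at its block's position.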
blockOrder[srcPos + 1] = target; - target->bbPostorderNum = srcPos + 1; + target->bbPreorderNum = srcPos + 1; // Move call-finally pairs in tandem. if (target->isBBCallFinallyPair()) { - blockOrder[srcPos + 2] = target->Next(); - target->Next()->bbPostorderNum = srcPos + 2; + blockOrder[srcPos + 2] = target->Next(); + target->Next()->bbPreorderNum = srcPos + 2; } } else @@ -5356,15 +5349,15 @@ void Compiler::ThreeOptLayout::CompactHotJumps() { BasicBlock* const blockToMove = blockOrder[pos]; blockOrder[pos + 1] = blockOrder[pos]; - blockToMove->bbPostorderNum++; + blockToMove->bbPreorderNum++; } // Now, insert 'block' before 'target'. - blockOrder[dstPos] = block; - block->bbPostorderNum = dstPos; + blockOrder[dstPos] = block; + block->bbPreorderNum = dstPos; } - assert((block->bbPostorderNum + 1) == target->bbPostorderNum); + assert((block->bbPreorderNum + 1) == target->bbPreorderNum); } } @@ -5411,24 +5404,24 @@ PhaseStatus Compiler::fgSearchImprovedLayout() // The first block really shouldn't be cold, but if it is, ensure it's still placed first. if (!block->hasHndIndex() && (!block->isBBWeightCold(this) || block->IsFirst())) { + // Set the block's ordinal. + block->bbPreorderNum = numHotBlocks; initialLayout[numHotBlocks++] = block; } }; - // Stress 3-opt by giving it the post-order traversal as its initial layout, - // but keep the method entry block at the beginning. + // Stress 3-opt by giving it the post-order traversal as its initial layout. if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10)) { for (unsigned i = 0; i < m_dfsTree->GetPostOrderCount(); i++) { - BasicBlock* const block = m_dfsTree->GetPostOrder(i); - if (!block->hasHndIndex()) - { - initialLayout[numHotBlocks++] = block; - } + addToSequence(m_dfsTree->GetPostOrder(i)); } + // Keep the method entry block at the beginning. + // Update the swapped blocks' ordinals, too. std::swap(initialLayout[0], initialLayout[numHotBlocks - 1]); + std::swap(initialLayout[0]->bbPreorderNum, initialLayout[numHotBlocks - 1]->bbPreorderNum); } else { From 447b2dd1acb2f33e05939adf54ac97c0819c79da Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Feb 2025 01:14:02 -0500 Subject: [PATCH 16/20] Cleanup --- src/coreclr/jit/fgopt.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index dd081beee8ea9e..e76b672d7c8b1d 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5152,10 +5152,8 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() } // We will try using 3-opt's chosen predecessor for the try region. - BasicBlock* const tryBegPrev = tryBeg->Prev(); - BasicBlock* const tryLast = HBtab->ebdTryLast; - BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; - unsigned parentIndex = + BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; + unsigned parentIndex = insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? @@ -5186,11 +5184,14 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() continue; } + BasicBlock* const tryBegPrev = tryBeg->Prev(); + BasicBlock* const tryLast = HBtab->ebdTryLast; compiler->fgUnlinkRange(tryBeg, tryLast); compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; - // Update the parent regions' end blocks. 
+ // If this region's parents also end on 'tryLast', + // update the parents' end pointers to 'tryBegPrev', since we moved this region up within its parents. for (unsigned tryIndex = compiler->ehGetEnclosingTryIndex(tryBeg->getTryIndex()); tryIndex != EHblkDsc::NO_ENCLOSING_INDEX; tryIndex = compiler->ehGetEnclosingTryIndex(tryIndex)) { From 6f7ec573ea4d102b06d8a948e57c5ea6cd215c3e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Feb 2025 17:01:51 -0500 Subject: [PATCH 17/20] Fix call-finally motion --- src/coreclr/jit/fgopt.cpp | 102 +++++++++++++------------------------- 1 file changed, 34 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index e76b672d7c8b1d..ebf1eb08a95bdd 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5062,65 +5062,61 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() lastHotBlocks[HBtab->ebdTryBeg->bbTryIndex] = HBtab->ebdTryBeg; } - // Reorder the block list + // Reorder the block list. + JITDUMP("Reordering block list\n"); bool modified = false; for (unsigned i = 1; i < numCandidateBlocks; i++) { BasicBlock* const block = blockOrder[i - 1]; - BasicBlock* const next = blockOrder[i]; + BasicBlock* const blockToMove = blockOrder[i]; lastHotBlocks[block->bbTryIndex] = block; - // Don't move try region entries yet. - // We will move entire try regions after. - if (compiler->bbIsTryBeg(next)) + // Don't move call-finally pair tails independently. + // When we encounter the head, we will move the entire pair. + if (blockToMove->isBBCallFinallyPairTail()) { continue; } - // If 'block' and 'next' are in the same region, we should have no problem creating fallthrough. - // If they aren't, then we will pick the last hot block we saw in the same region as 'next' to insert after. - BasicBlock* insertionPoint = lastHotBlocks[next->bbTryIndex]; - assert(insertionPoint != nullptr); - assert((block == insertionPoint) || !BasicBlock::sameTryRegion(block, next)); - - // Nothing to do if we already fall through. - if (insertionPoint->NextIs(next)) + // Only reorder blocks within the same try region. We don't want to make them non-contiguous. + if (compiler->bbIsTryBeg(blockToMove)) { continue; } - // Don't break up call-finally pairs. + // If these blocks aren't in the same try region, use the last block seen in the same region as 'blockToMove' + // for the insertion point. + // This will push the region containing 'block' down the method, but we will fix this after. + BasicBlock* insertionPoint = + BasicBlock::sameTryRegion(block, blockToMove) ? block : lastHotBlocks[blockToMove->bbTryIndex]; + + // Don't break up call-finally pairs by inserting something in the middle. if (insertionPoint->isBBCallFinallyPair()) { insertionPoint = insertionPoint->Next(); - assert(insertionPoint != next); + assert(blockToMove != insertionPoint); } - // Move call-finally pairs in tandem. - if (next->isBBCallFinallyPair()) + if (insertionPoint->NextIs(blockToMove)) { - BasicBlock* const callFinallyTail = next->Next(); - if (callFinallyTail != insertionPoint) - { - compiler->fgUnlinkRange(next, callFinallyTail); - compiler->fgMoveBlocksAfter(next, callFinallyTail, insertionPoint); - modified = true; - } + continue; } - else if (next->isBBCallFinallyPairTail()) + + // Move call-finallies together. 
+ if (blockToMove->isBBCallFinallyPair()) { - BasicBlock* const callFinally = next->Prev(); - if (callFinally != insertionPoint) + BasicBlock* const callFinallyRet = blockToMove->Next(); + if (callFinallyRet != insertionPoint) { - compiler->fgUnlinkRange(callFinally, next); - compiler->fgMoveBlocksAfter(callFinally, next, insertionPoint); + compiler->fgUnlinkRange(blockToMove, callFinallyRet); + compiler->fgMoveBlocksAfter(blockToMove, callFinallyRet, insertionPoint); modified = true; } } else { - compiler->fgUnlinkBlock(next); - compiler->fgInsertBBafter(insertionPoint, next); + compiler->fgUnlinkBlock(blockToMove); + compiler->fgInsertBBafter(insertionPoint, blockToMove); modified = true; } } @@ -5130,14 +5126,14 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() return modified; } - lastHotBlocks[blockOrder[numCandidateBlocks - 1]->bbTryIndex] = blockOrder[numCandidateBlocks - 1]; - // If we reordered within any try regions, make sure the EH table is up-to-date. if (modified) { compiler->fgFindEHRegionEnds(); } + JITDUMP("Moving try regions\n"); + // We only ordered blocks within regions above. // Now, move entire try regions up to their ideal predecessors, if possible. for (EHblkDsc* const HBtab : EHClauses(compiler)) @@ -5152,24 +5148,15 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() } // We will try using 3-opt's chosen predecessor for the try region. - BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; - unsigned parentIndex = + BasicBlock* insertionPoint = blockOrder[tryBeg->bbPreorderNum - 1]; + const unsigned parentIndex = insertionPoint->hasTryIndex() ? insertionPoint->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX; // Can we move this try to after 'insertionPoint' without breaking EH nesting invariants? if (parentIndex != HBtab->ebdEnclosingTryIndex) { - // We cannot. Instead, get the last hot block in the parent region. - parentIndex = - (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) ? (HBtab->ebdEnclosingTryIndex + 1) : 0; - insertionPoint = lastHotBlocks[parentIndex]; - - // If the parent of this try region is the same as it, it won't have an entry in 'lastHotBlocks'. - // Tolerate this. - if (insertionPoint == nullptr) - { - continue; - } + // We cannot. + continue; } // Don't break up call-finally pairs. @@ -5184,32 +5171,11 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() continue; } - BasicBlock* const tryBegPrev = tryBeg->Prev(); BasicBlock* const tryLast = HBtab->ebdTryLast; compiler->fgUnlinkRange(tryBeg, tryLast); compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; - - // If this region's parents also end on 'tryLast', - // update the parents' end pointers to 'tryBegPrev', since we moved this region up within its parents. - for (unsigned tryIndex = compiler->ehGetEnclosingTryIndex(tryBeg->getTryIndex()); - tryIndex != EHblkDsc::NO_ENCLOSING_INDEX; tryIndex = compiler->ehGetEnclosingTryIndex(tryIndex)) - { - EHblkDsc* const parentTab = compiler->ehGetDsc(tryIndex); - if (parentTab->ebdTryLast == tryLast) - { - // Tolerate parent regions that are identical to the child region. - if (parentTab->ebdTryBeg != tryBeg) - { - parentTab->ebdTryLast = tryBegPrev; - } - } - else - { - // No need to continue searching if the parent region's end block differs from the child region's. 
- break; - } - } + compiler->fgFindEHRegionEnds(); } return modified; From 5d32277a6e8971f253c4ee3b8b90d8f09e69c0d0 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Feb 2025 17:23:40 -0500 Subject: [PATCH 18/20] fgFindEHRegionEnds -> fgFindTryRegionEnds --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgopt.cpp | 4 +-- src/coreclr/jit/jiteh.cpp | 62 +++++++------------------------------- 3 files changed, 14 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ffc1b2b26febf0..301f151ec0a1fa 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3061,7 +3061,7 @@ class Compiler void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast); - void fgFindEHRegionEnds(); + void fgFindTryRegionEnds(); void fgSkipRmvdBlocks(EHblkDsc* handlerTab); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index ebf1eb08a95bdd..3f6a546dfa3870 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5129,7 +5129,7 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() // If we reordered within any try regions, make sure the EH table is up-to-date. if (modified) { - compiler->fgFindEHRegionEnds(); + compiler->fgFindTryRegionEnds(); } JITDUMP("Moving try regions\n"); @@ -5175,7 +5175,7 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() compiler->fgUnlinkRange(tryBeg, tryLast); compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; - compiler->fgFindEHRegionEnds(); + compiler->fgFindTryRegionEnds(); } return modified; diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index c731fea0043e41..7b54254cdecfd9 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1276,19 +1276,22 @@ void Compiler::fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast) } //------------------------------------------------------------- -// fgFindEHRegionEnds: Walk the block list, and set each try/handler region's end block. +// fgFindTryRegionEnds: Walk the main method body, and set each try region's end block. // -void Compiler::fgFindEHRegionEnds() +void Compiler::fgFindTryRegionEnds() { assert(compHndBBtabCount != 0); - unsigned unsetTryEnds = compHndBBtabCount; - unsigned unsetHndEnds = compHndBBtabCount; + unsigned unsetTryEnds = 0; - // Null out try/handler end pointers to signify the given clause hasn't been visited yet. + // Null out try end pointers to signify the given clause hasn't been visited yet. for (EHblkDsc* const HBtab : EHClauses(this)) { - HBtab->ebdTryLast = nullptr; - HBtab->ebdHndLast = nullptr; + // Ignore try regions inside handler regions. + if (!HBtab->ebdTryLast->hasHndIndex()) + { + HBtab->ebdTryLast = nullptr; + unsetTryEnds++; + } } // Updates the try region's (and all of its parent regions') end block to 'block,' @@ -1311,27 +1314,7 @@ void Compiler::fgFindEHRegionEnds() } }; - // Updates the handler region's (and all of its parent regions') end block to 'block,' - // if the handler region's end block hasn't been updated yet. 
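The rewritten walk leans on a simple observation: scanning the main method body from back to front, the first block encountered in a given try region is that region's new end block, and the same block also closes every enclosing region whose end is still unknown, so the upward walk can stop at the first region that is already set. A standalone toy of that walk (the example layout and index encoding are invented; the real code additionally skips try regions whose last block sits inside a handler):

#include <cstdio>
#include <vector>

int main()
{
    // tryIndexOf[b]: innermost try region of block b (-1 == none); region 1 is nested in region 0.
    const std::vector<int> tryIndexOf = {-1, 0, 1, 1, 0, -1};
    const std::vector<int> parentOf   = {-1, 0}; // enclosing region of each region (-1 == none)
    std::vector<int>       lastBlockOf(parentOf.size(), -1);

    // Walk the blocks backwards; set each region's end the first time the region is seen.
    for (int b = (int)tryIndexOf.size() - 1; b >= 0; b--)
    {
        for (int r = tryIndexOf[b]; r != -1; r = parentOf[r])
        {
            if (lastBlockOf[r] != -1)
            {
                break; // this region (and therefore all of its ancestors) already has its end block
            }
            lastBlockOf[r] = b;
        }
    }

    for (size_t r = 0; r < lastBlockOf.size(); r++)
    {
        printf("try region %zu ends at block %d\n", r, lastBlockOf[r]);
    }
    return 0;
}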
- auto setHndEnd = [this, &unsetHndEnds](BasicBlock* block) { - for (unsigned hndIndex = block->getHndIndex(); hndIndex != EHblkDsc::NO_ENCLOSING_INDEX; - hndIndex = ehGetEnclosingHndIndex(hndIndex)) - { - EHblkDsc* const HBtab = ehGetDsc(hndIndex); - if (HBtab->ebdHndLast == nullptr) - { - assert(unsetHndEnds != 0); - HBtab->ebdHndLast = block; - unsetHndEnds--; - } - else - { - break; - } - } - }; - - // Iterate backwards through the main method body, and update each try region's end block + // Iterate backwards through the main method body, and update each try region's end block. for (BasicBlock* block = fgLastBBInMainFunction(); (unsetTryEnds != 0) && (block != nullptr); block = block->Prev()) { if (block->hasTryIndex()) @@ -1340,30 +1323,7 @@ void Compiler::fgFindEHRegionEnds() } } - // If we don't have a funclet section, then all of the try regions should have been updated above - assert((unsetTryEnds == 0) || (fgFirstFuncletBB != nullptr)); - - // If we have a funclet section, update the ends of any try regions nested in funclets - for (BasicBlock* block = fgLastBB; (unsetTryEnds != 0) && (block != fgLastBBInMainFunction()); - block = block->Prev()) - { - if (block->hasTryIndex()) - { - setTryEnd(block); - } - } - - // Finally, update the handler regions' ends - for (BasicBlock* block = fgLastBB; (unsetHndEnds != 0) && (block != nullptr); block = block->Prev()) - { - if (block->hasHndIndex()) - { - setHndEnd(block); - } - } - assert(unsetTryEnds == 0); - assert(unsetHndEnds == 0); } /***************************************************************************** From 52536be23c01f2780fe478694a7fe4dbeec56bc7 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 20 Feb 2025 19:47:49 -0500 Subject: [PATCH 19/20] Style --- src/coreclr/jit/fgopt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 3f6a546dfa3870..e4baed1f249839 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5171,7 +5171,7 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() continue; } - BasicBlock* const tryLast = HBtab->ebdTryLast; + BasicBlock* const tryLast = HBtab->ebdTryLast; compiler->fgUnlinkRange(tryBeg, tryLast); compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; From 60303ad956a4303e21ff41204e2e21728ff5869f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 21 Feb 2025 11:13:09 -0500 Subject: [PATCH 20/20] Recompute try region ends only when necessary --- src/coreclr/jit/fgopt.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index e4baed1f249839..eb5a57e2ae79f6 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5175,7 +5175,12 @@ bool Compiler::ThreeOptLayout::ReorderBlockList() compiler->fgUnlinkRange(tryBeg, tryLast); compiler->fgMoveBlocksAfter(tryBeg, tryLast, insertionPoint); modified = true; - compiler->fgFindTryRegionEnds(); + + // If we moved this region within another region, recompute the try region end blocks. + if (parentIndex != EHblkDsc::NO_ENCLOSING_INDEX) + { + compiler->fgFindTryRegionEnds(); + } } return modified;
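A closing note on the final patch: the earlier nesting check guarantees parentIndex == HBtab->ebdEnclosingTryIndex whenever this point is reached, so the new condition is equivalent to asking whether the moved try region is nested inside another try region at all. Only that nested case can leave an enclosing region's ebdTryLast stale, since the enclosing region may have ended on the moved region's last block, which is why a top-level move skips the recomputation. Restated as a sketch under that assumption:

    // Equivalent form of the final check, relying on parentIndex == HBtab->ebdEnclosingTryIndex here:
    if (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
    {
        // The enclosing region may have ended on 'tryLast' before the move, so its
        // ebdTryLast (and that of further ancestors) must be recomputed.
        compiler->fgFindTryRegionEnds();
    }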