diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 810730e3a94a4e..c2954a7d45f3b6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4493,6 +4493,8 @@ class Compiler unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder + BasicBlock** fgSSAPostOrder; // Blocks in postorder, computed during SSA + unsigned fgSSAPostOrderCount; // Number of blocks in fgSSAPostOrder // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -5057,7 +5059,7 @@ class Compiler // The value numbers for this compilation. ValueNumStore* vnStore; - struct ValueNumberState* vnVisitState; + class ValueNumberState* vnState; public: ValueNumStore* GetValueNumStore() @@ -5723,7 +5725,7 @@ class Compiler protected: friend class SsaBuilder; - friend struct ValueNumberState; + friend class ValueNumberState; //--------------------- Detect the basic blocks --------------------------- diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 665aeeee217dfa..c8b854879c0fb7 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -769,7 +769,6 @@ bool Compiler::fgRemoveDeadBlocks() // Notes: // Each block's `bbPreorderNum` and `bbPostorderNum` is set. // The `fgBBReversePostorder` array is filled in with the `BasicBlock*` in reverse post-order. -// This algorithm only pays attention to the actual blocks. It ignores any imaginary entry block. // // Unreachable blocks will have higher pre and post order numbers than reachable blocks. // Hence they will appear at lower indices in the fgBBReversePostorder array. diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 100c65c0cc57f7..2ad8108c9cd9aa 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -69,6 +69,8 @@ PhaseStatus Compiler::fgSsaBuild() JitTestCheckSSA(); #endif // DEBUG + fgSSAPostOrder = builder.GetPostOrder(&fgSSAPostOrderCount); + return PhaseStatus::MODIFIED_EVERYTHING; } @@ -144,7 +146,7 @@ SsaBuilder::SsaBuilder(Compiler* pCompiler) // Return Value: // The number of nodes visited while performing DFS on the graph. // -int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) +unsigned SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) { Compiler* comp = m_pCompiler; @@ -179,7 +181,7 @@ int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) }; // Compute order. - int postIndex = 0; + unsigned postIndex = 0; BasicBlock* block = comp->fgFirstBB; BitVecOps::AddElemD(&m_visitedTraits, m_visited, block->bbNum); @@ -206,16 +208,13 @@ int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) // all successors have been visited blocks.Pop(); - DBG_SSA_JITDUMP("[SsaBuilder::TopologicalSort] postOrder[%d] = " FMT_BB "\n", postIndex, block->bbNum); + DBG_SSA_JITDUMP("[SsaBuilder::TopologicalSort] postOrder[%u] = " FMT_BB "\n", postIndex, block->bbNum); postOrder[postIndex] = block; block->bbPostorderNum = postIndex; postIndex++; } } - // In the absence of EH (because catch/finally have no preds), this should be valid. - // assert(postIndex == (count - 1)); - return postIndex; } @@ -1500,16 +1499,7 @@ void SsaBuilder::Build() // Allocate the postOrder array for the graph. - BasicBlock** postOrder; - - if (blockCount > DEFAULT_MIN_OPTS_BB_COUNT) - { - postOrder = new (m_allocator) BasicBlock*[blockCount]; - } - else - { - postOrder = (BasicBlock**)_alloca(blockCount * sizeof(BasicBlock*)); - } + m_postOrder = new (m_allocator) BasicBlock*[blockCount]; m_visitedTraits = BitVecTraits(blockCount, m_pCompiler); m_visited = BitVecOps::MakeEmpty(&m_visitedTraits); @@ -1527,12 +1517,12 @@ void SsaBuilder::Build() } // Topologically sort the graph. - int count = TopologicalSort(postOrder, blockCount); + m_postOrderCount = TopologicalSort(m_postOrder, blockCount); JITDUMP("[SsaBuilder] Topologically sorted the graph.\n"); EndPhase(PHASE_BUILD_SSA_TOPOSORT); // Compute IDom(b). - ComputeImmediateDom(postOrder, count); + ComputeImmediateDom(m_postOrder, m_postOrderCount); m_pCompiler->fgSsaDomTree = m_pCompiler->fgBuildDomTree(); EndPhase(PHASE_BUILD_SSA_DOMS); @@ -1551,7 +1541,7 @@ void SsaBuilder::Build() } // Insert phi functions. - InsertPhiFunctions(postOrder, count); + InsertPhiFunctions(m_postOrder, m_postOrderCount); // Rename local variables and collect UD information for each ssa var. RenameVariables(); diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 325903dc3e6f0d..22515537355675 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -33,6 +33,12 @@ class SsaBuilder // variable are stored in the "per SSA data" on the local descriptor. void Build(); + BasicBlock** GetPostOrder(unsigned* count) + { + *count = m_postOrderCount; + return m_postOrder; + } + private: // Ensures that the basic block graph has a root for the dominator graph, by ensuring // that there is a first block that is not in a try region (adding an empty block for that purpose @@ -44,7 +50,7 @@ class SsaBuilder // the blocks in post order (i.e., a node's children first) in the array. Returns the // number of nodes visited while sorting the graph. In other words, valid entries in // the output array. - int TopologicalSort(BasicBlock** postOrder, int count); + unsigned TopologicalSort(BasicBlock** postOrder, int count); // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted // order. Requires count to be the valid entries in the "postOrder" array. Computes @@ -101,4 +107,6 @@ class SsaBuilder BitVec m_visited; SsaRenameState m_renameStack; + BasicBlock** m_postOrder = nullptr; + unsigned m_postOrderCount = 0; }; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 4e804aba7f4e7b..1a6406a543715f 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9630,220 +9630,62 @@ void ValueNumStore::RunTests(Compiler* comp) } #endif // DEBUG -typedef JitExpandArrayStack BlockStack; - -// This represents the "to do" state of the value number computation. -struct ValueNumberState -{ - // These two stacks collectively represent the set of blocks that are candidates for - // processing, because at least one predecessor has been processed. Blocks on "m_toDoAllPredsDone" - // have had *all* predecessors processed, and thus are candidates for some extra optimizations. - // Blocks on "m_toDoNotAllPredsDone" have at least one predecessor that has not been processed. - // Blocks are initially on "m_toDoNotAllPredsDone" may be moved to "m_toDoAllPredsDone" when their last - // unprocessed predecessor is processed, thus maintaining the invariants. - BlockStack m_toDoAllPredsDone; - BlockStack m_toDoNotAllPredsDone; - - Compiler* m_comp; - - // TBD: This should really be a bitset... - // For now: - // first bit indicates completed, - // second bit indicates that it's been pushed on all-done stack, - // third bit indicates that it's been pushed on not-all-done stack. - BYTE* m_visited; - - enum BlockVisitBits - { - BVB_complete = 0x1, - BVB_onAllDone = 0x2, - BVB_onNotAllDone = 0x4, - // Set for statically reachable blocks that we have proven unreachable. - BVB_provenUnreachable = 0x8, - }; - - bool GetVisitBit(unsigned bbNum, BlockVisitBits bvb) - { - return (m_visited[bbNum] & bvb) != 0; - } - void SetVisitBit(unsigned bbNum, BlockVisitBits bvb) - { - m_visited[bbNum] |= bvb; - } +class ValueNumberState +{ + Compiler* m_comp; + BitVecTraits m_blockTraits; + BitVec m_provenUnreachableBlocks; +public: ValueNumberState(Compiler* comp) - : m_toDoAllPredsDone(comp->getAllocator(CMK_ValueNumber), /*minSize*/ 4) - , m_toDoNotAllPredsDone(comp->getAllocator(CMK_ValueNumber), /*minSize*/ 4) - , m_comp(comp) - , m_visited(new (comp, CMK_ValueNumber) BYTE[comp->fgBBNumMax + 1]()) + : m_comp(comp) + , m_blockTraits(comp->fgBBNumMax + 1, comp) + , m_provenUnreachableBlocks(BitVecOps::MakeEmpty(&m_blockTraits)) { } - BasicBlock* ChooseFromNotAllPredsDone() + //------------------------------------------------------------------------ + // SetUnreachable: Mark that a block has been proven unreachable. + // + // Parameters: + // bb - The block. + // + void SetUnreachable(BasicBlock* bb) { - assert(m_toDoAllPredsDone.Size() == 0); - // If we have no blocks with all preds done, then (ideally, if all cycles have been captured by loops) - // we must have at least one block within a loop. We want to do the loops first. Doing a loop entry block - // should break the cycle, making the rest of the body of the loop (unless there's a nested loop) doable by the - // all-preds-done rule. If several loop entry blocks are available, at least one should have all non-loop preds - // done -- we choose that. - for (unsigned i = 0; i < m_toDoNotAllPredsDone.Size(); i++) - { - BasicBlock* cand = m_toDoNotAllPredsDone.Get(i); - - // Skip any already-completed blocks (a block may have all its preds finished, get added to the - // all-preds-done todo set, and get processed there). Do this by moving the last one down, to - // keep the array compact. - while (GetVisitBit(cand->bbNum, BVB_complete)) - { - if (i + 1 < m_toDoNotAllPredsDone.Size()) - { - cand = m_toDoNotAllPredsDone.Pop(); - m_toDoNotAllPredsDone.Set(i, cand); - } - else - { - // "cand" is the last element; delete it. - (void)m_toDoNotAllPredsDone.Pop(); - break; - } - } - // We may have run out of non-complete candidates above. If so, we're done. - if (i == m_toDoNotAllPredsDone.Size()) - { - break; - } - - // See if "cand" is a loop entry. - unsigned lnum; - if (m_comp->optBlockIsLoopEntry(cand, &lnum)) - { - // "lnum" is the innermost loop of which "cand" is the entry; find the outermost. - unsigned lnumPar = m_comp->optLoopTable[lnum].lpParent; - while (lnumPar != BasicBlock::NOT_IN_LOOP) - { - if (m_comp->optLoopTable[lnumPar].lpEntry == cand) - { - lnum = lnumPar; - } - else - { - break; - } - lnumPar = m_comp->optLoopTable[lnumPar].lpParent; - } - - bool allNonLoopPredsDone = true; - for (FlowEdge* pred = m_comp->BlockPredsWithEH(cand); pred != nullptr; pred = pred->getNextPredEdge()) - { - BasicBlock* predBlock = pred->getSourceBlock(); - if (!m_comp->optLoopTable[lnum].lpContains(predBlock)) - { - if (!GetVisitBit(predBlock->bbNum, BVB_complete)) - { - allNonLoopPredsDone = false; - } - } - } - if (allNonLoopPredsDone) - { - return cand; - } - } - } - - // If we didn't find a loop entry block with all non-loop preds done above, then return a random member (if - // there is one). - if (m_toDoNotAllPredsDone.Size() == 0) - { - return nullptr; - } - else - { - return m_toDoNotAllPredsDone.Pop(); - } + BitVecOps::AddElemD(&m_blockTraits, m_provenUnreachableBlocks, bb->bbNum); } -// Debugging output that is too detailed for a normal JIT dump... -#define DEBUG_VN_VISIT 0 - - // Record that "blk" has been visited, and add any unvisited successors of "blk" to the appropriate todo set. - void FinishVisit(BasicBlock* blk) + //------------------------------------------------------------------------ + // IsReachable: Check if a block is reachable. Takes static reachability + // and proven unreachability into account. + // + // Parameters: + // bb - The block. + // + // Returns: + // True if the basic block is potentially reachable. False if the basic + // block is definitely not reachable. + // + bool IsReachable(BasicBlock* bb) { -#ifdef DEBUG_VN_VISIT - JITDUMP("finish(" FMT_BB ").\n", blk->bbNum); -#endif // DEBUG_VN_VISIT - - SetVisitBit(blk->bbNum, BVB_complete); - - blk->VisitAllSuccs(m_comp, [&](BasicBlock* succ) { -#ifdef DEBUG_VN_VISIT - JITDUMP(" Succ(" FMT_BB ").\n", succ->bbNum); -#endif // DEBUG_VN_VISIT - - if (GetVisitBit(succ->bbNum, BVB_complete)) - { - return BasicBlockVisit::Continue; - } -#ifdef DEBUG_VN_VISIT - JITDUMP(" Not yet completed.\n"); -#endif // DEBUG_VN_VISIT - - bool allPredsVisited = true; - bool provenUnreachable = true; - for (FlowEdge* pred = m_comp->BlockPredsWithEH(succ); pred != nullptr; pred = pred->getNextPredEdge()) - { - BasicBlock* predBlock = pred->getSourceBlock(); - if (!GetVisitBit(predBlock->bbNum, BVB_complete)) - { - allPredsVisited = false; - break; - } - - if (provenUnreachable && !GetVisitBit(predBlock->bbNum, BVB_provenUnreachable) && - IsReachableFromPred(predBlock, succ)) - { - provenUnreachable = false; - } - } - - if (allPredsVisited) - { -#ifdef DEBUG_VN_VISIT - JITDUMP(" All preds complete, adding to allDone.\n"); -#endif // DEBUG_VN_VISIT - - // Only last completion of last succ should add to this. - assert(!GetVisitBit(succ->bbNum, BVB_onAllDone)); - m_toDoAllPredsDone.Push(succ); - SetVisitBit(succ->bbNum, BVB_onAllDone); - - if (provenUnreachable) - { - SetVisitBit(succ->bbNum, BVB_provenUnreachable); - } - } - else - { -#ifdef DEBUG_VN_VISIT - JITDUMP(" Not all preds complete Adding to notallDone, if necessary...\n"); -#endif // DEBUG_VN_VISIT - - if (!GetVisitBit(succ->bbNum, BVB_onNotAllDone)) - { -#ifdef DEBUG_VN_VISIT - JITDUMP(" Was necessary.\n"); -#endif // DEBUG_VN_VISIT - m_toDoNotAllPredsDone.Push(succ); - SetVisitBit(succ->bbNum, BVB_onNotAllDone); - } - } - - return BasicBlockVisit::Continue; - }); + return (bb->bbPostorderNum < m_comp->fgSSAPostOrderCount) && + (m_comp->fgSSAPostOrder[bb->bbPostorderNum] == bb) && + !BitVecOps::IsMember(&m_blockTraits, m_provenUnreachableBlocks, bb->bbNum); } - bool IsReachableFromPred(BasicBlock* predBlock, BasicBlock* block) + //------------------------------------------------------------------------ + // IsReachableFromPred: Check if a block is reachability from one of its + // predecessors. + // + // Parameters: + // block - A block + // predBlock - A predecessor of 'block' + // + // Returns: + // False if 'block' is definitely not reachable even though it is a + // direct successor of 'predBlock'. + // + bool IsReachableFromPred(BasicBlock* block, BasicBlock* predBlock) { if (!predBlock->KindIs(BBJ_COND) || predBlock->JumpsToNext()) { @@ -9867,11 +9709,6 @@ struct ValueNumberState BasicBlock* unreachableSucc = isTaken ? predBlock->Next() : predBlock->GetJumpDest(); return block != unreachableSucc; } - - bool ToDoExists() - { - return m_toDoAllPredsDone.Size() > 0 || m_toDoNotAllPredsDone.Size() > 0; - } }; //------------------------------------------------------------------------ @@ -10003,42 +9840,48 @@ PhaseStatus Compiler::fgValueNumber() #endif // DEBUG ValueNumberState vs(this); + vnState = &vs; - vnVisitState = &vs; - - // Push the first block. This has no preds. - vs.m_toDoAllPredsDone.Push(fgFirstBB); - - while (vs.ToDoExists()) + // SSA has already computed a post-order taking EH successors into account. + // Visiting that in reverse will ensure we visit a block's predecessors + // before itself whenever possible. + for (unsigned i = fgSSAPostOrderCount; i != 0; i--) { - while (vs.m_toDoAllPredsDone.Size() > 0) - { - BasicBlock* toDo = vs.m_toDoAllPredsDone.Pop(); - - // TODO-TP: We can skip VN'ing blocks we have proven unreachable - // here. However, currently downstream phases are not prepared to - // handle the fact that some blocks that are seemingly reachable - // have not been VN'd. - fgValueNumberBlock(toDo); + BasicBlock* block = fgSSAPostOrder[i - 1]; + JITDUMP("Visiting " FMT_BB "\n", block->bbNum); - // Record that we've visited "toDo", and add successors to the right sets. - vs.FinishVisit(toDo); - } - // OK, we've run out of blocks whose predecessors are done. Pick one whose predecessors are not all done, - // process that. This may make more "all-done" blocks, so we'll go around the outer loop again -- - // note that this is an "if", not a "while" loop. - if (vs.m_toDoNotAllPredsDone.Size() > 0) + if (block != fgFirstBB) { - BasicBlock* toDo = vs.ChooseFromNotAllPredsDone(); - if (toDo == nullptr) + bool anyPredReachable = false; + for (FlowEdge* pred = BlockPredsWithEH(block); pred != nullptr; pred = pred->getNextPredEdge()) { - continue; // We may have run out, because of completed blocks on the not-all-preds done list. + BasicBlock* predBlock = pred->getSourceBlock(); + if (!vs.IsReachable(predBlock)) + { + JITDUMP(" " FMT_BB " is an unreachable pred\n", predBlock->bbNum); + continue; + } + + if (!vs.IsReachableFromPred(block, predBlock)) + { + JITDUMP(" " FMT_BB " is a reachable pred, but " FMT_BB " is not reachable from it\n", + predBlock->bbNum, block->bbNum); + continue; + } + + JITDUMP(" " FMT_BB " is a reachable pred\n", predBlock->bbNum); + anyPredReachable = true; + break; } - fgValueNumberBlock(toDo); - // Record that we've visited "toDo", and add successors to the right sest. - vs.FinishVisit(toDo); + if (!anyPredReachable) + { + JITDUMP(" " FMT_BB " was proven unreachable\n", block->bbNum); + vs.SetUnreachable(block); + } } + + fgValueNumberBlock(block); } #ifdef DEBUG @@ -10068,9 +9911,9 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk) for (GenTreePhi::Use& use : phiNode->Uses()) { GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); - if (vnVisitState->GetVisitBit(phiArg->gtPredBB->bbNum, ValueNumberState::BVB_provenUnreachable)) + if (!vnState->IsReachable(phiArg->gtPredBB)) { - JITDUMP(" Phi arg [%06u] refers to proven unreachable pred " FMT_BB "\n", dspTreeID(phiArg), + JITDUMP(" Phi arg [%06u] refers to unreachable pred " FMT_BB "\n", dspTreeID(phiArg), phiArg->gtPredBB->bbNum); if ((use.GetNext() != nullptr) || (phiVNP.GetLiberal() != ValueNumStore::NoVN)) @@ -10078,7 +9921,8 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk) continue; } - JITDUMP(" ..but it looks like all preds are unreachable, so we are using it anyway\n"); + assert(!vnState->IsReachable(blk)); + JITDUMP(" ..but all preds are unreachable, so we are using it anyway\n"); } ValueNum phiArgSsaNumVN = vnStore->VNForIntCon(phiArg->GetSsaNum());