Constrained mask should take into account busy registers (dotnet#85933)
* NO MERGE

* Revert "NO MERGE"

This reverts commit 68657c6.

* Make sure constrained regMask doesn't include busy registers

* add the missing parameter

* fix superpmi failure

* fix the release errors

* review comment

* review comment

* use gtSkipReloadOrCopy()

* update the assert
kunalspathak authored May 16, 2023
1 parent 0cfc81d commit 17bf9b8
Showing 5 changed files with 56 additions and 12 deletions.
21 changes: 20 additions & 1 deletion src/coreclr/jit/codegenxarch.cpp
@@ -1010,7 +1010,26 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
     // the same code as above
     else if (op2reg == targetReg)
     {
-        noway_assert(GenTree::OperIsCommutative(oper));
+
+#ifdef DEBUG
+        unsigned lclNum1 = (unsigned)-1;
+        unsigned lclNum2 = (unsigned)-2;
+
+        GenTree* op1Skip = op1->gtSkipReloadOrCopy();
+        GenTree* op2Skip = op2->gtSkipReloadOrCopy();
+
+        if (op1Skip->OperIsLocalRead())
+        {
+            lclNum1 = op1Skip->AsLclVarCommon()->GetLclNum();
+        }
+        if (op2Skip->OperIsLocalRead())
+        {
+            lclNum2 = op2Skip->AsLclVarCommon()->GetLclNum();
+        }
+
+        assert(GenTree::OperIsCommutative(oper) || (lclNum1 == lclNum2));
+#endif
+
         dst = op2;
         src = op1;
     }
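The debug block above relaxes the old noway_assert: swapping dst and src when op2 already occupies the target register is valid not only for commutative operators, but also when both operands are reads of the same local, since they then hold the same value and operand order cannot matter. A minimal standalone sketch of that condition (not part of the commit; the JIT types are stripped away and the values are made up):

    #include <cassert>

    // Sketch of the condition asserted by the new DEBUG block: operands may be
    // swapped when the operator is commutative, or when both operands read the
    // same local variable (lclNum1 == lclNum2) and therefore carry the same value.
    static bool canSwapOperands(bool operIsCommutative, unsigned lclNum1, unsigned lclNum2)
    {
        return operIsCommutative || (lclNum1 == lclNum2);
    }

    int main()
    {
        assert(canSwapOperands(true, 1, 2));   // ADD: always swappable
        assert(canSwapOperands(false, 3, 3));  // SUB with the same local on both sides
        assert(!canSwapOperands(false, 3, 4)); // SUB with distinct locals: not swappable
        return 0;
    }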
34 changes: 25 additions & 9 deletions src/coreclr/jit/lsra.cpp
@@ -450,6 +450,7 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
 //      minRegCount registers, otherwise returns regMaskActual.
 //
 // Arguments:
+//      refPosition       - RefPosition for which we want to constrain.
 //      regMaskActual     - regMask that needs to be constrained
 //      regMaskConstraint - regMask constraint that needs to be
 //                          applied to regMaskActual
@@ -459,15 +460,30 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
 // Return Value:
 //      New regMask that has minRegCount registers after intersection.
 //      Otherwise returns regMaskActual.
-regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
+regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
+                                            regMaskTP    regMaskActual,
+                                            regMaskTP    regMaskConstraint,
+                                            unsigned     minRegCount)
 {
     regMaskTP newMask = regMaskActual & regMaskConstraint;
-    if (genCountBits(newMask) >= minRegCount)
+    if (genCountBits(newMask) < minRegCount)
     {
-        return newMask;
+        // Constrained mask does not have the minimum number of registers needed.
+        return regMaskActual;
     }
 
-    return regMaskActual;
+    if ((refPosition != nullptr) && !refPosition->RegOptional())
+    {
+        regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation;
+        if ((newMask & ~busyRegs) == RBM_NONE)
+        {
+            // Constrained mask does not have at least one free register to allocate.
+            // Skip for RegOptional, because it's ok to not have registers for them.
+            return regMaskActual;
+        }
+    }
+
+    return newMask;
 }
 
 //------------------------------------------------------------------------
@@ -508,32 +524,32 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
             case LSRA_LIMIT_CALLEE:
                 if (!compiler->opts.compDbgEnC)
                 {
-                    mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
+                    mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount);
                 }
                 break;
 
            case LSRA_LIMIT_CALLER:
            {
-                mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
+                mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount);
            }
            break;
 
            case LSRA_LIMIT_SMALL_SET:
                if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
                {
-                    mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
+                    mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount);
                }
                else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
                {
-                    mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
+                    mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount);
                }
                break;
 
#if defined(TARGET_AMD64)
            case LSRA_LIMIT_UPPER_SIMD_SET:
                if ((mask & LsraLimitUpperSimdSet) != RBM_NONE)
                {
-                    mask = getConstrainedRegMask(mask, LsraLimitUpperSimdSet, minRegCount);
+                    mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount);
                }
                break;
#endif
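For context, the core of the change is the extra busy-register check in getConstrainedRegMask above: a stress-constrained mask is only accepted if, for a ref position that actually needs a register, at least one candidate register is not already busy at the current location. A simplified, self-contained sketch of that behaviour (not the JIT code itself; plain integer masks stand in for regMaskTP, and busyRegs stands in for regsBusyUntilKill | regsInUseThisLocation):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    using RegMask = std::uint64_t;

    // Simplified stand-in for LinearScan::getConstrainedRegMask after this commit.
    RegMask constrain(RegMask actual, RegMask constraint, unsigned minRegs,
                      bool regOptional, RegMask busyRegs)
    {
        RegMask newMask = actual & constraint;
        if (std::popcount(newMask) < static_cast<int>(minRegs))
        {
            return actual; // constrained mask has too few candidates
        }
        if (!regOptional && ((newMask & ~busyRegs) == 0))
        {
            return actual; // every constrained candidate is busy; keep the wider mask
        }
        return newMask;
    }

    int main()
    {
        RegMask actual     = 0b1111; // four registers allowed
        RegMask constraint = 0b0011; // stress mode limits to the low two
        RegMask busy       = 0b0011; // ...but both of those are already in use

        // Before this commit the result would be 0b0011 (only busy registers);
        // with the new check the original, wider mask is kept instead.
        std::printf("0x%llx\n",
                    static_cast<unsigned long long>(
                        constrain(actual, constraint, 1, /*regOptional*/ false, busy)));
        return 0;
    }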
5 changes: 4 additions & 1 deletion src/coreclr/jit/lsra.h
@@ -813,7 +813,10 @@ class LinearScan : public LinearScanInterface
         return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
     }
 
-    regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
+    regMaskTP getConstrainedRegMask(RefPosition* refPosition,
+                                    regMaskTP    regMaskActual,
+                                    regMaskTP    regMaskConstrain,
+                                    unsigned     minRegCount);
     regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
 
     // This controls the heuristics used to select registers
4 changes: 3 additions & 1 deletion src/coreclr/jit/lsrabuild.cpp
@@ -1883,7 +1883,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
 #endif // TARGET_ARM64
                 {
                     newRefPosition->registerAssignment =
-                        getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
+                        getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef);
                 }
 
                 if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
@@ -2247,6 +2247,8 @@ void LinearScan::buildIntervals()
     RegState* floatRegState = &compiler->codeGen->floatRegState;
     intRegState->rsCalleeRegArgMaskLiveIn   = RBM_NONE;
     floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+    regsInUseThisLocation                   = RBM_NONE;
+    regsInUseNextLocation                   = RBM_NONE;
 
     for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
     {
4 changes: 4 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
@@ -723,6 +723,10 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
 
         // x86/x64 does support a three op multiply when op2|op1 is a contained immediate
         case GT_MUL:
+#ifdef TARGET_X86
+        case GT_SUB_HI:
+        case GT_LSH_HI:
+#endif
        {
            if (varTypeIsFloating(tree->TypeGet()))
            {
