Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constrained mask should take into account busy registers #85933

Merged
merged 12 commits into from
May 16, 2023
31 changes: 30 additions & 1 deletion src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,36 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
// the same code as above
else if (op2reg == targetReg)
{
noway_assert(GenTree::OperIsCommutative(oper));
unsigned lclNum1 = 0;
unsigned lclNum2 = 0;

#ifdef DEBUG
if (op1->OperIsLocalRead())
{
lclNum1 = op1->AsLclVarCommon()->GetLclNum();
}
else if (op1->IsCopyOrReload())
{
GenTree* copySrc = op1->AsCopyOrReload()->gtGetOp1();
if (copySrc->OperIsLocalRead())
{
lclNum1 = copySrc->AsLclVarCommon()->GetLclNum();
}
}
if (op2->OperIsLocalRead())
{
lclNum2 = op2->AsLclVarCommon()->GetLclNum();
}
else if (op2->IsCopyOrReload())
{
GenTree* copySrc = op2->AsCopyOrReload()->gtGetOp1();
if (copySrc->OperIsLocalRead())
{
lclNum2 = copySrc->AsLclVarCommon()->GetLclNum();
}
}
#endif
noway_assert(GenTree::OperIsCommutative(oper) || (lclNum1 == lclNum2));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized this can't be a noway_assert now: it would mean we're asserting different things in Release and Debug: and the case where you're asserting lclNum1 == lclNum2 would fail in Release.

dst = op2;
src = op1;
}
Expand Down
34 changes: 25 additions & 9 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
// minRegCount registers, otherwise returns regMaskActual.
//
// Arguments:
// Refposition - Refposition for which we want to constaint.
// regMaskActual - regMask that needs to be constrained
// regMaskConstraint - regMask constraint that needs to be
// applied to regMaskActual
Expand All @@ -459,15 +460,30 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
// Return Value:
// New regMask that has minRegCount registers after instersection.
// Otherwise returns regMaskActual.
regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
regMaskTP regMaskActual,
regMaskTP regMaskConstraint,
unsigned minRegCount)
{
regMaskTP newMask = regMaskActual & regMaskConstraint;
if (genCountBits(newMask) >= minRegCount)
if (genCountBits(newMask) < minRegCount)
{
return newMask;
// Constrained mask does not have minium required registers needed.
return regMaskActual;
}

return regMaskActual;
if ((refPosition != nullptr) && !refPosition->RegOptional())
{
regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation;
if ((newMask & ~busyRegs) == RBM_NONE)
{
// Constrained mask do not have at least one free register to allocate.
// Skip for RegOptional, because its ok to not have registers for them.
return regMaskActual;
}
}

return newMask;
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -508,32 +524,32 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
case LSRA_LIMIT_CALLEE:
if (!compiler->opts.compDbgEnC)
{
mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount);
}
break;

case LSRA_LIMIT_CALLER:
{
mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount);
}
break;

case LSRA_LIMIT_SMALL_SET:
if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
{
mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount);
}
else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
{
mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount);
}
break;

#if defined(TARGET_AMD64)
case LSRA_LIMIT_UPPER_SIMD_SET:
if ((mask & LsraLimitUpperSimdSet) != RBM_NONE)
{
mask = getConstrainedRegMask(mask, LsraLimitUpperSimdSet, minRegCount);
mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount);
}
break;
#endif
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,10 @@ class LinearScan : public LinearScanInterface
return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
}

regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
regMaskTP getConstrainedRegMask(RefPosition* refPosition,
regMaskTP regMaskActual,
regMaskTP regMaskConstrain,
unsigned minRegCount);
regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);

// This controls the heuristics used to select registers
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1883,7 +1883,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
#endif // TARGET_ARM64
{
newRefPosition->registerAssignment =
getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef);
}

if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
Expand Down Expand Up @@ -2243,6 +2243,8 @@ void LinearScan::buildIntervals()
RegState* floatRegState = &compiler->codeGen->floatRegState;
intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
regsInUseThisLocation = RBM_NONE;
regsInUseNextLocation = RBM_NONE;

for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
{
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,10 @@ bool LinearScan::isRMWRegOper(GenTree* tree)

// x86/x64 does support a three op multiply when op2|op1 is a contained immediate
case GT_MUL:
#ifdef TARGET_X86
case GT_SUB_HI:
case GT_LSH_HI:
#endif
{
if (varTypeIsFloating(tree->TypeGet()))
{
Expand Down