Arm64: Use csel and ccmp for conditional moves #67894

Closed · wants to merge 6 commits
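For orientation, here is a rough sketch of the transformation the title refers to. The function and the commented Arm64 sequence are illustrative only (assumptions for exposition, not code or output taken from this change): a conditional assignment that would otherwise need a compare and a branch can instead be emitted as a compare plus a conditional select.

// Illustrative C++ only, not part of the diff: the branchy shape this PR
// targets, and (in comments) a plausible branch-free Arm64 lowering.
int ConditionalMove(int a, int b, int x, int y)
{
    int r = x;
    if (a < b)
    {
        r = y;      // conventionally: cmp + conditional branch around the store
    }
    return r;
    // Branch-free (illustrative, not captured from the JIT):
    //   cmp  w0, w1           ; a < b ?
    //   csel w0, w3, w2, lt   ; r = lt ? y : x
}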
31 changes: 30 additions & 1 deletion src/coreclr/jit/assertionprop.cpp
@@ -4229,6 +4229,18 @@ GenTree* Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenT
return optAssertionProp_Update(op2, tree, stmt);
}

/*****************************************************************************
*
*  Given a conditional op tree, re-morph it using the standard morph function and update the statement.
*/

GenTree* Compiler::optAssertionProp_ConditionalOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt)
{
assert(tree->OperIsConditional());
GenTree* newTree = fgMorphTree(tree);
return optAssertionProp_Update(newTree, tree, stmt);
}

//------------------------------------------------------------------------
// optAssertionProp_Cast: Propagate assertion for a cast, possibly removing it.
//
@@ -4912,6 +4924,15 @@ GenTree* Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree,
}
return nullptr;

case GT_SELECT:
case GT_CEQ:
case GT_CNE:
case GT_CLT:
case GT_CLE:
case GT_CGE:
case GT_CGT:
return optAssertionProp_ConditionalOp(assertions, tree, stmt);

default:
return nullptr;
}
@@ -5813,7 +5834,8 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test)
Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree)
{
// Don't perform const prop on expressions marked with GTF_DONT_CSE
-    if (!tree->CanCSE())

if (!tree->CanCSE() && !(tree->gtFlags & GTF_DO_CONST_PROP))
{
return WALK_CONTINUE;
}
@@ -5849,6 +5871,13 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta
case GT_NEG:
case GT_CAST:
case GT_INTRINSIC:
case GT_SELECT:
case GT_CEQ:
case GT_CNE:
case GT_CLT:
case GT_CLE:
case GT_CGE:
case GT_CGT:
break;

case GT_JTRUE:
9 changes: 9 additions & 0 deletions src/coreclr/jit/codegen.h
@@ -1042,6 +1042,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genIntToFloatCast(GenTree* treeNode);
void genCkfinite(GenTree* treeNode);
void genCodeForCompare(GenTreeOp* tree);
#ifdef TARGET_ARM64
void genCodeForConditional(GenTreeConditional* tree);
#endif
void genIntrinsic(GenTree* treeNode);
void genPutArgStk(GenTreePutArgStk* treeNode);
void genPutArgReg(GenTreeOp* tree);
@@ -1704,6 +1707,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
#endif // TARGET_XARCH

#ifdef TARGET_ARM64
static insCond InsCondForCompareOp(GenTree* tree);
static insCond InvertInsCond(insCond cond);
static insCflags InsCflagsForInsCond(insCond cond);
#endif

#ifndef TARGET_LOONGARCH64
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
166 changes: 166 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
@@ -4403,6 +4403,60 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
}
}

//------------------------------------------------------------------------
// genCodeForConditional: Produce code for a GT_SELECT/GT_CEQ/GT_CNE/GT_CLT/GT_CLE/GT_CGE/GT_CGT node.
//
// Arguments:
// tree - the node
//
void CodeGen::genCodeForConditional(GenTreeConditional* tree)
{
emitter* emit = GetEmitter();

GenTree* opcond = tree->gtCond;
GenTree* op1 = tree->gtOp1;
GenTree* op2 = tree->gtOp2;
var_types op1Type = genActualType(op1->TypeGet());
var_types op2Type = genActualType(op2->TypeGet());
emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
insCond cond = InsCondForCompareOp(opcond);

assert(!op1->isUsedFromMemory());
assert(genTypeSize(op1Type) == genTypeSize(op2Type));

regNumber targetReg = tree->GetRegNum();
regNumber srcReg1 = genConsumeReg(op1);

if (tree->OperIs(GT_SELECT))
{
regNumber srcReg2 = genConsumeReg(op2);
emit->emitIns_R_R_R_COND(INS_csel, cmpSize, targetReg, srcReg1, srcReg2, cond);
regSet.verifyRegUsed(targetReg);
}
else
{
assert(!varTypeIsFloating(op2Type));
// We don't support swapping op1 and op2 to generate cmp reg, imm.
assert(!op1->isContainedIntOrIImmed());
// This should not be generating into a register.
assert(targetReg == REG_NA);

// For the ccmp flags, take the condition of the compare, invert it, then turn into cflags.
insCflags cflags = InsCflagsForInsCond(InvertInsCond(InsCondForCompareOp(tree)));

if (op2->isContainedIntOrIImmed())
{
GenTreeIntConCommon* intConst = op2->AsIntConCommon();
emit->emitIns_R_I_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, (int)intConst->IconValue(), cflags, cond);
}
else
{
regNumber srcReg2 = genConsumeReg(op2);
emit->emitIns_R_R_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, srcReg2, cflags, cond);
}
}
}

//------------------------------------------------------------------------
// genCodeForJumpCompare: Generates code for jmpCompare statement.
//
@@ -10373,4 +10427,116 @@ void CodeGen::genCodeForCond(GenTreeOp* tree)
genProduceReg(tree);
}

//------------------------------------------------------------------------
// InsCondForCompareOp: Map the condition in a Compare/Conditional op to an insCond.
//
// Arguments:
// tree - the node
//
insCond CodeGen::InsCondForCompareOp(GenTree* tree)
{
assert(tree->OperIsCompare() || tree->OperIsConditionalCompare());

if (tree->OperIsCompare())
{
switch (tree->AsOp()->OperGet())
{
case GT_EQ:
case GT_TEST_EQ:
return INS_COND_EQ;
case GT_NE:
case GT_TEST_NE:
return INS_COND_NE;
case GT_GE:
return INS_COND_GE;
case GT_GT:
return INS_COND_GT;
case GT_LT:
return INS_COND_LT;
case GT_LE:
return INS_COND_LE;
default:
assert(false && "Invalid condition");
return INS_COND_EQ;
}
}
else
{
switch (tree->AsConditional()->OperGet())
{
case GT_CEQ:
return INS_COND_EQ;
case GT_CNE:
return INS_COND_NE;
case GT_CGE:
return INS_COND_GE;
case GT_CGT:
return INS_COND_GT;
case GT_CLT:
return INS_COND_LT;
case GT_CLE:
return INS_COND_LE;
default:
assert(false && "Invalid condition");
return INS_COND_EQ;
}
}
}

//------------------------------------------------------------------------
// InvertInsCond: Invert an insCond
//
// Arguments:
// cond - the insCond.
//
insCond CodeGen::InvertInsCond(insCond cond)
{
switch (cond)
{
case INS_COND_EQ:
return INS_COND_NE;
case INS_COND_NE:
return INS_COND_EQ;
case INS_COND_GE:
return INS_COND_LT;
case INS_COND_GT:
return INS_COND_LE;
case INS_COND_LT:
return INS_COND_GE;
case INS_COND_LE:
return INS_COND_GT;
default:
assert(false && "Invalid condition");
return INS_COND_EQ;
}
}

//------------------------------------------------------------------------
// InsCflagsForInsCond: Get a flags (cflags) immediate under which the given insCond holds.
//
// Arguments:
// cond - the insCond.
//
insCflags CodeGen::InsCflagsForInsCond(insCond cond)
{
switch (cond)
{
case INS_COND_EQ:
return INS_FLAGS_Z;
case INS_COND_NE:
return INS_FLAGS_NONE;
case INS_COND_GE:
return INS_FLAGS_Z;
case INS_COND_GT:
return INS_FLAGS_NONE;
case INS_COND_LT:
return INS_FLAGS_NC;
case INS_COND_LE:
return INS_FLAGS_NZC;
default:
assert(false && "Invalid condition");
return INS_FLAGS_NONE;
}
}

#endif // TARGET_ARM64
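To make the flag mapping above easier to follow, here is a small standalone model of the AArch64 ccmp behaviour it relies on. This is a sketch for exposition, not JIT code, and it assumes the architectural NZCV semantics: when the hold condition is true, ccmp performs the compare and sets NZCV from it; when it is false, NZCV is loaded from the 4-bit flags immediate. genCodeForConditional derives that immediate from the inverted condition, so a failed first test forces flags under which the chained condition reads as false.

// Standalone sketch (not JIT code) of 'cmp' and 'ccmp' flag behaviour.
#include <cstdint>

struct Nzcv
{
    bool n, z, c, v;
};

// cmp a, b: subtract and set flags.
static Nzcv CmpFlags(int64_t a, int64_t b)
{
    int64_t diff = (int64_t)((uint64_t)a - (uint64_t)b); // wrap-safe subtract
    Nzcv flags;
    flags.n = diff < 0;
    flags.z = diff == 0;
    flags.c = (uint64_t)a >= (uint64_t)b;                      // no borrow
    flags.v = ((a < 0) != (b < 0)) && ((diff < 0) != (a < 0)); // signed overflow
    return flags;
}

// ccmp a, b, #cflags, cond: if 'condHolds' (cond evaluated against the current
// flags), compare a and b; otherwise set NZCV directly from the immediate.
static Nzcv CcmpModel(bool condHolds, int64_t a, int64_t b, Nzcv cflags)
{
    return condHolds ? CmpFlags(a, b) : cflags;
}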
12 changes: 12 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
@@ -367,6 +367,18 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genCodeForCompare(treeNode->AsOp());
break;

#ifdef TARGET_ARM64
case GT_SELECT:
case GT_CEQ:
case GT_CNE:
case GT_CLT:
case GT_CLE:
case GT_CGE:
case GT_CGT:
genCodeForConditional(treeNode->AsConditional());
break;
#endif

case GT_JTRUE:
genCodeForJumpTrue(treeNode->AsOp());
break;
11 changes: 10 additions & 1 deletion src/coreclr/jit/codegenlinear.cpp
@@ -2628,7 +2628,16 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue)
assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
assert(jtrue->OperIs(GT_JTRUE));

-    GenTreeOp* relop = jtrue->gtGetOp1()->AsOp();
GenTreeOp* relop;
if (jtrue->gtGetOp1()->OperIsCompare())
{
relop = jtrue->gtGetOp1()->AsOp();
}
else
{
assert(jtrue->gtGetOp1()->OperIsConditionalCompare());
relop = jtrue->gtGetOp1()->AsConditional();
}
GenCondition condition = GenCondition::FromRelop(relop);

if (condition.PreferSwap())
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.cpp
@@ -4821,6 +4821,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops);

// If conversion
//
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);

Review thread on lines +4824 to +4826:

SingleAccretion (Contributor), May 6, 2022:
I am curious why you chose this rather early stage for this.
On the face of it, doing this early has the significant disadvantage of decanonicalizing the IR w.r.t. relops.

Author (Contributor):
This needs doing after loop information is available. The next phase is PHASE_CLEAR_LOOP_INFO, so I wanted to go before that.
The phase before is loop unrolling. If conversion only modifies code outside of loops (for now), so it made sense to do it after loop unrolling to maximise the number of changes.
As far as I'm aware, other compilers also do if conversion fairly early.

SingleAccretion (Contributor):
> The next phase is PHASE_CLEAR_LOOP_INFO, so I wanted to go before that.
Note that PHASE_CLEAR_LOOP_INFO doesn't actually clear much, only information used by cloning.
I see you are using the "inside a loop" information as a performance heuristic currently; that should be in "good enough" shape even after the optimizer. We use the loop table for a similar purpose much later, for loop alignment.
Ultimately, though, the question is whether this causes actual problems, i.e. whether there are any regressions in the diffs. I suppose there aren't any?

Author (Contributor):
> Note that PHASE_CLEAR_LOOP_INFO doesn't actually clear much, only information used by cloning.
Ah, ok, maybe it can move down a bit then.
A quick scan through the passes suggests this should also be done before loop hoisting.
> if there are any regressions in the diffs. I suppose there aren't any?
I'm not certain either way yet. Certainly, the tests I've tried look good.
I've got three asserts left from "superpmi.py replay" to fix up (there's a comment elsewhere on here about that). The next step after that is a full test build and run.

SingleAccretion (Contributor):
> ah, ok, maybe it can move down a bit then.
Note that my thinking here is to avoid adding all the front-end support required (you've hit this problem with constant folding).
> A quick scan through the passes, I'm thinking this should be done before loop hoisting too.
Well, it is currently done "before hoisting". I don't think it makes sense to interject it between SSA and the optimizer phases (since it alters the flow graph). I was hoping we could slot it just after the optimizer (whether that is before lowering or after range check does not matter much, I think, though making it LIR-only would make some things, such as morph support, unnecessary).

Author (Contributor):
After a full run of 2000+ tests, I get 11 failures, of which 3 are unique: 1) the constant folding issue I already know about, 2) a "pThread" assert, which I also get without my patch (will ignore for now), and 3) a segfault, which I'll debug.

The question remains whether this is the right place for the phase. Looking at other compilers:
* LLVM if-converts very early and optimises every occurrence to select nodes, because removing the if/then branches makes later phases, especially vectorisation, much easier. At the end of compilation it restores if/then branches for certain cases.
* GCC if-converts loops early to allow vectorisation. Scalar code is if-converted at the end of compilation.
* OpenJDK if-converts early. It handles everything outside of loops, and inside loops it uses historical branch taken/not-taken counts.

Back to dotnet: my original patch added if conversion after lowering, but the ability to reshape the graph was limited and it didn't work for multiple conditions (@EgorBo agreed). As a test, I tried pushing if conversion down to just before lowering, and this seems to work in principle; I then get some later errors that just need debugging.

// Clear loop table info that is not used after this point, and might become invalid.
//
DoPhase(this, PHASE_CLEAR_LOOP_INFO, &Compiler::optClearLoopIterInfo);
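As a hedged aside on the multiple-condition case mentioned above: chaining compares is exactly what ccmp is for. The snippet below is illustrative only (the source and the commented assembly are assumptions for exposition, not taken from this change), showing how two tests can be combined without any branch.

// Illustrative C++ only: a chained condition that can lower to cmp + ccmp + csel.
// Note the branch-free form evaluates both compares; that is fine here because
// neither compare has side effects.
int ChainedSelect(int a, int b, int c, int d, int x, int y)
{
    return (a < b && c == d) ? x : y;
    // Roughly (illustrative Arm64):
    //   cmp  w0, w1          ; a < b ?
    //   ccmp w2, w3, #0, lt  ; if lt held, compare c and d; else force NZCV = 0000 so 'eq' fails
    //   csel w0, w4, w5, eq  ; x if the whole chain held, otherwise y
}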
40 changes: 39 additions & 1 deletion src/coreclr/jit/compiler.h
@@ -2349,6 +2349,9 @@ class Compiler
FieldSeqNode* fieldSeq,
var_types type = TYP_I_IMPL);

GenTreeConditional* gtNewConditionalNode(
genTreeOps oper, GenTree* cond, GenTree* op1, GenTree* op2, var_types type);

#ifdef FEATURE_SIMD
GenTreeSIMD* gtNewSIMDNode(
var_types type, GenTree* op1, SIMDIntrinsicID simdIntrinsicID, CorInfoType simdBaseJitType, unsigned simdSize);
@@ -2781,6 +2784,7 @@ class Compiler
GenTree* gtFoldExprSpecial(GenTree* tree);
GenTree* gtFoldBoxNullable(GenTree* tree);
GenTree* gtFoldExprCompare(GenTree* tree);
GenTree* gtFoldExprConditional(GenTree* tree);
GenTree* gtCreateHandleCompare(genTreeOps oper,
GenTree* op1,
GenTree* op2,
@@ -6045,7 +6049,8 @@ class Compiler
PhaseStatus optCloneLoops();
void optCloneLoop(unsigned loopInd, LoopCloneContext* context);
void optEnsureUniqueHead(unsigned loopInd, weight_t ambientWeight);
-    PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info)
PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info)
PhaseStatus optIfConversion(); // If conversion
void optRemoveRedundantZeroInits();

protected:
Expand Down Expand Up @@ -6427,6 +6432,10 @@ class Compiler

bool optInvertWhileLoop(BasicBlock* block);

bool optIfConvert(BasicBlock* block);
bool optIfConvertSelect(GenTree* original_condition, GenTree* asg_node);
bool optIfConvertCCmp(GenTree* original_condition, GenTree* asg_node);

private:
static bool optIterSmallOverflow(int iterAtExit, var_types incrType);
static bool optIterSmallUnderflow(int iterAtExit, var_types decrType);
@@ -7299,6 +7308,7 @@ class Compiler
GenTree* optAssertionProp_Cast(ASSERT_VALARG_TP assertions, GenTreeCast* cast, Statement* stmt);
GenTree* optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, Statement* stmt);
GenTree* optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
GenTree* optAssertionProp_ConditionalOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
GenTree* optAssertionProp_Comma(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
GenTree* optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
GenTree* optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
@@ -11010,6 +11020,34 @@ class GenTreeVisitor
break;
#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)

case GT_SELECT:
case GT_CEQ:
case GT_CNE:
case GT_CLT:
case GT_CLE:
case GT_CGE:
case GT_CGT:
{
GenTreeConditional* const conditional = node->AsConditional();

result = WalkTree(&conditional->gtCond, conditional);
if (result == fgWalkResult::WALK_ABORT)
{
return result;
}
result = WalkTree(&conditional->gtOp1, conditional);
if (result == fgWalkResult::WALK_ABORT)
{
return result;
}
result = WalkTree(&conditional->gtOp2, conditional);
if (result == fgWalkResult::WALK_ABORT)
{
return result;
}
break;
}

// Binary nodes
default:
{