Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT ARM64/SVE2: Add increment/decrement from predicate register instructions #94811

Merged
merged 28 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6b9db26
Add incp/decp
amanasifkhalid Nov 15, 2023
f96fa21
Add saturating inc/dec instructions
amanasifkhalid Nov 15, 2023
91fc564
Add SVE_DO_2A format
amanasifkhalid Nov 15, 2023
fbfdc9b
Fix build
amanasifkhalid Nov 15, 2023
83bcb09
Merge from main; address feedback
amanasifkhalid Nov 22, 2023
13d8fb6
Merge branch 'main' into arm64-sve2
amanasifkhalid Nov 27, 2023
52f5a50
Fix unit tests
amanasifkhalid Nov 27, 2023
d000a17
Update PerfScores
amanasifkhalid Nov 27, 2023
36becce
Fix merge conflict
amanasifkhalid Dec 5, 2023
ebc6141
Fix encodings
amanasifkhalid Dec 11, 2023
158c71d
Clean up IF_SVE_DO_2A
amanasifkhalid Dec 11, 2023
46f2b67
Add comment
amanasifkhalid Dec 11, 2023
a8e634b
Merge from main
amanasifkhalid Dec 11, 2023
4fe7266
Style; comment ALL_ARM64_EMITTER_UNIT_TESTS
amanasifkhalid Dec 11, 2023
90c5bbe
Revert comment
amanasifkhalid Dec 11, 2023
3c3148a
Delete unused variables
amanasifkhalid Dec 11, 2023
b72b908
Formatting
amanasifkhalid Dec 11, 2023
953bb55
Add missing asserts
amanasifkhalid Dec 12, 2023
c15420a
Remove debug code
amanasifkhalid Dec 12, 2023
59ae6e2
Merge branch 'main' into arm64-sve2
amanasifkhalid Dec 12, 2023
9e14baa
Fix build
amanasifkhalid Dec 12, 2023
059b027
Merge
amanasifkhalid Dec 15, 2023
3361154
Add emitDispInsHelp comments
amanasifkhalid Dec 17, 2023
02c03ed
Merge from main; update emitDispPredicateReg to use PREDICATE_SIZED
amanasifkhalid Dec 18, 2023
538f524
Use INS_OPTS_SCALABLE_B_WITH_SCALAR
amanasifkhalid Dec 18, 2023
51d0b1c
Remove old code
amanasifkhalid Dec 18, 2023
0744ee9
Merge from main
amanasifkhalid Dec 18, 2023
dbe6add
Style
amanasifkhalid Dec 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6362,6 +6362,31 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);

// TODO-SVE: Fix once we add predicate registers
// decp
theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // DECP <Xdn>, <Pm>.<T>
theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // DECP <Zdn>.<T>, <Pm>.<T>

// incp
theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // INCP <Xdn>, <Pm>.<T>
theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // INCP <Zdn>.<T>, <Pm>.<T>

// sqdecp
theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // SQDECP <Xdn>, <Pm>.<T>, <Wdn>
theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // SQDECP <Zdn>.<T>, <Pm>.<T>

// sqincp
theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // SQINCP <Xdn>, <Pm>.<T>, <Wdn>
theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // SQINCP <Zdn>.<T>, <Pm>.<T>

// uqdecp
theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // UQDECP <Wdn>, <Pm>.<T>
theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // UQDECP <Zdn>.<T>, <Pm>.<T>

// uqincp
theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // UQINCP <Wdn>, <Pm>.<T>
theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R0, REG_R1, INS_OPTS_8B); // UQINCP <Zdn>.<T>, <Pm>.<T>

#endif // ALL_ARM64_EMITTER_UNIT_TESTS

#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
Expand Down
103 changes: 103 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,27 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(datasize == EA_8BYTE);
break;

case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
assert(isValidVectorDatasize(id->idOpSize())); // X

FALLTHROUGH;
case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
elemsize = id->idOpSize();
assert(insOptsNone(id->idInsOpt()));
assert(isGeneralRegister(id->idReg1())); // ddddd
assert(isPredicateRegister(id->idReg2())); // MMMM
assert(isValidVectorElemsize(elemsize)); // xx
break;

case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these forms be merged?

(Or should we wait to decide on merging until after many/all instructions have been implemented?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ok with either doing it now or waiting until after we've implemented all of them. @kunalspathak @TIHan do you have any preference for this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

waiting until after we've implemented all of them

Will be good to wait so we can track the coverage of them as listed in #94549. We can then have one pass to combine all the duplicate ones.

elemsize = id->idOpSize();
assert(insOptsNone(id->idInsOpt()));
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isPredicateRegister(id->idReg2())); // MMMM
assert(isValidVectorElemsize(elemsize)); // xx
break;

default:
printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
assert(!"Unexpected format");
Expand Down Expand Up @@ -989,6 +1010,8 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov - to general)
case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general)

case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why might these two groups write to GC Reg? (It's possible I've been missing some from my PRs)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry, didn't mean to push that in the first place. I don't think we should worry about touching emitInsMayWriteToGCReg for now. Once we start emitting these SVE instructions for actual codegen, we might need to come back to this method.

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
return true;

case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
Expand Down Expand Up @@ -1021,6 +1044,9 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
case IF_DV_3F: // DV_3F .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector)
case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)

case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
// Tracked GC pointers cannot be placed into the SIMD registers.
return false;

Expand Down Expand Up @@ -10571,6 +10597,20 @@ void emitter::emitIns_Call(EmitCallType callType,
return ureg << 10;
}

/*****************************************************************************
*
* Returns an encoding for the specified register used in the 'Pm' position
*/

/*static*/ emitter::code_t emitter::insEncodeReg_Pm(regNumber reg)
{
// TODO-SVE: Fix once we add predicate registers
assert(emitter::isPredicateRegister(reg));
emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
assert((ureg >= 0) && (ureg <= 15));
return ureg << 16;
}

/*****************************************************************************
*
* Returns an encoding for the specified condition code.
Expand Down Expand Up @@ -13311,6 +13351,35 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
code = emitInsCodeSve(ins, fmt);
elemsize = id->idOpSize();
code |= insEncodeReg_Rd(id->idReg1()); // ddddd
code |= insEncodeReg_Pm(id->idReg2()); // MMMM
code |= insEncodeElemsize(elemsize); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
code = emitInsCodeSve(ins, fmt);
elemsize = id->idOpSize();
code |= insEncodeReg_Vd(id->idReg1()); // ddddd
code |= insEncodeReg_Pm(id->idReg2()); // MMMM
code |= insEncodeElemsize(elemsize); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
code = emitInsCodeSve(ins, fmt);
elemsize = id->idOpSize();
code |= insEncodeReg_Rd(id->idReg1()); // ddddd
code |= insEncodeReg_Pm(id->idReg2()); // MMMM
code |= insEncodeDatasizeVLS(code, id->idOpSize()); // X
code |= insEncodeElemsize(elemsize); // xx
dst += emitOutput_Instr(dst, code);
break;

default:
assert(!"Unexpected format");
break;
Expand Down Expand Up @@ -13849,6 +13918,20 @@ void emitter::emitDispVectorElemList(
}
}

//------------------------------------------------------------------------
// emitDispPredicateReg: Display a predicate register name with with an arrangement suffix
//
void emitter::emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma)
{
// TODO-SVE: Fix once we add predicate registers
assert(isPredicateRegister(reg));
printf(emitVectorRegName(reg));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we need a emitPredicateRegName than handles the "fake" predicate registers but still outputs p0 through p15

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kunal has a temporary implementation for this in #95016. As an aside, there seems to be potential for circular dependencies between these two PRs. Should I wait for Kunal's to be merged in first? I can then update my implementation with the new methods #95016 adds in (like emitPredicateRegName) and also update the predicate register TODOs to use the aliases you suggested above.

emitDispArrangement(opt);

if (addComma)
emitDispComma();
}

//------------------------------------------------------------------------
// emitDispArrangement: Display a SIMD vector arrangement suffix
//
Expand Down Expand Up @@ -15444,6 +15527,18 @@ void emitter::emitDispInsHelp(
}
break;

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
emitDispReg(id->idReg1(), size, true); // ddddd
emitDispPredicateReg(id->idReg2(), id->idInsOpt(), true); // MMMM
break;

case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); // ddddd
emitDispPredicateReg(id->idReg2(), id->idInsOpt(), true); // MMMM
break;

default:
printf("unexpected format %s", emitIfName(id->idInsFmt()));
assert(!"unexpectedFormat");
Expand Down Expand Up @@ -17629,6 +17724,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
}
break;

case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;
break;

default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma);
void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma);
void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma);
void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma);
void emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma);
void emitDispArrangement(insOpts opt);
void emitDispElemsize(emitAttr elemsize);
void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr);
Expand Down Expand Up @@ -312,6 +313,9 @@ static code_t insEncodeReg_Vm(regNumber reg);
// Returns an encoding for the specified register used in the 'Va' position
static code_t insEncodeReg_Va(regNumber reg);

// Returns an encoding for the specified register used in the 'Pm' position
static code_t insEncodeReg_Pm(regNumber reg);

// Returns an encoding for the imm which represents the condition code.
static code_t insEncodeCond(insCond cond);

Expand Down Expand Up @@ -664,6 +668,12 @@ inline static bool isFloatReg(regNumber reg)
return isVectorRegister(reg);
}

inline static bool isPredicateRegister(regNumber reg)
{
// TODO-SVE: Fix once we add predicate registers
return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use the new REG_P0 to REG_P15. And/or define REG_P_FIRST and REG_P_LAST?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done in #95016

}

inline static bool insOptsNone(insOpts opt)
{
return (opt == INS_OPTS_NONE);
Expand Down