diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 33259628755df1..7c0856775a7b6e 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5120,6 +5120,138 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_uqrshrn, EA_SCALABLE, REG_V15, REG_V12, 1, INS_OPTS_SCALABLE_H); // UQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const> + // IF_SVE_DM_2A + theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R0, REG_P0, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // DECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R1, REG_P1, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // DECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R2, REG_P2, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // DECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_decp, EA_8BYTE, REG_R3, REG_P3, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // DECP <Xdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R4, REG_P4, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // INCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R5, REG_P5, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // INCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R6, REG_P6, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // INCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_incp, EA_8BYTE, REG_R7, REG_P7, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // INCP <Xdn>, <Pm>.<T> + + // IF_SVE_DN_2A + // Note: B is reserved + theEmitter->emitIns_R_R(INS_sve_decp, EA_SCALABLE, REG_V0, REG_P0, INS_OPTS_SCALABLE_H); // DECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_decp, EA_SCALABLE, REG_V1, REG_P1, INS_OPTS_SCALABLE_S); // DECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_decp, EA_SCALABLE, REG_V2, REG_P2, INS_OPTS_SCALABLE_D); // DECP <Zdn>.<T>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_incp, EA_SCALABLE, REG_V3, REG_P3, INS_OPTS_SCALABLE_H); // INCP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_incp, EA_SCALABLE, REG_V4, REG_P4, INS_OPTS_SCALABLE_S); // INCP <Zdn>.<T>, <Pm>.<T> 
+ theEmitter->emitIns_R_R(INS_sve_incp, EA_SCALABLE, REG_V5, REG_P5, INS_OPTS_SCALABLE_D); // INCP <Zdn>.<T>, <Pm>.<T> + + // IF_SVE_DO_2A + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_4BYTE, REG_R0, REG_P0, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_4BYTE, REG_R1, REG_P1, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_4BYTE, REG_R2, REG_P2, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_4BYTE, REG_R3, REG_P3, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T>, <Wdn> + + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R4, REG_P4, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R5, REG_P5, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R6, REG_P6, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_8BYTE, REG_R7, REG_P7, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // SQDECP <Xdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_4BYTE, REG_R0, REG_P0, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_4BYTE, REG_R1, REG_P1, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_4BYTE, REG_R2, REG_P2, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T>, <Wdn> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_4BYTE, REG_R3, REG_P3, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T>, <Wdn> + + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R4, REG_P4, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R5, REG_P5, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R6, REG_P6, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T> 
+ theEmitter->emitIns_R_R(INS_sve_sqincp, EA_8BYTE, REG_R7, REG_P7, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // SQINCP <Xdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_4BYTE, REG_R0, REG_P0, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // UQDECP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_4BYTE, REG_R1, REG_P1, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // UQDECP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_4BYTE, REG_R2, REG_P2, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // UQDECP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_4BYTE, REG_R3, REG_P3, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // UQDECP <Wdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R4, REG_P4, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // UQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R5, REG_P5, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // UQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R6, REG_P6, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // UQDECP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_8BYTE, REG_R7, REG_P7, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // UQDECP <Xdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_4BYTE, REG_R0, REG_P0, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // UQINCP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_4BYTE, REG_R1, REG_P1, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // UQINCP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_4BYTE, REG_R2, REG_P2, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // UQINCP <Wdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_4BYTE, REG_R3, REG_P3, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // UQINCP <Wdn>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R4, REG_P4, + INS_OPTS_SCALABLE_B_WITH_SCALAR); // UQINCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R5, REG_P5, + INS_OPTS_SCALABLE_H_WITH_SCALAR); // UQINCP <Xdn>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R6, REG_P6, + INS_OPTS_SCALABLE_S_WITH_SCALAR); // UQINCP <Xdn>, <Pm>.<T> 
+ theEmitter->emitIns_R_R(INS_sve_uqincp, EA_8BYTE, REG_R7, REG_P7, + INS_OPTS_SCALABLE_D_WITH_SCALAR); // UQINCP <Xdn>, <Pm>.<T> + + // IF_SVE_DP_2A + // NOTE: B is reserved + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_SCALABLE, REG_V0, REG_P0, + INS_OPTS_SCALABLE_H); // SQDECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_SCALABLE, REG_V1, REG_P1, + INS_OPTS_SCALABLE_S); // SQDECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqdecp, EA_SCALABLE, REG_V2, REG_P2, + INS_OPTS_SCALABLE_D); // SQDECP <Zdn>.<T>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_SCALABLE, REG_V3, REG_P3, + INS_OPTS_SCALABLE_H); // SQINCP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_SCALABLE, REG_V4, REG_P4, + INS_OPTS_SCALABLE_S); // SQINCP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_sqincp, EA_SCALABLE, REG_V5, REG_P5, + INS_OPTS_SCALABLE_D); // SQINCP <Zdn>.<T>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_SCALABLE, REG_V6, REG_P6, + INS_OPTS_SCALABLE_H); // UQDECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_SCALABLE, REG_V7, REG_P7, + INS_OPTS_SCALABLE_S); // UQDECP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqdecp, EA_SCALABLE, REG_V8, REG_P0, + INS_OPTS_SCALABLE_D); // UQDECP <Zdn>.<T>, <Pm>.<T> + + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_SCALABLE, REG_V9, REG_P1, + INS_OPTS_SCALABLE_H); // UQINCP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_SCALABLE, REG_V10, REG_P2, + INS_OPTS_SCALABLE_S); // UQINCP <Zdn>.<T>, <Pm>.<T> + theEmitter->emitIns_R_R(INS_sve_uqincp, EA_SCALABLE, REG_V11, REG_P3, + INS_OPTS_SCALABLE_D); // UQINCP <Zdn>.<T>, <Pm>.<T> 
+ // IF_SVE_DQ_0A theEmitter->emitIns_I(INS_sve_setffr, EA_PTRSIZE, 0); // SETFFR diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f0b073b58b64b1..594b9e67e69652 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1182,11 +1182,29 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); assert(isVectorRegister(id->idReg1())); // nnnn assert(isVectorRegister(id->idReg2())); // ddddd - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - assert(isScalableVectorSize(elemsize)); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + assert(isValidGeneralDatasize(id->idOpSize())); // X + + FALLTHROUGH; + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + assert(insOptsScalableWithScalar(id->idInsOpt())); // xx + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // MMMM + assert(isValidGeneralDatasize(id->idOpSize())); + break; + + case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // MMMM + assert(isScalableVectorSize(id->idOpSize())); break; case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise @@ -1205,7 +1223,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_GD_2A: // .........x.xx... 
......nnnnnddddd -- SVE2 saturating extract narrow - elemsize = id->idOpSize(); assert(insOptsScalableSimple(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // nnnnn assert(isVectorRegister(id->idReg2())); // ddddd @@ -6884,6 +6901,46 @@ void emitter::emitIns_R_R( } break; + case INS_sve_incp: + case INS_sve_decp: + assert(isPredicateRegister(reg2)); // MMMM + + if (isGeneralRegister(reg1)) // ddddd + { + assert(insOptsScalableWithScalar(opt)); // xx + assert(size == EA_8BYTE); + fmt = IF_SVE_DM_2A; + } + else + { + assert(insOptsScalableAtLeastHalf(opt)); // xx + assert(isVectorRegister(reg1)); // ddddd + assert(isScalableVectorSize(size)); + fmt = IF_SVE_DN_2A; + } + break; + + case INS_sve_sqincp: + case INS_sve_uqincp: + case INS_sve_sqdecp: + case INS_sve_uqdecp: + assert(isPredicateRegister(reg2)); // MMMM + + if (isGeneralRegister(reg1)) // ddddd + { + assert(insOptsScalableWithScalar(opt)); // xx + assert(isValidGeneralDatasize(size)); + fmt = IF_SVE_DO_2A; + } + else + { + assert(insOptsScalableAtLeastHalf(opt)); // xx + assert(isVectorRegister(reg1)); // ddddd + assert(isScalableVectorSize(size)); + fmt = IF_SVE_DP_2A; + } + break; + case INS_sve_ctermeq: case INS_sve_ctermne: assert(insOptsNone(opt)); @@ -14849,6 +14906,32 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_R_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_8_to_5(id->idReg2()); // MMMM + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + case IF_SVE_DP_2A: // ........xx...... 
.......MMMMddddd -- SVE saturating inc/dec vector by predicate count + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_8_to_5(id->idReg2()); // MMMM + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_R_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_8_to_5(id->idReg2()); // MMMM + code |= insEncodeVLSElemsize(id->idOpSize()); // X + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise code = emitInsCodeSve(ins, fmt); dst += emitOutput_Instr(dst, code); @@ -17278,6 +17361,43 @@ void emitter::emitDispInsHelp( emitDispImm(emitGetInsSC(id), false); // iiii break; + // <Xdn>, <Pm>.<T> + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_SIZED, id->idInsOpt(), false); // MMMM + break; + + // <Zdn>.<T>, <Pm>.<T> + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_SIZED, id->idInsOpt(), false); // MMMM + break; + + // <Xdn>, <Pm>.<T>, <Wdn> + // <Wdn|Xdn>, <Pm>.<T> + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + if ((ins == INS_sve_sqdecp) || (ins == INS_sve_sqincp)) + { + // 32-bit result: <Xdn>, <Pm>.<T>, <Wdn> + // 64-bit result: <Xdn>, <Pm>.<T> 
+ const bool is32BitResult = (id->idOpSize() == EA_4BYTE); // X + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_SIZED, id->idInsOpt(), is32BitResult); // MMMM + + if (is32BitResult) + { + emitDispReg(id->idReg1(), EA_4BYTE, false); + } + } + else + { + assert((ins == INS_sve_uqdecp) || (ins == INS_sve_uqincp)); + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_SIZED, id->idInsOpt(), false); // MMMM + } + break; + // none case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise break; @@ -19805,6 +19925,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate result.insThroughput = PERFSCORE_THROUGHPUT_1C;