Skip to content

Commit

Permalink
ARM64-SVE: Implement IF_SVE_BV_2A
Browse files Browse the repository at this point in the history
  • Loading branch information
a74nh committed Feb 28, 2024
1 parent 6b8d34b commit 438a75e
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 13 deletions.
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5365,6 +5365,30 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q,
INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 <Zd>.Q, <Zn>.Q, <Zm>.Q

// IF_SVE_BV_2A
// Zeroing-predicated (<Pg>/Z) copies of an unshifted immediate into byte elements, covering both
// ends of the signed 8-bit range (-128 and 127). Operand order is always <Zd>, then <Pg>.
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27, INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128, INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127, INS_OPTS_SCALABLE_B); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}

// Unshifted immediates with halfword, word and doubleword element sizes.
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10, INS_OPTS_SCALABLE_S); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128, INS_OPTS_SCALABLE_D); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127, INS_OPTS_SCALABLE_H); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}

// Immediates outside the signed 8-bit range. Each one is a multiple of 256 and is paired with an
// element size of at least halfword, which is what emitIns_R_R_I asserts for these instructions.
// NOTE(review): the first two calls pass INS_SCALABLE_OPTS_SHIFT while the others do not; the
// visible handler derives the shift from the immediate value - confirm the option is needed.
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256, INS_OPTS_SCALABLE_S, INS_SCALABLE_OPTS_SHIFT); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072, INS_OPTS_SCALABLE_D, INS_SCALABLE_OPTS_SHIFT); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072, INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768, INS_OPTS_SCALABLE_S); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
// The vector destination must be the first register operand and the governing predicate the second.
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, 32512, INS_OPTS_SCALABLE_D); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}

// IF_SVE_BV_2A_A
// Merging-predicated (<Pg>/M) copy of an unshifted immediate.
theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}

// IF_SVE_BV_2A_J
// Merging-predicated copies of positive and negative immediates that are multiples of 256.
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}

// IF_SVE_BZ_3A
theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
INS_OPTS_SCALABLE_B); // TBL <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
Expand Down
102 changes: 91 additions & 11 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,18 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isValidUimm4From1(emitGetInsSC(id)));
break;

// Predicated SVE copy-immediate formats. All three variants share the same encoding layout, so
// they are validated together: reg1 must be a vector register, reg2 a predicate register, and
// the stored constant must fit in a signed 8-bit field.
case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
assert(insOptsScalableStandard(id->idInsOpt())); // xx
// Size specifier must be able to fit left-shifted immediate
// (the shift flag may only be set when the element-size option is at least halfword).
assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift());
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isPredicateRegister(id->idReg2())); // gggg
// The 8-bit range check applies to the stored constant regardless of the shift flag.
assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
break;

case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
elemsize = id->idOpSize();
assert(insOptsScalableStandard(id->idInsOpt()));
Expand Down Expand Up @@ -9154,16 +9166,18 @@ void emitter::emitIns_R_R_I(instruction ins,
insOpts opt /* = INS_OPTS_NONE */,
insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
{
emitAttr size = EA_SIZE(attr);
emitAttr elemsize = EA_UNKNOWN;
insFormat fmt = IF_NONE;
bool isLdSt = false;
bool isLdrStr = false;
bool isSIMD = false;
bool isAddSub = false;
bool setFlags = false;
unsigned scale = 0;
bool unscaledOp = false;
emitAttr size = EA_SIZE(attr);
emitAttr elemsize = EA_UNKNOWN;
insFormat fmt = IF_NONE;
bool isLdSt = false;
bool isLdrStr = false;
bool isSIMD = false;
bool isAddSub = false;
bool setFlags = false;
unsigned scale = 0;
bool unscaledOp = false;
bool optionalShift = false;
bool hasShift = false;

/* Figure out the encoding format of the instruction */
switch (ins)
Expand Down Expand Up @@ -9746,6 +9760,31 @@ void emitter::emitIns_R_R_I(instruction ins,
}
break;

case INS_sve_mov:
case INS_sve_cpy:
optionalShift = true;
assert(insOptsScalableStandard(opt));
assert(isVectorRegister(reg1)); // DDDDD
assert(isPredicateRegister(reg2)); // GGGG
if (!isValidSimm8(imm))
{
assert(isValidSimm8_MultipleOf256(imm));
assert(insOptsScalableAtLeastHalf(opt));
hasShift = true;
imm = imm / 256;
}
if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
{
fmt = IF_SVE_BV_2A_J;
}
else
{
fmt = IF_SVE_BV_2A;
}
// MOV is an alias for CPY, and is always the preferred disassembly.
ins = INS_sve_mov;
break;

case INS_sve_sqrshrn:
case INS_sve_sqrshrun:
case INS_sve_uqrshrn:
Expand Down Expand Up @@ -10072,7 +10111,18 @@ void emitter::emitIns_R_R_I(instruction ins,

assert(fmt != IF_NONE);

instrDesc* id = emitNewInstrSC(attr, imm);
instrDesc* id;

if (optionalShift)
{
    // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state,
    // so allocate through emitNewInstrCns and record whether the shift is actually used.
    id = emitNewInstrCns(attr, imm);
    id->idOptionalShift(hasShift);
}
else
{
    // Every other instruction handled here uses the small-constant descriptor.
    id = emitNewInstrSC(attr, imm);
}

id->idIns(ins);
id->idInsFmt(fmt);
Expand Down Expand Up @@ -23578,6 +23628,19 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
imm = emitGetInsSC(id);
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
code |= insEncodeReg_P_19_to_16(id->idReg2()); // gggg
code |= insEncodeImm8_12_to_5(imm); // iiiiiiii
code |= (id->idOptionalShift() ? 0x2000 : 0); // h
code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
Expand Down Expand Up @@ -28867,6 +28930,16 @@ void emitter::emitDispInsHelp(
emitDispImm(imm, false);
break;

// <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
    // Operands are shown in order: destination vector, governing predicate, immediate.
    imm = emitGetInsSC(id);
    emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
    emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg
    // The immediate is displayed together with the optional-shift flag and a shift amount of 8.
    emitDispImmOptsLSL(imm, id->idOptionalShift(), 8); // iiiiiiii, h
    break;

default:
printf("unexpected format %s", emitIfName(id->idInsFmt()));
assert(!"unexpectedFormat");
Expand Down Expand Up @@ -31303,6 +31376,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
    // All predicated copy-immediate variants are given the same latency and throughput.
    result.insLatency    = PERFSCORE_LATENCY_2C;
    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
    break;

case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,12 @@ static bool isValidSimm8(ssize_t value)
return (-0x80 <= value) && (value <= 0x7F);
};

// Returns true if 'value' is a multiple of 256 whose quotient (value / 256) fits in a signed
// 8 bit immediate, i.e. 'value' lies in [-0x8000, 0x7f00] (such as for MOV).
static bool isValidSimm8_MultipleOf256(ssize_t value)
{
    // Anything with low-order bits set cannot be expressed as an 8 bit value times 256.
    if ((value % 256) != 0)
    {
        return false;
    }

    const ssize_t scaled = value / 256;
    return (-0x80 <= scaled) && (scaled <= 0x7F);
};

// Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN).
static bool isValidUimm12(ssize_t value)
{
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/scripts/emitUnitTests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ group=all
search=
setup=
verbose=
while getopts sc:g:v: line
# Command prefix placed in front of the corerun invocation; stays empty unless -d is given.
debugger=
# Parse command-line options: -c, -g and -v take an argument; -s and -d are plain flags.
while getopts sc:g:v:d line
do
case $line in
# -c <path>: export CORE_ROOT with the given value.
c) export CORE_ROOT=$OPTARG;;
# -g <group>: override the default group.
g) group=$OPTARG;;
# -s: set the setup flag.
s) setup=1;;
# -v <value>: set the verbose setting.
v) verbose=$OPTARG;;
# -d: run the app under gdb.
d) debugger="gdb --args";;
# Anything else: print the usage text and exit with status 2.
*) echo $usage; exit 2;;
esac
done
Expand Down Expand Up @@ -85,7 +87,7 @@ export DOTNET_JitRawHexCodeFile=$output_dir/clr_hex.txt

# Run the dummy app in clr

$CORE_ROOT/corerun $app_dll
# When -d was given, $debugger expands to "gdb --args" so corerun starts under gdb;
# otherwise it is empty and, being unquoted, expands to nothing.
$debugger $CORE_ROOT/corerun $app_dll

# Extract the instructions from the clr output, from the first NOP to the first set of 2 NOPS.

Expand Down

0 comments on commit 438a75e

Please sign in to comment.