Skip to content

Commit

Permalink
[LoongArch64] Fix the wrong GCInfo when testing GCStress. (#72572)
Browse files Browse the repository at this point in the history
* [LoongArch64] Fix the wrong GCInfo when testing GCStress.

* add split for LoongArch64.

* Fix some assert errors.

* amend for format checking.

* amend the code for CRs.

* amend the code for CRs.

* amend the code for CRs round-3.

* replace `#ifdef FEATURE_ARG_SPLIT` as `#if FEATURE_ARG_SPLIT`

* amend the GCTypes for CRs round-4.

* amend the GCTypes for CRs round-5.

* adding some comments for CRs round-6.

* amend the code format.
  • Loading branch information
shushanhf authored Aug 4, 2022
1 parent f4013f4 commit 76533d4
Show file tree
Hide file tree
Showing 9 changed files with 255 additions and 176 deletions.
5 changes: 4 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genLeaInstruction(GenTreeAddrMode* lea);
void genSetRegToCond(regNumber dstReg, GenTree* tree);

#if defined(TARGET_ARMARCH)
#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)
void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale);
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64

#if defined(TARGET_ARMARCH)
void genCodeForMulLong(GenTreeOp* mul);
#endif // TARGET_ARMARCH

Expand Down
292 changes: 188 additions & 104 deletions src/coreclr/jit/codegenloongarch64.cpp

Large diffs are not rendered by default.

86 changes: 33 additions & 53 deletions src/coreclr/jit/emitloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,35 +586,39 @@ void emitter::emitIns(instruction ins)
}

/*****************************************************************************
* emitter::emitIns_S_R() and emitter::emitIns_R_S():
*
* Add Load/Store instruction(s): base+offset, plus base-address computation if needed.
* For referencing a stack-based local variable and a register
*
* Special notes for LoongArch64:
* The parameter `offs` carries special information.
* The real value of `offs` is always positive.
* If `offs` is negative, its real value is abs(offs);
* a negative `offs` marks the special case of optimizing a large offset (> 2047).
* When offs > 2047, the load/store of the data cannot be encoded in one instruction.
* If there are several loads/stores in this case, the similar large offs would have to be
* repeated with redundant instructions, which may eat up the `SC_IG_BUFFER_SIZE`.
*
* Optimize the following:
* (1) The real value of `offs` is positive. `offs` = `offs`.
*
* (2) If the `offs` is negtive, `offs` = -(offs),
* the negtive `offs` is special for optimizing the large offset which >2047.
* when offs >2047 we can't encode one instruction to load/store the data,
* if there are several load/store at this case, you have to repeat the similar
* large offs with reduntant instructions and maybe eat up the `SC_IG_BUFFER_SIZE`.
*
* Before optimizing the following instructions:
* lu12i.w x0, 0x0
* ori x0, x0, 0x9ac
* add.d x0, x0, fp
* fst.s fa0, x0, 0
*
* For the offs within range [0,0x7ff], using one instruction:
* ori x0, x0, offs
* For the offs within range [0x1000,0xffffffff], using two instruction
* lu12i.w x0, offs-hi-20bits
* ori x0, x0, offs-low-12bits
* After optimization, the instructions are:
* For offs within the range [0,0x7ff], using one instruction:
* ori x0, x0, offs
* For offs within the range [0x1000,0xffffffff], using two instructions:
* lu12i.w x0, offs-hi-20bits
* ori x0, x0, offs-low-12bits
*
* Store/Load the data:
* fstx.s fa0, x0, fp
* Then Store/Load the data:
* fstx.s fa0, x0, fp
*
* If the store/load are repeated,
* If storing/loading the second field of a struct,
* addi_d x0,x0,sizeof(type)
* fstx.s fa0, x0, fp
*
Expand Down Expand Up @@ -671,7 +675,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
assert(isValidSimm20(imm2 >> 12));
emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12);

emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2);
emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_RA, REG_RA, reg2);

imm2 = imm2 & 0x7ff;
imm = imm3 ? imm2 - imm3 : imm2;
Expand Down Expand Up @@ -792,7 +796,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
assert(isValidSimm20(imm2 >> 12));
emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12);

emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2);
emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_RA, REG_RA, reg2);

imm2 = imm2 & 0x7ff;
imm3 = imm3 ? imm2 - imm3 : imm2;
Expand Down Expand Up @@ -2566,6 +2570,8 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t
assert((addr & 3) == 0);

dst += 4;
emitGCregDeadUpd(REG_T2, dst);

#ifdef DEBUG
code = emitInsCode(INS_pcaddu18i);
assert((code | (14)) == 0x1e00000e);
Expand Down Expand Up @@ -2596,6 +2602,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t

emitOutput_Instr(dst, code);
dst += 4;
emitGCregDeadUpd(REG_T2, dst);

code = emitInsCode(INS_ori);
code |= (code_t)REG_T2;
Expand All @@ -2620,24 +2627,14 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t

dst += 4;

// update volatile regs within emitThisGCrefRegs and emitThisByrefRegs.
if (gcrefRegs != emitThisGCrefRegs)
{
emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
}
if (byrefRegs != emitThisByrefRegs)
{
emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
}

// If the method returns a GC ref, mark INTRET (A0) appropriately.
if (id->idGCref() == GCT_GCREF)
{
gcrefRegs = emitThisGCrefRegs | RBM_INTRET;
gcrefRegs |= RBM_INTRET;
}
else if (id->idGCref() == GCT_BYREF)
{
byrefRegs = emitThisByrefRegs | RBM_INTRET;
byrefRegs |= RBM_INTRET;
}

// If a multi-register return method is called, mark INTRET_1 (A1) appropriately
Expand Down Expand Up @@ -3146,7 +3143,7 @@ void emitter::emitJumpDistBind()
size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
BYTE* dstRW = *dp + writeableOffset;
BYTE* dstRW2 = dstRW; // addr for updating gc info if needed.
BYTE* dstRW2 = dstRW + 4; // addr for updating gc info if needed.
code_t code = 0;
instruction ins;
size_t sz; // = emitSizeOfInsDsc(id);
Expand Down Expand Up @@ -3178,7 +3175,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

*(code_t*)dstRW = 0x1c000000 | (code_t)reg1;

dstRW2 = dstRW;
dstRW += 4;

#ifdef DEBUG
Expand All @@ -3202,20 +3198,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
*(code_t*)dstRW = 0x28c00000 | (code_t)reg1 | (code_t)(reg1 << 5);
}

if (id->idGCref() != GCT_NONE)
{
emitGCregLiveUpd(id->idGCref(), reg1, dstRW);
}
else
{
emitGCregDeadUpd(reg1, dstRW);
}

dstRW += 4;

emitRecordRelocation(dstRW2 - writeableOffset, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC);

dstRW2 += 4;
emitRecordRelocation(dstRW - 8 - writeableOffset, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC);

sz = sizeof(instrDesc);
}
Expand All @@ -3224,7 +3209,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr);
regNumber reg1 = id->idReg1();
dstRW2 += 4;

switch (id->idCodeSize())
{
Expand Down Expand Up @@ -3406,7 +3390,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
*(code_t*)dstRW = code;
}
dstRW += 4;
dstRW2 = dstRW;
}
else
{
Expand All @@ -3431,7 +3414,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
code |= (code_t)(imm & 0xfff) << 10;
*(code_t*)dstRW = code;
dstRW += 4;
dstRW2 = dstRW;

ins = INS_lu32i_d;
code = emitInsCode(INS_lu32i_d);
Expand Down Expand Up @@ -3470,7 +3452,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

*(code_t*)dstRW = code;
dstRW += 4;
dstRW2 = dstRW;
}
}

Expand Down Expand Up @@ -3510,7 +3491,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
code = 0x1c000000;
*(code_t*)dstRW = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7);
dstRW += 4;
dstRW2 = dstRW;
#ifdef DEBUG
code = emitInsCode(INS_pcaddu12i);
assert(code == 0x1c000000);
Expand Down Expand Up @@ -3538,7 +3518,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
code |= (code_t)(imm & 0xfff) << 10;
*(code_t*)dstRW = code;
dstRW += 4;
dstRW2 = dstRW;

ins = INS_lu32i_d;
code = emitInsCode(INS_lu32i_d);
Expand Down Expand Up @@ -3760,16 +3739,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = sizeof(instrDesc);
}
dstRW += emitOutputCall(ig, *dp, id, 0);
ins = INS_nop;

dstRW2 = dstRW;
ins = INS_nop;
break;

// case INS_OPTS_NONE:
default:
*(code_t*)dstRW = id->idAddr()->iiaGetInstrEncode();
dstRW += 4;
dstRW2 = dstRW;
ins = id->idIns();
sz = emitSizeOfInsDsc(id);
ins = id->idIns();
sz = emitSizeOfInsDsc(id);
break;
}

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ void CallArgABIInformation::SetByteSize(unsigned byteSize, unsigned byteAlignmen
// is a HFA of doubles, since double and float registers overlap.
void CallArgABIInformation::SetMultiRegNums()
{
#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI)
#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64)
if (NumRegs == 1)
{
return;
Expand All @@ -1183,7 +1183,7 @@ void CallArgABIInformation::SetMultiRegNums()
argReg = (regNumber)(argReg + regSize);
SetRegNum(regIndex, argReg);
}
#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI)
#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64)
}

//---------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4311,7 +4311,7 @@ struct CallArgABIInformation
, IsBackFilled(false)
, IsStruct(false)
, PassedByRef(false)
#ifdef FEATURE_ARG_SPLIT
#if FEATURE_ARG_SPLIT
, m_isSplit(false)
#endif
#ifdef FEATURE_HFA_FIELDS_PRESENT
Expand Down Expand Up @@ -4366,7 +4366,7 @@ struct CallArgABIInformation
bool PassedByRef : 1;

private:
#ifdef FEATURE_ARG_SPLIT
#if FEATURE_ARG_SPLIT
// True when this argument is split between the registers and OutArg area
bool m_isSplit : 1;
#endif
Expand Down
8 changes: 5 additions & 3 deletions src/coreclr/jit/instrsloongarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ INST(rdtimeh_w, "rdtimeh.w", 0, 0x00006400)
INST(rdtime_d, "rdtime.d", 0, 0x00006800)
INST(cpucfg, "cpucfg", 0, 0x00006c00)

////R_R, fr---->gr
INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400)
INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800)
INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00)

////R_R_I_I.
INST(bstrins_w, "bstrins.w", 0, 0x00600000)
INST(bstrins_d, "bstrins.d", 0, 0x00800000)
Expand Down Expand Up @@ -415,9 +420,6 @@ INST(fmov_d, "fmov.d", 0, 0x01149800)
INST(movgr2fr_w, "movgr2fr.w", 0, 0x0114a400)
INST(movgr2fr_d, "movgr2fr.d", 0, 0x0114a800)
INST(movgr2frh_w, "movgr2frh.w", 0, 0x0114ac00)
INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400)
INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800)
INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00)

////
INST(movgr2fcsr, "movgr2fcsr", 0, 0x0114c000)
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@
// Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from
// the official Arm64 ABI.
// Case: splitting 16 byte struct between x7 and stack
#if defined(TARGET_ARM) || defined(TARGET_ARM64)
// LoongArch64's ABI supports FEATURE_ARG_SPLIT, which splits a 16-byte struct between a7 and the stack.
#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#define FEATURE_ARG_SPLIT 1
#else
#define FEATURE_ARG_SPLIT 0
Expand Down
10 changes: 1 addition & 9 deletions src/coreclr/jit/lsraloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,12 +388,10 @@ int LinearScan::BuildNode(GenTree* tree)
}
break;

#if FEATURE_ARG_SPLIT
case GT_PUTARG_SPLIT:
srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
dstCount = tree->AsPutArgSplit()->gtNumRegs;
break;
#endif // FEATURE_ARG_SPLIT

case GT_PUTARG_STK:
srcCount = BuildPutArgStk(tree->AsPutArgStk());
Expand Down Expand Up @@ -882,18 +880,16 @@ int LinearScan::BuildCall(GenTreeCall* call)
srcCount++;
}
}
#if FEATURE_ARG_SPLIT
else if (argNode->OperGet() == GT_PUTARG_SPLIT)
{
unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs;
assert(regCount == curArgTabEntry->numRegs);
assert(regCount == abiInfo.NumRegs);
for (unsigned int i = 0; i < regCount; i++)
{
BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i);
}
srcCount += regCount;
}
#endif // FEATURE_ARG_SPLIT
else
{
assert(argNode->OperIs(GT_PUTARG_REG));
Expand Down Expand Up @@ -921,11 +917,9 @@ int LinearScan::BuildCall(GenTreeCall* call)
// Skip arguments that have been moved to the Late Arg list
if (arg.GetLateNode() == nullptr)
{
#if FEATURE_ARG_SPLIT
// PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they
// define registers used by the call.
assert(argNode->OperGet() != GT_PUTARG_SPLIT);
#endif // FEATURE_ARG_SPLIT
if (argNode->gtOper == GT_PUTARG_STK)
{
assert(arg.AbiInfo.GetRegNum() == REG_STK);
Expand Down Expand Up @@ -1038,7 +1032,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
return srcCount;
}

#if FEATURE_ARG_SPLIT
//------------------------------------------------------------------------
// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node
//
Expand Down Expand Up @@ -1127,7 +1120,6 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
BuildDefs(argNode, dstCount, argMask);
return srcCount;
}
#endif // FEATURE_ARG_SPLIT

//------------------------------------------------------------------------
// BuildBlockStore: Build the RefPositions for a block store node.
Expand Down
Loading

0 comments on commit 76533d4

Please sign in to comment.