From 0dd5ee5f8584e1fe52545cdb1698e05b83535d26 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Feb 2025 00:58:17 +0100 Subject: [PATCH 1/6] JIT: Unify struct arg morphing This PR unifies the multi-reg and single-reg struct arg morphing into a single path. It combines the optimizations from either path to apply to both cases. It also deletes `FEATURE_PUT_STRUCT_ARG_STK`; this define was defined everywhere except win-x64, and win-x64 now benefits from it by supporting `FIELD_LIST` stack arguments. Another difference is that multi-reg arguments were morphed after args were evaluated into temps before. Now we instead morph multi-reg args at the same time as single-reg args; this means the temp-evaluation step may see field lists, but physical promotion also produces those nowadays, so it knows how to handle them. The main difference here ends up being that we sometimes will save an argument to a temp as multiple field-wise stores instead of a block store, which improves CQ. --- src/coreclr/jit/codegen.h | 6 - src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/codegenlinear.cpp | 4 +- src/coreclr/jit/codegenxarch.cpp | 11 - src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/gentree.cpp | 55 ++- src/coreclr/jit/gentree.h | 83 +--- src/coreclr/jit/jit.h | 5 - src/coreclr/jit/lower.cpp | 5 +- src/coreclr/jit/lowerxarch.cpp | 2 - src/coreclr/jit/lsraxarch.cpp | 4 - src/coreclr/jit/morph.cpp | 682 +++++++++--------------------- 12 files changed, 251 insertions(+), 613 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 2520864b707421..e7b2d366d2c031 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1194,12 +1194,10 @@ class CodeGen final : public CodeGenInterface void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg); void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#ifdef FEATURE_PUT_STRUCT_ARG_STK void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#endif // FEATURE_PUT_STRUCT_ARG_STK #if FEATURE_ARG_SPLIT void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode); #endif // FEATURE_ARG_SPLIT @@ -1287,7 +1285,6 @@ class CodeGen final : public CodeGenInterface void genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum); #endif // !TARGET_X86 -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk); void genPushReg(var_types type, regNumber srcReg); @@ -1309,7 +1306,6 @@ class CodeGen final : public CodeGenInterface #else void genStructPutArgPartialRepMovs(GenTreePutArgStk* putArgStkNode); #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode); @@ -1403,14 +1399,12 @@ class CodeGen final : public CodeGenInterface return compiler->lvaGetDesc(tree->AsLclVarCommon())->lvIsRegCandidate(); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_X86 bool m_pushStkArg; #else // !TARGET_X86 unsigned m_stkArgVarNum; unsigned m_stkArgOffset; #endif // !TARGET_X86 -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if defined(DEBUG) && defined(TARGET_XARCH) void genStackPointerCheck(bool doStackPointerCheck, diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 75c2cf474e6d4b..1639b86de810b8 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -201,7 +201,7 @@ void CodeGenInterface::CopyRegisterInfo() CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) { -#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86) +#if !defined(TARGET_X86) m_stkArgVarNum = BAD_VAR_NUM; #endif diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 945f468d86fa81..7e11a642329602 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1741,7 +1741,6 @@ void CodeGen::genConsumeMultiOpOperands(GenTreeMultiOp* tree) } #endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) -#if FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. // Also loads in the right register the addresses of the @@ -1824,7 +1823,6 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, inst_RV_IV(INS_mov, sizeReg, size, EA_PTRSIZE); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK #if FEATURE_ARG_SPLIT //------------------------------------------------------------------------ @@ -1899,7 +1897,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg #if FEATURE_FASTTAILCALL if (putArgStk->gtCall->IsFastTailCall()) { - areaSize = compiler->info.compArgStackSize; + areaSize = compiler->lvaParameterStackSize; } #endif diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e0da7904ecc688..de93862822b21a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3485,7 +3485,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) } } -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Generate code for a load from some address + offset // base: tree node which can be either a local or an indir // offset: distance from the "base" location from which to load @@ -3502,7 +3501,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst GetEmitter()->emitIns_R_AR(ins, size, dst, base->AsIndir()->Addr()->GetRegNum(), offset); } } -#endif // FEATURE_PUT_STRUCT_ARG_STK //---------------------------------------------------------------------------------- // genCodeForCpBlkUnroll - Generate unrolled block copy code. @@ -3790,7 +3788,6 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) instGen(INS_r_movsb); } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area // @@ -4219,7 +4216,6 @@ void CodeGen::genClearStackVec3ArgUpperBits() } } #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#endif // FEATURE_PUT_STRUCT_ARG_STK // // genCodeForCpObj - Generate code for CpObj nodes to copy structs that have interleaved @@ -5993,13 +5989,11 @@ void CodeGen::genCall(GenTreeCall* call) #ifdef DEBUG assert(argSize == arg.AbiInfo.ByteSize); -#ifdef FEATURE_PUT_STRUCT_ARG_STK if (source->TypeIs(TYP_STRUCT) && !source->OperIs(GT_FIELD_LIST)) { unsigned loadSize = source->GetLayout(compiler)->GetSize(); assert(argSize == roundUp(loadSize, TARGET_POINTER_SIZE)); } -#endif // FEATURE_PUT_STRUCT_ARG_STK #endif // DEBUG } } @@ -8392,8 +8386,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) { unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); -#ifdef UNIX_AMD64_ABI - if (data->OperIs(GT_FIELD_LIST)) { genPutArgStkFieldList(putArgStk, baseVarNum); @@ -8407,7 +8399,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) m_stkArgVarNum = BAD_VAR_NUM; return; } -#endif // UNIX_AMD64_ABI noway_assert(targetType != TYP_STRUCT); @@ -8509,7 +8500,6 @@ void CodeGen::genPushReg(var_types type, regNumber srcReg) } #endif // TARGET_X86 -#if defined(FEATURE_PUT_STRUCT_ARG_STK) // genStoreRegToStackArg: Store a register value into the stack argument area // // Arguments: @@ -8649,7 +8639,6 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) unreached(); } } -#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) /***************************************************************************** * diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f8855bfc41ab2d..ae1b0da5d21f34 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6779,7 +6779,7 @@ class Compiler #endif // FEATURE_SIMD GenTree* fgMorphIndexAddr(GenTreeIndexAddr* tree); GenTree* fgMorphExpandCast(GenTreeCast* tree); - GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); + GenTreeFieldList* fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl); GenTreeCall* fgMorphArgs(GenTreeCall* call); void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg); @@ -11802,8 +11802,7 @@ class Compiler const CORINFO_FPSTRUCT_LOWERING* GetFpStructLowering(CORINFO_CLASS_HANDLE structHandle); #endif // defined(UNIX_AMD64_ABI) - void fgMorphMultiregStructArgs(GenTreeCall* call); - GenTree* fgMorphMultiregStructArg(CallArg* arg); + bool fgTryMorphStructArg(CallArg* arg); bool killGCRefs(GenTree* tree); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 46c57705853a1f..c90f4ee43ed9d5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -261,14 +261,12 @@ void GenTree::InitNodeSize() GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE; #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; #if FEATURE_ARG_SPLIT GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT] = TREE_NODE_SZ_LARGE; #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK // This list of assertions should come to contain all GenTree subtypes that are declared // "small". @@ -324,16 +322,12 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeILOffset) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node -#ifndef FEATURE_PUT_STRUCT_ARG_STK - static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); -#else // FEATURE_PUT_STRUCT_ARG_STK // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); #if FEATURE_ARG_SPLIT static_assert_no_msg(sizeof(GenTreePutArgSplit) <= TREE_NODE_SZ_LARGE); #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_HW_INTRINSICS static_assert_no_msg(sizeof(GenTreeHWIntrinsic) <= TREE_NODE_SZ_SMALL); @@ -1147,6 +1141,27 @@ void GenTreeFieldList::InsertFieldLIR( m_uses.InsertUse(insertAfter, new (compiler, CMK_ASTNode) Use(node, offset, type)); } +//--------------------------------------------------------------- +// SoleFieldOrThis: +// If this FIELD_LIST has only one field, then return it; otherwise return +// the field list. +// +// Returns: +// Sole field, or "this". +// +GenTree* GenTreeFieldList::SoleFieldOrThis() +{ + Use* head = m_uses.GetHead(); + assert(head != nullptr); + + if (head->GetNext() == nullptr) + { + return head->GetNode(); + } + + return this; +} + //--------------------------------------------------------------- // IsHfaArg: Is this arg considered a homogeneous floating-point aggregate? // @@ -2621,13 +2636,6 @@ void CallArgs::ResetFinalArgsAndABIInfo() m_abiInformationDetermined = false; } -#if !defined(FEATURE_PUT_STRUCT_ARG_STK) -unsigned GenTreePutArgStk::GetStackByteSize() const -{ - return genTypeSize(genActualType(gtOp1->gtType)); -} -#endif // !defined(FEATURE_PUT_STRUCT_ARG_STK) - /***************************************************************************** * * Returns non-zero if the two trees are identical. @@ -3048,6 +3056,25 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) return false; } +//------------------------------------------------------------------------ +// EffectiveUse: Return the use pointer to the "effective val". +// +// Arguments: +// use - Use edge +// +// Return Value: +// Edge pointing to non-comma node. +// +GenTree** GenTree::EffectiveUse(GenTree** use) +{ + while ((*use)->OperIs(GT_COMMA)) + { + use = &(*use)->AsOp()->gtOp2; + } + + return use; +} + //------------------------------------------------------------------------ // gtHasRef: Find out whether the given tree contains a local. // @@ -12733,7 +12760,6 @@ void Compiler::gtDispTree(GenTree* tree, } } } -#if FEATURE_PUT_STRUCT_ARG_STK else if (tree->OperGet() == GT_PUTARG_STK) { const GenTreePutArgStk* putArg = tree->AsPutArgStk(); @@ -12766,7 +12792,6 @@ void Compiler::gtDispTree(GenTree* tree, printf(" (%d stackByteSize), (%d numRegs)", putArg->GetStackByteSize(), putArg->gtNumRegs); } #endif // FEATURE_ARG_SPLIT -#endif // FEATURE_PUT_STRUCT_ARG_STK if (tree->OperIs(GT_FIELD_ADDR)) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2fb34c31a5cc7b..904976fa50be38 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1794,7 +1794,6 @@ struct GenTree bool OperSupportsReverseOpEvalOrder(Compiler* comp) const; static bool RequiresNonNullOp2(genTreeOps oper); - bool IsValidCallArgument(); #endif // DEBUG inline bool IsIntegralConst(ssize_t constVal) const; @@ -1938,6 +1937,8 @@ struct GenTree //--------------------------------------------------------------------- + static GenTree** EffectiveUse(GenTree** use); + #if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_MEM_ALLOC || \ NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS static const char* OpName(genTreeOps op); @@ -2829,6 +2830,8 @@ struct GenTreeFieldList : public GenTree // Insert a new field use after the specified use without updating side effect flags. void InsertFieldLIR(Compiler* compiler, Use* insertAfter, GenTree* node, unsigned offset, var_types type); + GenTree* SoleFieldOrThis(); + //-------------------------------------------------------------------------- // Equals: Check if 2 FIELD_LIST nodes are equal. // @@ -8654,9 +8657,7 @@ struct GenTreePutArgStk : public GenTreeUnOp { private: unsigned m_byteOffset; -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned m_byteSize; // The number of bytes that this argument is occupying on the stack with padding. -#endif public: #if defined(UNIX_X86_ABI) @@ -8674,7 +8675,6 @@ struct GenTreePutArgStk : public GenTreeUnOp // In future if we need to add more such bool fields consider bit fields. #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK // Instruction selection: during codegen time, what code sequence we will be using // to encode this operation. // TODO-Throughput: The following information should be obtained from the child @@ -8694,23 +8694,18 @@ struct GenTreePutArgStk : public GenTreeUnOp private: uint8_t m_argLoadSizeDelta; #endif // TARGET_XARCH -#endif // FEATURE_PUT_STRUCT_ARG_STK public: - GenTreePutArgStk(genTreeOps oper, - var_types type, - GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif + GenTreePutArgStk(genTreeOps oper, + var_types type, + GenTree* op1, + unsigned stackByteOffset, + unsigned stackByteSize, GenTreeCall* callNode, bool putInIncomingArgArea) : GenTreeUnOp(oper, type, op1 DEBUGARG(/*largeNode*/ false)) , m_byteOffset(stackByteOffset) -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , m_byteSize(stackByteSize) -#endif #if defined(UNIX_X86_ABI) , gtPadAlign(0) #endif @@ -8720,12 +8715,10 @@ struct GenTreePutArgStk : public GenTreeUnOp #if FEATURE_FASTTAILCALL , gtPutInIncomingArgArea(putInIncomingArgArea) #endif // FEATURE_FASTTAILCALL -#if defined(FEATURE_PUT_STRUCT_ARG_STK) , gtPutArgStkKind(Kind::Invalid) #if defined(TARGET_XARCH) , m_argLoadSizeDelta(UINT8_MAX) #endif -#endif // FEATURE_PUT_STRUCT_ARG_STK { } @@ -8766,7 +8759,6 @@ struct GenTreePutArgStk : public GenTreeUnOp } #endif -#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned GetStackByteSize() const { return m_byteSize; @@ -8813,9 +8805,6 @@ struct GenTreePutArgStk : public GenTreeUnOp { return gtPutArgStkKind == Kind::Push; } -#else // !FEATURE_PUT_STRUCT_ARG_STK - unsigned GetStackByteSize() const; -#endif // !FEATURE_PUT_STRUCT_ARG_STK #if DEBUGGABLE_GENTREE GenTreePutArgStk() @@ -8831,11 +8820,9 @@ struct GenTreePutArgSplit : public GenTreePutArgStk { unsigned gtNumRegs; - GenTreePutArgSplit(GenTree* op1, - unsigned stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) - unsigned stackByteSize, -#endif + GenTreePutArgSplit(GenTree* op1, + unsigned stackByteOffset, + unsigned stackByteSize, unsigned numRegs, GenTreeCall* callNode, bool putIncomingArgArea) @@ -8843,9 +8830,7 @@ struct GenTreePutArgSplit : public GenTreePutArgStk TYP_STRUCT, op1, stackByteOffset, -#if defined(FEATURE_PUT_STRUCT_ARG_STK) stackByteSize, -#endif callNode, putIncomingArgArea) , gtNumRegs(numRegs) @@ -9927,50 +9912,6 @@ inline bool GenTree::IsBoxedValue() return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); } -#ifdef DEBUG -//------------------------------------------------------------------------ -// IsValidCallArgument: Given an GenTree node that represents an argument -// enforce (or don't enforce) the following invariant. -// -// Arguments: -// instance method for a GenTree node -// -// Return values: -// true: the GenTree node is accepted as a valid argument -// false: the GenTree node is not accepted as a valid argument -// -// Notes: -// For targets that don't support arguments as a list of fields, we do not support GT_FIELD_LIST. -// -// Currently for AMD64 UNIX we allow a limited case where a GT_FIELD_LIST is -// allowed but every element must be a GT_LCL_FLD. -// -// For the future targets that allow for Multireg args (and this includes the current ARM64 target), -// or that allow for passing promoted structs, we allow a GT_FIELD_LIST of arbitrary nodes. -// These would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, -// but could be changed into constants or GT_COMMA trees by the later -// optimization phases. - -inline bool GenTree::IsValidCallArgument() -{ - if (OperIs(GT_FIELD_LIST)) - { -#if !FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK - - return false; - -#else // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - - // We allow this GT_FIELD_LIST as an argument - return true; - -#endif // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK - } - // We don't have either kind of list, so it satisfies the invariant. - return true; -} -#endif // DEBUG - inline GenTree* GenTree::gtGetOp1() const { return AsOp()->gtOp1; diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 983103b80cef12..837a15faa21b54 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -345,11 +345,6 @@ typedef ptrdiff_t ssize_t; #define UNIX_LOONGARCH64_ONLY(x) #endif // TARGET_LOONGARCH64 -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) -#define FEATURE_PUT_STRUCT_ARG_STK 1 -#endif - #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x #define UNIX_AMD64_ABI_ONLY(x) x diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index bd8997941de4b7..5f8bdd9f27b3f6 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1693,10 +1693,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, const bool putInIncomingArgArea = call->IsFastTailCall(); putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, stackSeg.GetStackOffset(), -#ifdef FEATURE_PUT_STRUCT_ARG_STK - stackSeg.GetStackSize(), -#endif - call, putInIncomingArgArea); + stackSeg.GetStackSize(), call, putInIncomingArgArea); } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index f3d6ad039bcf17..2f99e1a1423f5d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -669,7 +669,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) return; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK if (src->TypeIs(TYP_STRUCT)) { assert(src->OperIs(GT_BLK) || src->OperIsLocalRead()); @@ -756,7 +755,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) { return; } -#endif // FEATURE_PUT_STRUCT_ARG_STK assert(!src->TypeIs(TYP_STRUCT)); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 294f67e612fbb3..0cd162a6c5a25c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -536,11 +536,9 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; -#ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; -#endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: srcCount = BuildBlockStore(tree->AsBlk()); @@ -1679,7 +1677,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) return useCount; } -#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. // @@ -1853,7 +1850,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) return srcCount; } -#endif // FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 43b2d527c14795..fa5fc9b9aa90e5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1030,64 +1030,6 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) prevExceptionTree = argx; prevExceptionFlags = exceptionFlags; } - -#if FEATURE_MULTIREG_ARGS - // In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so - // here we consider spilling it into a local. We also need to spill it in case we have a node that we do not - // currently handle in multi-reg morphing. - // This logic can be skipped when the arg is already in the right multireg arg shape. - // - if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST)) - { - if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1)) - { - if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) - { - // Spill multireg struct arguments that have stores or calls embedded in them. - SetNeedsTemp(&arg); - } - else if (!argx->OperIsLocalRead() && !argx->OperIsLoad()) - { - // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. - SetNeedsTemp(&arg); - } - else if (comp->opts.OptimizationEnabled()) - { - // Finally, we call gtPrepareCost to measure the cost of evaluating this tree. - comp->gtPrepareCost(argx); - - if (argx->GetCostEx() > (6 * IND_COST_EX)) - { - // Spill multireg struct arguments that are expensive to evaluate twice. - SetNeedsTemp(&arg); - } - } - } - - // We are only able to expand certain "BLK"s into field lists, so here we spill all the - // "mis-sized" ones. We could in theory support them directly with some arithmetic and - // shifts, but these cases are rare enough that it is probably not worth the complexity. - // No need to do this for stack args as they are directly supported by codegen. - // - if (argx->OperIs(GT_BLK) && (arg.AbiInfo.GetRegNum() != REG_STK)) - { - GenTreeBlk* argObj = argx->AsBlk(); - unsigned structSize = argObj->Size(); - unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; - - if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) - { -#ifdef TARGET_ARM - // On ARM we don't expand split args larger than 16 bytes into field lists. - if (!arg.AbiInfo.IsSplit() || (structSize <= 16)) -#endif // TARGET_ARM - { - SetNeedsTemp(&arg); - } - } - } - } -#endif // FEATURE_MULTIREG_ARGS } #if FEATURE_FIXED_OUT_ARGS @@ -1601,29 +1543,36 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) } #endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); -#endif - - if (argx->OperIs(GT_FIELD_LIST)) + GenTree* argxEffectiveVal = argx->gtEffectiveVal(); + if (argxEffectiveVal->OperIs(GT_FIELD_LIST)) { - GenTreeFieldList* fieldList = argx->AsFieldList(); + GenTreeFieldList* fieldList = argxEffectiveVal->AsFieldList(); fieldList->gtFlags &= ~GTF_ALL_EFFECT; - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); - store->SetMorphed(comp); + auto appendEffect = [=, &setupArg](GenTree* effect) { if (setupArg == nullptr) { - setupArg = store; + setupArg = effect; } else { - setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, store); + setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, effect); setupArg->SetMorphed(comp); } + }; + + for (GenTree* comma = argx; comma->OperIs(GT_COMMA); comma = comma->gtGetOp2()) + { + appendEffect(comma->gtGetOp1()); + } + + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); + store->SetMorphed(comp); + + appendEffect(store); GenTree* setupUse = comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode())); setupUse->SetMorphed(comp); @@ -1700,14 +1649,6 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) defArg = argx; -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - - // All structs are either passed (and retyped) as integral types, OR they - // are passed by reference. - noway_assert(argx->gtType != TYP_STRUCT); - -#endif // !(defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) - #ifdef DEBUG if (comp->verbose) { @@ -2408,9 +2349,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // information about late arguments in CallArgs. // This information is used later to construct the late args - // Note that this name a misnomer - it indicates that there are struct args - // that are passed by value in more than one register or on stack. - bool hasMultiregStructArgs = false; for (CallArg& arg : call->gtArgs.Args()) { GenTree** parentArgx = &arg.EarlyNodeRef(); @@ -2453,287 +2391,33 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) argx->gtType = TYP_I_IMPL; } - bool isStructArg = varTypeIsStruct(arg.GetSignatureType()); - GenTree* argObj = argx->gtEffectiveVal(); - bool makeOutArgCopy = false; - - if (argObj->OperIs(GT_FIELD_LIST)) + if (varTypeIsStruct(arg.GetSignatureType()) && !reMorphing) { - // FIELD_LISTs can be created directly by physical promotion. - // Physical promotion will create this shape even for single-reg - // arguments. We strip the field list here for that case as the - // rest of the JIT does not expect single-reg args to be wrapped - // like that. - if (arg.NewAbiInfo.HasExactlyOneRegisterSegment()) + bool makeOutArgCopy = false; + if (arg.NewAbiInfo.IsPassedByReference()) { - GenTreeFieldList* fieldList = argObj->AsFieldList(); - assert(fieldList->Uses().GetHead()->GetNext() == nullptr); - GenTree* node = fieldList->Uses().GetHead()->GetNode(); - - JITDUMP("Replacing single-field FIELD_LIST [%06u] by sole field [%06u]\n", dspTreeID(fieldList), - dspTreeID(node)); - - assert(varTypeUsesSameRegType(node, arg.AbiInfo.ArgType)); - GenTree** effectiveUse = parentArgx; - while ((*effectiveUse)->OperIs(GT_COMMA)) - { - effectiveUse = &(*effectiveUse)->AsOp()->gtOp2; - } - *effectiveUse = node; - - argx = *parentArgx; - argObj = node; + makeOutArgCopy = true; } - } - else if (isStructArg && !reMorphing) - { - unsigned originalSize; - if (argObj->TypeIs(TYP_STRUCT)) + else if (fgTryMorphStructArg(&arg)) { - assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); - originalSize = argObj->GetLayout(this)->GetSize(); + argx = *parentArgx; } else { - originalSize = genTypeSize(argx); - } - - assert(argx->TypeGet() == arg.GetSignatureType()); - assert(originalSize == info.compCompHnd->getClassSize(arg.GetSignatureClassHandle())); - - // First, handle the case where the argument is passed by reference. - if (arg.NewAbiInfo.IsPassedByReference()) - { - assert(arg.AbiInfo.ByteSize == TARGET_POINTER_SIZE); makeOutArgCopy = true; -#ifdef UNIX_AMD64_ABI - assert(!"Structs are not passed by reference on x64/ux"); -#endif // UNIX_AMD64_ABI } - else // This is passed by value. - { - unsigned structSize = originalSize; - unsigned passingSize = originalSize; - - // Check to see if we can transform this struct load (GT_BLK) into a GT_IND of the appropriate size. - // When it can do this is platform-dependent: - // - In general, it can be done for power of 2 structs that fit in a single register. - // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. - // - This is irrelevant for X86, since structs are always passed by value on the stack. - // - var_types structBaseType = arg.AbiInfo.ArgType; - bool argIsLocal = argObj->OperIsLocalRead(); - bool canTransform = false; - - if (structBaseType != TYP_STRUCT) - { - if (isPow2(passingSize)) - { - canTransform = - (!arg.AbiInfo.IsHfaArg() || (passingSize == genTypeSize(arg.AbiInfo.GetHfaType()))); - } - else - { - // We can pass non-power-of-2 structs in a register, but we can only transform in that - // case if the arg is a local. - canTransform = argIsLocal; - passingSize = genTypeSize(structBaseType); - } - } -#if !defined(TARGET_X86) - else - { - hasMultiregStructArgs = true; - } -#endif // !TARGET_X86 - if (!canTransform) - { -#if defined(TARGET_AMD64) -#ifndef UNIX_AMD64_ABI - // On Windows structs are always copied and passed by reference (handled above) unless they are - // passed by value in a single register. - assert(arg.AbiInfo.GetStackSlotsNumber() == 1); - makeOutArgCopy = true; -#else // UNIX_AMD64_ABI - // On Unix, structs are always passed by value. - // We only need a copy if we have one of the following: - // - The sizes don't match for a non-lclVar argument. - // - We have a known struct type (e.g. SIMD) that requires multiple registers. - // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not - // actually passed in registers. - if (arg.AbiInfo.IsPassedInRegisters()) - { - if (argObj->OperIs(GT_BLK)) - { - if (passingSize != structSize) - { - makeOutArgCopy = true; - } - } - else if (!argIsLocal) - { - // This should only be the case of a value directly producing a known struct type. - assert(argObj->TypeGet() != TYP_STRUCT); - if (arg.AbiInfo.NumRegs > 1) - { - makeOutArgCopy = true; - } - } - } -#endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((passingSize != structSize) && !argIsLocal) - { - makeOutArgCopy = true; - } -#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - else if (argObj->TypeGet() != structBaseType) - { - // We have a struct argument that fits into a register, and it is either a power of 2, - // or a local. - // Change our argument, as needed, into a value of the appropriate type. - assert(structBaseType != TYP_STRUCT); - - // On RISC-V / LoongArch the passing size may be smaller than the original size if we pass a struct - // according to hardware FP calling convention and it has empty fields -#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - assert(genTypeSize(structBaseType) >= originalSize); -#endif - - if (argObj->OperIsLoad()) - { - assert(argObj->AsIndir()->Size() == genTypeSize(structBaseType)); - argObj->SetOper(GT_IND); - // Use ChangeType over argx to update types in COMMAs as well - argx->ChangeType(structBaseType); - } - else if (argObj->OperIsLocalRead()) - { - unsigned lclNum = argObj->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = lvaGetDesc(lclNum); - unsigned lclOffset = argObj->AsLclVarCommon()->GetLclOffs(); - unsigned argLclNum = BAD_VAR_NUM; - LclVarDsc* argVarDsc = nullptr; - - if (varDsc->lvPromoted) - { - argLclNum = lvaGetFieldLocal(varDsc, lclOffset); - } - else if (lclOffset == 0) - { - argLclNum = lclNum; - } - - // See if this local goes into the right register file. - // TODO-CQ: we could use a bitcast here, if it does not. - if (argLclNum != BAD_VAR_NUM) - { - argVarDsc = lvaGetDesc(argLclNum); - if ((genTypeSize(argVarDsc) != originalSize) || - !varTypeUsesSameRegType(argVarDsc, structBaseType)) - { - argLclNum = BAD_VAR_NUM; - } - } - - if (argLclNum != BAD_VAR_NUM) - { - argx->ChangeType(argVarDsc->TypeGet()); - argObj->SetOper(GT_LCL_VAR); - argObj->AsLclVar()->SetLclNum(argLclNum); - } - else if (varDsc->lvPromoted) - { - // Preserve independent promotion of "argObj" by decomposing the copy. - // TODO-CQ: condition this on the promotion actually being independent. - makeOutArgCopy = true; - } -#ifdef TARGET_AMD64 - else if (!argObj->OperIs(GT_LCL_VAR) || !argObj->TypeIs(TYP_SIMD8)) // Handled by lowering. -#else // !TARGET_ARM64 - else -#endif // !TARGET_ARM64 - { - // TODO-CQ: perform this transformation in lowering instead of here and - // avoid marking enregisterable structs DNER. - argx->ChangeType(structBaseType); - if (argObj->OperIs(GT_LCL_VAR)) - { - argObj->SetOper(GT_LCL_FLD); -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - // Single-field structs passed according to floating-point calling convention may have - // padding in front of the field passed in a register - assert(arg.NewAbiInfo.NumSegments == 1); - // Single-slot structs passed according to integer calling convention also go through - // here but since they always have zero offset nothing should change - ABIPassingSegment seg = arg.NewAbiInfo.Segment(0); - assert((seg.IsPassedInRegister() && genIsValidFloatReg(seg.GetRegister())) || - (seg.Offset == 0)); - argObj->AsLclFld()->SetLclOffs(seg.Offset); -#endif - } - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg)); - } - } + if (makeOutArgCopy) + { + fgMakeOutgoingStructArgCopy(call, &arg); - assert(varTypeIsEnregisterable(argObj) || - (makeOutArgCopy && varTypeIsEnregisterable(structBaseType))); - } - else if (argObj->OperIs(GT_LCL_VAR) && lvaGetDesc(argObj->AsLclVar())->lvPromoted) + if (arg.GetEarlyNode() != nullptr) { - // Set DNER to block independent promotion. - lvaSetVarDoNotEnregister(argObj->AsLclVar()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::IsStructArg)); + flagsSummary |= arg.GetEarlyNode()->gtFlags; } } } - if (makeOutArgCopy) - { - fgMakeOutgoingStructArgCopy(call, &arg); - - if (arg.GetEarlyNode() != nullptr) - { - flagsSummary |= arg.GetEarlyNode()->gtFlags; - } - } - -#if FEATURE_MULTIREG_ARGS - if (!isStructArg) - { -#ifdef TARGET_ARM - if ((arg.AbiInfo.ArgType == TYP_LONG) || (arg.AbiInfo.ArgType == TYP_DOUBLE)) - { - assert((arg.AbiInfo.NumRegs == 2) || (arg.AbiInfo.GetStackSlotsNumber() == 2)); - } - else -#endif - { - // We must have exactly one register or slot. - assert(((arg.AbiInfo.NumRegs == 1) && (arg.AbiInfo.GetStackSlotsNumber() == 0)) || - ((arg.AbiInfo.NumRegs == 0) && (arg.AbiInfo.GetStackSlotsNumber() == 1))); - } - } -#endif - -#if defined(TARGET_X86) - if (isStructArg) - { - if (argx->OperIs(GT_LCL_VAR) && - (lvaGetPromotionType(argx->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT)) - { - argx = fgMorphLclArgToFieldlist(argx->AsLclVar()); - arg.SetEarlyNode(argx); - } - else if (argx->OperIs(GT_LCL_FLD)) - { - lvaSetVarDoNotEnregister(argx->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - } - } -#endif // TARGET_X86 - flagsSummary |= arg.GetEarlyNode()->gtFlags; } // end foreach argument loop @@ -2779,11 +2463,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) call->gtArgs.EvalArgsToTemps(this, call); } - if (hasMultiregStructArgs) - { - fgMorphMultiregStructArgs(call); - } - #ifdef DEBUG if (verbose) { @@ -2802,83 +2481,29 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #endif //----------------------------------------------------------------------------- -// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and -// call fgMorphMultiregStructArg on each of them. +// fgTryMorphStructArg: +// Given a varTypeIsStruct argument, try to morph it into a shape that the +// backend supports. // // Arguments: -// call : a GenTreeCall node that has one or more TYP_STRUCT arguments\. +// arg - The argument // -// Notes: -// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. -// It will ensure that the struct arguments are in the correct form. -// If this method fails to find any TYP_STRUCT arguments it will assert. -// -void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) -{ - bool foundStructArg = false; - GenTreeFlags flagsSummary = GTF_EMPTY; - -#ifdef TARGET_X86 - assert(!"Logic error: no MultiregStructArgs for X86"); -#endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); -#endif - - for (CallArg& arg : call->gtArgs.Args()) - { - if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.NewAbiInfo.IsPassedByReference()) - { - foundStructArg = true; - GenTree*& argx = arg.NodeRef(); - - if (!argx->OperIs(GT_FIELD_LIST)) - { - argx = fgMorphMultiregStructArg(&arg); - } - } - } - - // We should only call this method when we actually have one or more multireg struct args - assert(foundStructArg); - - // Update the flags - call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); -} - -//----------------------------------------------------------------------------- -// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, -// morph the argument as needed to be passed correctly. +// Returns: +// False if the argument cannot be put into a shape supported by the backend. // -// Arguments: -// arg - The argument containing a struct node. +// Remarks: +// The backend requires register-passed arguments to be of FIELD_LIST shape +// with one primitive node for each register. If the argument is passed in +// only one register, then the FIELD_LIST should be directly replaced by its +// operand. For stack-passed arguments the backend supports struct-typed +// arguments directly. // -// Notes: -// The arg node must be a GT_BLK or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. -// If arg node is a lclVar passed on the stack, we will ensure that any lclVars that must be on the -// stack are marked as doNotEnregister, and then we return. -// -// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form -// which is only used for struct arguments. -// -// If arg is a LclVar we check if it is struct promoted and has the right number of fields -// and if they are at the appropriate offsets we will use the struct promted fields -// in the GT_FIELD_LIST nodes that we create. -// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements -// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct -// this also forces the struct to be stack allocated into the local frame. -// For the GT_BLK case will clone the address expression and generate two (or more) -// indirections. -// -GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) +bool Compiler::fgTryMorphStructArg(CallArg* arg) { - GenTree* argNode = arg->GetNode(); + GenTree** use = GenTree::EffectiveUse(&arg->NodeRef()); + GenTree* argNode = *use; assert(varTypeIsStruct(argNode)); -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - NYI("fgMorphMultiregStructArg requires implementation for this target"); -#endif - bool isSplit = arg->NewAbiInfo.IsSplitAcrossRegistersAndStack(); #ifdef TARGET_ARM if ((isSplit && (arg->NewAbiInfo.CountRegsAndStackSlots() > 4)) || @@ -2893,7 +2518,14 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // TODO-Arm-CQ: support decomposing "large" promoted structs into field lists. if (!isSplit) { - argNode = fgMorphLclArgToFieldlist(argNode->AsLclVar()); + GenTreeFieldList* fieldList = fgMorphLclArgToFieldList(argNode->AsLclVar()); + // TODO-Cleanup: The containment/reg optionality for x86 is + // conservative in the "no field list" case. +#ifdef TARGET_X86 + *use = fieldList; +#else + *use = fieldList->SoleFieldOrThis(); +#endif } else { @@ -2905,39 +2537,37 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { lvaSetVarDoNotEnregister(argNode->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); } + else if (argNode->OperIs(GT_BLK)) + { + ClassLayout* layout = argNode->AsBlk()->GetLayout(); - return argNode; - } + var_types primitiveType = layout->GetRegisterType(); + if (primitiveType != TYP_UNDEF) + { + JITDUMP("Converting argument [%06u] to primitive indirection\n", dspTreeID(argNode)); -#if FEATURE_MULTIREG_ARGS - ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; - unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + argNode->SetOper(GT_IND); + argNode->gtType = primitiveType; + } + } - if (layout != nullptr) - { - assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); - } - else - { - assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + return true; } - // We should still have a TYP_STRUCT - assert(varTypeIsStruct(argNode)); - - GenTreeFieldList* newArg = nullptr; + GenTree* newArg = nullptr; - // Are we passing a struct LclVar? - // if (argNode->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* lclNode = argNode->AsLclVarCommon(); unsigned lclNum = lclNode->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); - varDsc->lvIsMultiRegArg = true; + if (!arg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + varDsc->lvIsMultiRegArg = true; + } - JITDUMP("Multireg struct argument V%02u : ", lclNum); + JITDUMP("Struct argument V%02u: ", lclNum); JITDUMPEXEC(arg->Dump(this)); // Try to see if we can use the promoted fields to pass this argument. @@ -2964,7 +2594,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { // TODO-CQ: We should be able to tolerate mismatches by inserting GT_BITCAST in lowering. // - JITDUMP("Multireg struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " + JITDUMP("Struct V%02u will be passed using GT_LCL_FLD because of type mismatch: " "register type is %s, field local V%02u's type is %s\n", lclNum, varTypeName(regType), fieldLclNum, varTypeName(fieldType)); fieldsMatch = false; @@ -2986,19 +2616,80 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (fieldsMatch) { - newArg = fgMorphLclArgToFieldlist(lclNode); + newArg = fgMorphLclArgToFieldList(lclNode)->SoleFieldOrThis(); } } } + else if (argNode->OperIsFieldList()) + { + // We can already see a field list here if physical promotion created it. + // Physical promotion will also create single-field field lists which + // the backend does not expect, so fix that here. + newArg = argNode->AsFieldList()->SoleFieldOrThis(); + if (newArg == argNode) + { + return true; + } + } // If we were not able to use the promoted fields... // if (newArg == nullptr) { - if (!arg->NewAbiInfo.HasAnyRegisterSegment()) + bool isUseOfIndependentlyPromotedStruct = + argNode->OperIs(GT_LCL_VAR) && + (lvaGetPromotionType(argNode->AsLclVarCommon()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT); + if (isUseOfIndependentlyPromotedStruct) + { + if (arg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + // We already tried to use the fields above, but that failed. + // Here we prefer to create a copy to avoid DNER'ing the local. + // That turns out to be profitable mostly for single-register arguments. + return false; + } + } + else if (!argNode->TypeIs(TYP_STRUCT) && arg->NewAbiInfo.HasExactlyOneRegisterSegment()) { - // We leave this stack passed argument alone. - return argNode; + // This can be treated primitively. Leave it alone. + return true; + } + + if (!argNode->OperIsLocalRead() && !argNode->OperIsLoad()) + { + // A node we do not know how to turn into multiple registers. + // Usually HWINTRINSIC. Bail. + return false; + } + + ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr; + unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode); + + if (layout != nullptr) + { + assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout)); + } + else + { + assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType())); + } + + if (argNode->OperIsLoad()) + { + unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE; + if ((lastLoadSize != 0) && !isPow2(lastLoadSize)) + { + // Cannot read this size from a non-local. Bail. + return false; + } + + GenTree* indirAddr = argNode->AsIndir()->Addr(); + if (((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) && + (arg->NewAbiInfo.CountRegsAndStackSlots() > 1)) + { + // Cannot create multiple uses of the address. Bail. + return false; + } } auto createSlotAccess = [=](unsigned offset, var_types type) -> GenTree* { @@ -3006,16 +2697,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (type == TYP_UNDEF) { - if ((structSize - offset) < TARGET_POINTER_SIZE) + unsigned sizeLeft = structSize - offset; + if (sizeLeft < TARGET_POINTER_SIZE) { - // ArgsComplete has made it so that for loads from memory - // we will only see the easily handleable cases here, For - // locals we may see odd sizes, but for those we can load - // "too much" from the stack frame, and thus can just round - // up the size. - assert(isPow2(structSize - offset) || argNode->OperIsLocalRead()); - - switch (structSize - offset) + switch (sizeLeft) { case 1: type = TYP_UBYTE; @@ -3038,7 +2723,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } #ifdef TARGET_ARM64 - if (argNode->OperIsLocalRead()) + if ((offset > 0) && argNode->OperIsLocalRead()) { // For arm64 it's beneficial to consider all tails to // be TYP_I_IMPL to allow more ldp's. @@ -3058,10 +2743,27 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) if (argNode->OperIsLocalRead()) { - GenTreeLclFld* lclFld = gtNewLclFldNode(argNode->AsLclVarCommon()->GetLclNum(), genActualType(type), - argNode->AsLclVarCommon()->GetLclOffs() + offset); - lclFld->SetMorphed(this); - return lclFld; + GenTreeLclVarCommon* lclVar = argNode->AsLclVarCommon(); + LclVarDsc* dsc = lvaGetDesc(lclVar); + GenTree* result; + // We sometimes end up with struct reinterpretations where the + // retyping into a primitive allows us to replace by a scalar + // local here, so make sure we do that if possible. + if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc))) + { + result = gtNewLclVarNode(lclVar->GetLclNum()); + } + else + { + result = gtNewLclFldNode(lclVar->GetLclNum(), type, lclVar->GetLclOffs() + offset); + + if (!dsc->lvDoNotEnregister) + { + lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + result->SetMorphed(this); + return result; } else { @@ -3075,8 +2777,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) } else { - assert((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - GenTree* indirAddrDup = gtCloneExpr(indirAddr); GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL); addr = gtNewOperNode(GT_ADD, indirAddr->TypeGet(), indirAddrDup, offsetNode); @@ -3100,7 +2800,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // createSlotAccess get the type from the layout. var_types slotType = varTypeUsesFloatReg(regType) ? regType : TYP_UNDEF; GenTree* access = createSlotAccess(seg.Offset, slotType); - newArg->AddField(this, access, seg.Offset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, seg.Offset, access->TypeGet()); } else { @@ -3108,33 +2809,24 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { unsigned layoutOffset = seg.Offset + slotOffset; GenTree* access = createSlotAccess(layoutOffset, TYP_UNDEF); - newArg->AddField(this, access, layoutOffset, access->TypeGet()); + + newArg->AsFieldList()->AddField(this, access, layoutOffset, access->TypeGet()); } } } - if (argNode->OperIsLocalRead()) - { - lvaSetVarDoNotEnregister(argNode->AsLclVarCommon()->GetLclNum() - DEBUGARG(DoNotEnregisterReason::LocalField)); - } + newArg = newArg->AsFieldList()->SoleFieldOrThis(); } - // If we reach here we should have set newArg to something - noway_assert(newArg != nullptr); - - JITDUMP("fgMorphMultiregStructArg created tree:\n"); + JITDUMP("fgTryMorphStructArg created tree:\n"); DISPTREE(newArg); - argNode = newArg; // consider calling fgMorphTree(newArg); - -#endif // FEATURE_MULTIREG_ARGS - - return argNode; + *use = newArg; + return true; } //------------------------------------------------------------------------ -// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields +// fgMorphLclArgToFieldList: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields // // Arguments: // lcl - The GT_LCL_VAR node we will transform @@ -3142,7 +2834,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) // Return value: // The new GT_FIELD_LIST that we have created. // -GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) +GenTreeFieldList* Compiler::fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl) { LclVarDsc* varDsc = lvaGetDesc(lcl); assert(varDsc->lvPromoted); @@ -3150,6 +2842,8 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) unsigned fieldLclNum = varDsc->lvFieldLclStart; GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + fieldList->SetMorphed(this); + for (unsigned i = 0; i < fieldCount; i++) { LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); @@ -3158,7 +2852,6 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); fieldLclNum++; } - fieldList->SetMorphed(this); return fieldList; } @@ -3322,29 +3015,42 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx); copyBlk = fgMorphCopyBlock(copyBlk); + GenTree* argNode; + if (arg->NewAbiInfo.IsPassedByReference()) + { + argNode = gtNewLclVarAddrNode(tmp); + lvaSetVarAddrExposed(tmp DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); + } + else + { + argNode = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()); + } + argNode->SetMorphed(this); + #if FEATURE_FIXED_OUT_ARGS // For fixed out args we create the setup node here; EvalArgsToTemps knows // to handle the case of "already have a setup node" properly. arg->SetEarlyNode(copyBlk); - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); arg->SetLateNode(argNode); #else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); - argNode->SetMorphed(this); - // Change the expression to "(tmp=val),tmp" argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode); argNode->SetMorphed(this); arg->SetEarlyNode(argNode); - #endif // !FEATURE_FIXED_OUT_ARGS + + if (!arg->NewAbiInfo.IsPassedByReference()) + { + bool morphed = fgTryMorphStructArg(arg); + // Should always succeed for an unpromoted local. + assert(morphed); + } } /***************************************************************************** From 471039bb0aec2add2fba6c07e0e8ee0adb0d3d83 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Feb 2025 15:52:59 +0100 Subject: [PATCH 2/6] Delete more unnecessary code --- src/coreclr/jit/gentree.h | 2 -- src/coreclr/jit/morph.cpp | 45 --------------------------------------- 2 files changed, 47 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 904976fa50be38..93578a3cd3d2ba 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4951,8 +4951,6 @@ class CallArgs void SetNeedsTemp(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); - GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum); - // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } bool HasRetBuffer() const { return m_hasRetBuffer; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fa5fc9b9aa90e5..0b3941461dcb5b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1417,51 +1417,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(argsRemaining == 0); } -//------------------------------------------------------------------------------ -// MakeTmpArgNode: -// Create a temp for an argument if needed. We usually need this to be done -// in order to enforce ordering of the evaluation of arguments. -// -// Return Value: -// the newly created temp var tree. -// -GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum) -{ - LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); - var_types argType = varDsc->TypeGet(); - assert(genActualType(argType) == genActualType(arg->GetSignatureType())); - - GenTree* argNode = nullptr; - - if (varTypeIsStruct(argType)) - { - if (arg->NewAbiInfo.IsPassedByReference()) - { - argNode = comp->gtNewLclVarAddrNode(lclNum); - comp->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); - } - // TODO-CQ: currently this mirrors the logic in "fgMorphArgs", but actually we only need - // this retyping for args passed in a single register: "(NumRegs == 1) && !IsSplit()". - else if (arg->AbiInfo.ArgType != TYP_STRUCT) - { - argNode = comp->gtNewLclFldNode(lclNum, arg->AbiInfo.ArgType, 0); - comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg)); - } - else - { - // We are passing this struct by value in multiple registers and/or on stack. - argNode = comp->gtNewLclvNode(lclNum, argType); - } - } - else - { - assert(!arg->NewAbiInfo.IsPassedByReference()); - argNode = comp->gtNewLclvNode(lclNum, argType); - } - - return argNode; -} - //------------------------------------------------------------------------------ // EvalArgsToTemps: Handle arguments that were marked as requiring temps. // From f3ec1406428388614ac5bd003861f33ea8b13456 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Feb 2025 16:01:32 +0100 Subject: [PATCH 3/6] Run jit-format --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0b3941461dcb5b..d0d8eb3dd7c783 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2989,7 +2989,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) arg->SetEarlyNode(copyBlk); arg->SetLateNode(argNode); -#else // !FEATURE_FIXED_OUT_ARGS +#else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. From 1f293aa82d7abcb8ad9862b5bae12bfd145f7c0d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Feb 2025 17:56:44 +0100 Subject: [PATCH 4/6] Fix mismatched register type --- src/coreclr/jit/morph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d0d8eb3dd7c783..f28b5315c73ffa 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2704,7 +2704,8 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) // We sometimes end up with struct reinterpretations where the // retyping into a primitive allows us to replace by a scalar // local here, so make sure we do that if possible. - if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc))) + if ((lclVar->GetLclOffs() == 0) && (offset == 0) && (genTypeSize(type) == genTypeSize(dsc)) && + varTypeUsesSameRegType(type, dsc)) { result = gtNewLclVarNode(lclVar->GetLclNum()); } From 4f1471863982acfc5e43b975d04cbf665cad4bf6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Feb 2025 22:32:14 +0100 Subject: [PATCH 5/6] Set DNER --- src/coreclr/jit/morph.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f28b5315c73ffa..03daec446aed35 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1566,6 +1566,8 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) { // Create a GT_LCL_FLD using the wider type to go to the late argument list defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + + comp->lvaSetVarDoNotEnregister(tmpVarNum DEBUGARG(DoNotEnregisterReason::LocalField)); } else { From bd5dd6dee5c90fe217adb27ee3b8d9f346ae65bd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 17 Feb 2025 01:10:56 +0100 Subject: [PATCH 6/6] Update type of commas --- src/coreclr/jit/morph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 03daec446aed35..24baa3519e073e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2508,6 +2508,8 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) } } + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); return true; } @@ -2780,6 +2782,8 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) DISPTREE(newArg); *use = newArg; + // Potentially update commas + arg->GetNode()->ChangeType((*use)->TypeGet()); return true; }