Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace successive "ldr" and "str" instructions with "ldp" and "stp" #77540

Merged
merged 30 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b88ff31
Replace successive "ldr" and "str" instructions with "ldp" and "stp"
AndyJGraham Sep 6, 2022
f0c918c
No longer use a temporary buffer to build the optimized instruction.
AndyJGraham Oct 31, 2022
f1b236e
Addressed assorted review comments.
AndyJGraham Nov 1, 2022
c0533bd
Now optimizes ascending locations and decending locations with
AndyJGraham Nov 3, 2022
372ee97
Modification to remove last instructions.
AndyJGraham Nov 14, 2022
12fc291
Merge branch 'main'
AndyJGraham Nov 15, 2022
0b377ed
Ongoing improvements to remove previously-emitted instruction
AndyJGraham Nov 29, 2022
46b85f8
Stopped optimization of consecutive instructions that straddled an in…
AndyJGraham Dec 1, 2022
e4741f9
Addressed code change requests in GitHub.
AndyJGraham Dec 1, 2022
2822f64
Merge branch 'main'
AndyJGraham Dec 1, 2022
10a4510
Various fixes to ldp/stp optimization
BruceForstall Dec 2, 2022
d80a69a
Merge pull request #1 from BruceForstall/LdpStp_Modifications_Fixes
AndyJGraham Dec 5, 2022
f6a49bf
Delete unnecessary and incorrect assert
BruceForstall Dec 7, 2022
ed4d070
Merge pull request #2 from BruceForstall/LdpStp_Modifications_FixAsse…
AndyJGraham Dec 7, 2022
4b0e51e
Diagnostic change only, to confirm whether a theory is correct or
AndyJGraham Dec 9, 2022
2997a8e
Revert "Diagnostic change only, to confirm whether a theory is correc…
AndyJGraham Dec 14, 2022
f0907cc
Do not merge. Temporarily removed calls to
AndyJGraham Dec 14, 2022
c5c4234
Modifications to better update the IP mapping table for a replaced in…
AndyJGraham Dec 15, 2022
bb8fdea
Merge branch 'main' of ssh://gerrit.oss.arm.com/enterprise-llt/dotnet…
AndyJGraham Dec 16, 2022
65eed90
Minor formatting change.
AndyJGraham Dec 16, 2022
e03b375
Check for out of range offsets
a74nh Jan 10, 2023
2cef6fc
Don't optimise during prolog/epilog
a74nh Jan 16, 2023
41a9828
Merge branch 'dotnet:main' into LdpStp_Modifications
a74nh Jan 16, 2023
ba89fd3
Fix windows build error
a74nh Jan 16, 2023
1fbf423
Merge branch main
a74nh Jan 19, 2023
ca9a325
IGF_HAS_REMOVED_INSTR is ARM64 only
a74nh Jan 20, 2023
e66ad66
Add OptimizeLdrStr function
a74nh Jan 20, 2023
8b44843
Fix formatting
a74nh Jan 20, 2023
2e7aaf6
Ensure local variables are tracked
a74nh Jan 24, 2023
fe76782
Don't peephole local variables
a74nh Jan 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#ifdef DEBUG
void genIPmappingDisp(unsigned mappingNum, IPmappingDsc* ipMapping);
void genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping);
void genIPmappingListDisp();
#endif // DEBUG

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7042,7 +7042,7 @@ const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsi
* Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
*/

void CodeGen::genIPmappingDisp(unsigned mappingNum, IPmappingDsc* ipMapping)
void CodeGen::genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping)
{
if (mappingNum != unsigned(-1))
{
Expand Down
91 changes: 90 additions & 1 deletion src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@ void emitLocation::CaptureLocation(emitter* emit)
assert(Valid());
}

//------------------------------------------------------------------------
// SetLocation: Point this emitLocation at an explicitly supplied position.
//
// Arguments:
//    _ig      - the instruction group to record
//    _codePos - the encoded code position to record
//
// Notes:
//    Asserts that the resulting location is Valid().
//
void emitLocation::SetLocation(insGroup* _ig, unsigned _codePos)
{
    this->codePos = _codePos;
    this->ig      = _ig;

    assert(Valid());
}

//------------------------------------------------------------------------
// SetLocation: Make this emitLocation refer to the same position as another one.
//
// Arguments:
//    newLocation - the location whose instruction group and code position are copied
//
// Notes:
//    Forwards to the (insGroup*, unsigned) overload, which stores both fields and
//    asserts that the resulting location is Valid().
//
void emitLocation::SetLocation(emitLocation newLocation)
{
    SetLocation(newLocation.ig, newLocation.codePos);
}

bool emitLocation::IsCurrentLocation(emitter* emit) const
{
assert(Valid());
Expand All @@ -50,6 +66,11 @@ int emitLocation::GetInsNum() const
return emitGetInsNumFromCodePos(codePos);
}

//------------------------------------------------------------------------
// GetInsOffset: Decode the offset component of the stored code position.
//
// Return Value:
//    The value produced by emitGetInsOfsFromCodePos() for this location's codePos.
//
int emitLocation::GetInsOffset() const
{
    const int insOffset = emitGetInsOfsFromCodePos(codePos);
    return insOffset;
}

// Get the instruction offset in the current instruction group, which must be a funclet prolog group.
// This is used to find an instruction offset used in unwind data.
// TODO-AMD64-Bug?: We only support a single main function prolog group, but allow for multiple funclet prolog
Expand Down Expand Up @@ -798,6 +819,7 @@ insGroup* emitter::emitSavIG(bool emitAdd)

assert((ig->igFlags & IGF_PLACEHOLDER) == 0);
ig->igData = id;
INDEBUG(ig->igDataSize = gs;)

memcpy(id, emitCurIGfreeBase, sz);

Expand Down Expand Up @@ -8718,6 +8740,14 @@ UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
{
of = ig->igSize;
}
#ifdef TARGET_ARM64
else if ((ig->igFlags & IGF_HAS_REMOVED_INSTR) != 0 && no == ig->igInsCnt + 1U)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, can we put in #ifdef TARGET_ARM64? Likewise, put #define IGF_HAS_REMOVED_INSTR 0x2000 under TARGET_ARM64? I see TP regression on x64 otherwise:

MicrosoftTeams-image (16)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I made emitRemoveLastInstruction() ARM64 only too, rather than have it only set IGF_HAS_REMOVED_INSTR on ARM64.

{
// This can happen if an instruction was replaced, but the replacement couldn't fit into
// the same IG and instead was placed in a new IG.
return ig->igNext->igOffs + emitFindOffset(ig->igNext, 1);
}
#endif
else if (ig->igFlags & IGF_UPD_ISZ)
{
/*
Expand All @@ -8736,7 +8766,6 @@ UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
// printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);

/* Make sure the offset estimate is accurate */

assert(of == emitFindOffset(ig, emitGetInsNumFromCodePos(codePos)));
}

Expand Down Expand Up @@ -9192,6 +9221,66 @@ void emitter::emitNxtIG(bool extend)
#endif
}

//------------------------------------------------------------------------
// emitRemoveLastInstruction: Remove the last instruction emitted; it has been optimized away by the
// next instruction we are generating. `emitLastIns` must be non-null, meaning there is a
// previous instruction. The previous instruction might have already been saved, or it might
// be in the currently accumulating insGroup buffer.
//
// The `emitLastIns` is set to nullptr after this function. It is expected that a new instruction
// will be immediately generated after this, which will set it again.
//
// Removing an instruction can invalidate any captured emitter location
// (using emitLocation::CaptureLocation()) after the instruction was generated. This is because the
// emitLocation stores the current IG instruction number and code size. If the instruction is
// removed and not replaced (e.g., it is at the end of the IG, and any replacement creates a new
// EXTEND IG), then the saved instruction number is incorrect. The IGF_HAS_REMOVED_INSTR flag is
// used to check for this later.
//
// NOTE: It is expected that the GC effect of the removed instruction will be handled by the newly
// generated replacement(s).
//
#ifdef TARGET_ARM64
void emitter::emitRemoveLastInstruction()
{
assert(emitLastIns != nullptr);
assert(emitLastInsIG != nullptr);

JITDUMP("Removing saved instruction in %s:\n> ", emitLabelString(emitLastInsIG));
JITDUMPEXEC(dispIns(emitLastIns))

// We should assert it's not a jmp, as that would require updating the jump lists, e.g. emitCurIGjmpList.

BYTE* lastInsActualStartAddr = (BYTE*)emitLastIns - m_debugInfoSize;
unsigned short lastCodeSize = (unsigned short)emitLastIns->idCodeSize();

// Check that a new buffer hasn't been create since the last instruction was emitted.
assert((emitCurIGfreeBase <= lastInsActualStartAddr) && (lastInsActualStartAddr < emitCurIGfreeEndp));

// Ensure the current IG is non-empty.
assert(emitCurIGnonEmpty());
assert(lastInsActualStartAddr < emitCurIGfreeNext);
assert(emitCurIGinsCnt >= 1);
assert(emitCurIGsize >= emitLastIns->idCodeSize());

size_t insSize = emitCurIGfreeNext - lastInsActualStartAddr;

emitCurIGfreeNext = lastInsActualStartAddr;
emitCurIGinsCnt -= 1;
emitInsCount -= 1;
emitCurIGsize -= lastCodeSize;

// We're going to overwrite the memory; zero it.
memset(emitCurIGfreeNext, 0, insSize);

// Remember this happened.
emitCurIG->igFlags |= IGF_HAS_REMOVED_INSTR;

emitLastIns = nullptr;
emitLastInsIG = nullptr;
}
#endif

/*****************************************************************************
*
* emitGetInsSC: Get the instruction's constant value.
Expand Down
32 changes: 28 additions & 4 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ class emitLocation
{
}

// Construct a location from an explicit instruction group and encoded code position.
// All the work (including the validity assert) is done by SetLocation().
emitLocation(insGroup* _ig, unsigned _codePos)
{
    this->SetLocation(_ig, _codePos);
}

// Construct a location by capturing a position from the given emitter via CaptureLocation().
emitLocation(emitter* emit)
{
    this->CaptureLocation(emit);
}

emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
{
}
Expand All @@ -142,6 +152,8 @@ class emitLocation
}

void CaptureLocation(emitter* emit);
void SetLocation(insGroup* _ig, unsigned _codePos);
void SetLocation(emitLocation newLocation);

bool IsCurrentLocation(emitter* emit) const;

Expand All @@ -160,6 +172,7 @@ class emitLocation
}

int GetInsNum() const;
int GetInsOffset() const;

bool operator!=(const emitLocation& other) const
{
Expand Down Expand Up @@ -250,6 +263,7 @@ struct insGroup
#ifdef DEBUG
BasicBlock* lastGeneratedBlock; // The last block that generated code into this insGroup.
jitstd::list<BasicBlock*> igBlocks; // All the blocks that generated code into this insGroup.
size_t igDataSize; // size of instrDesc data pointed to by 'igData'
#endif

UNATIVE_OFFSET igNum; // for ordering (and display) purposes
Expand Down Expand Up @@ -280,6 +294,9 @@ struct insGroup
#define IGF_REMOVED_ALIGN 0x0800 // IG was marked as having an alignment instruction(s), but was later unmarked
// without updating the IG's size/offsets.
#define IGF_HAS_REMOVABLE_JMP 0x1000 // this group ends with an unconditional jump which is a candidate for removal
#ifdef TARGET_ARM64
#define IGF_HAS_REMOVED_INSTR 0x2000 // this group has an instruction that was removed.
#endif

// Mask of IGF_* flags that should be propagated to new blocks when they are created.
// This allows prologs and epilogs to be any number of IGs, but still be
Expand Down Expand Up @@ -2170,6 +2187,10 @@ class emitter
insGroup* emitSavIG(bool emitAdd = false);
void emitNxtIG(bool extend = false);

#ifdef TARGET_ARM64
void emitRemoveLastInstruction();
#endif

bool emitCurIGnonEmpty()
{
return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase);
Expand Down Expand Up @@ -2823,12 +2844,15 @@ inline unsigned emitGetInsOfsFromCodePos(unsigned codePos)

inline unsigned emitter::emitCurOffset()
{
unsigned codePos = emitCurIGinsCnt + (emitCurIGsize << 16);
return emitSpecifiedOffset(emitCurIGinsCnt, emitCurIGsize);
}

assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
inline unsigned emitter::emitSpecifiedOffset(unsigned insCount, unsigned igSize)
{
unsigned codePos = insCount + (igSize << 16);

// printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
assert(emitGetInsOfsFromCodePos(codePos) == igSize);
assert(emitGetInsNumFromCodePos(codePos) == insCount);

return codePos;
}
Expand Down
Loading