Skip to content

Commit

Permalink
Merge pull request FEX-Emu#3885 from alyssarosenzweig/opt/zero-flag
Browse files Browse the repository at this point in the history
Optimize zero x87 flags
  • Loading branch information
Sonicadvance1 authored Jul 21, 2024
2 parents f8c6baa + 592d6cc commit 77ec950
Show file tree
Hide file tree
Showing 15 changed files with 4,420 additions and 4,724 deletions.
21 changes: 1 addition & 20 deletions FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ DEF_OP(StoreContext) {
const auto OpSize = IROp->Size;

if (Op->Class == FEXCore::IR::GPRClass) {
auto Src = GetReg(Op->Value.ID());
auto Src = GetZeroableReg(Op->Value);

switch (OpSize) {
case 1: strb(Src, STATE, Op->Offset); break;
Expand Down Expand Up @@ -529,25 +529,6 @@ DEF_OP(LoadDF) {
ldrsb(Dst.X(), STATE, offsetof(FEXCore::Core::CPUState, flags[Flag]));
}

// JIT handler for the LoadFlag IR op: emits a single byte load (ldrb) that reads
// the flag selected by Op->Flag out of the flags array in the CPU state into the
// destination register allocated for this node.
DEF_OP(LoadFlag) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
auto Dst = GetReg(Node);

// PF and AF are rejected here; per the assertion message they are expected to be
// accessed as registers rather than through the flags array.
LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && Op->Flag != X86State::RFLAG_AF_RAW_LOC, "PF/AF must be accessed as "
"registers");

// The flag index is added directly to the offset of flags[0], i.e. the flag is
// addressed as flags[Op->Flag] relative to STATE.
ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
}

// JIT handler for the StoreFlag IR op: emits a single byte store (strb) that writes
// the register holding Op->Value into the slot for Op->Flag in the flags array of
// the CPU state.
DEF_OP(StoreFlag) {
auto Op = IROp->C<IR::IROp_StoreFlag>();

// PF and AF are rejected here; per the assertion message they are expected to be
// accessed as registers rather than through the flags array.
LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && Op->Flag != X86State::RFLAG_AF_RAW_LOC, "PF/AF must be accessed as "
"registers");

// The flag index is added directly to the offset of flags[0], i.e. the flag is
// addressed as flags[Op->Flag] relative to STATE.
strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
}

ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
if (Offset.IsInvalid()) {
Expand Down
6 changes: 3 additions & 3 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,7 @@ class OpDispatchBuilder final : public IREmitter {
} else if (Index >= FPR0Index && Index <= FPR15Index) {
_StoreRegister(Value, Index - FPR0Index, FPRClass, VectorSize);
} else if (Index == DFIndex) {
_StoreFlag(Value, X86State::RFLAG_DF_RAW_LOC);
_StoreContext(1, GPRClass, Value, offsetof(Core::CPUState, flags[X86State::RFLAG_DF_RAW_LOC]));
} else {
bool Partial = RegCache.Partial & (1ull << Index);
unsigned Size = Partial ? 8 : CacheIndexToSize(Index);
Expand Down Expand Up @@ -1729,7 +1729,7 @@ class OpDispatchBuilder final : public IREmitter {
if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
StoreDF(_SubShift(OpSize::i64Bit, _Constant(1), Value, ShiftType::LSL, 1));
} else {
_StoreFlag(Value, BitOffset);
_StoreContext(1, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags[BitOffset]));
}
}
}
Expand Down Expand Up @@ -1934,7 +1934,7 @@ class OpDispatchBuilder final : public IREmitter {
// Recover the sign bit, it is the logical DF value
return _Lshr(OpSize::i64Bit, LoadDF(), _Constant(63));
} else {
return _LoadFlag(BitOffset);
return _LoadContext(1, GPRClass, offsetof(Core::CPUState, flags[BitOffset]));
}
}

Expand Down
15 changes: 0 additions & 15 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -489,21 +489,6 @@
"DestSize": "8"
},

"GPR = LoadFlag u32:$Flag": {
"Desc": ["Loads an x86-64 flag from the context object",
"Specialized to allow flexible implementation of flag handling"
],
"DestSize": "1"
},

"StoreFlag GPR:$Value, u32:$Flag": {
"HasSideEffects": true,
"Desc": ["Stores 1-bit of the flag in to the specified x86-64 flag",
"Specialized to allow flexible implementation of flag handling"
],
"DestSize": "1"
},

"SSA = LoadMem RegisterClass:$Class, u8:#Size, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
"DestSize": "Size"
},
Expand Down
3 changes: 2 additions & 1 deletion FEXCore/Source/Interface/IR/Passes/ConstProp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,8 @@ void ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR)
break;
}
case OP_ADC:
case OP_ADCWITHFLAGS: {
case OP_ADCWITHFLAGS:
case OP_STORECONTEXT: {
uint64_t Constant1 {};
if (IREmit->IsValueConstant(IROp->Args[0], &Constant1)) {
if (Constant1 == 0) {
Expand Down
46 changes: 9 additions & 37 deletions FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,16 @@ struct ReadWriteKill {
};

// Per-block bookkeeping for dead store elimination. One entry is kept per block
// (looked up by the block's node ID) and records read/write/kill bitmasks.
struct Info {
// Read/write/kill masks with one bit per x86 flag.
ReadWriteKill flag;
// Read/write/kill masks for registers, with bits produced by RegBit(Class, Reg).
ReadWriteKill reg;
};

/**
* @brief This is a temporary pass to detect simple multiblock dead flag/reg stores
* @brief This is a temporary pass to detect simple multiblock dead reg stores
*
* First pass computes which flags/regs are read and written per block
* First pass computes which regs are read and written per block
*
* Second pass computes which flags/regs are stored, but overwritten by the next block(s).
* It also propagates this information a few times to catch dead flags/regs across multiple blocks.
* Second pass computes which regs are stored, but overwritten by the next block(s).
* It also propagates this information a few times to catch dead regs across multiple blocks.
*
* Third pass removes the dead stores.
*
Expand All @@ -64,28 +63,14 @@ void DeadStoreElimination::Run(IREmitter* IREmit) {
fextl::vector<Info> InfoMap(CurrentIR.GetSSACount());

// Pass 1
// Compute flags/regs read/writes per block
// Compute regs read/writes per block
// This is conservative and doesn't try to be smart about loads after writes
{
for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) {
auto& BlockInfo = InfoMap[CurrentIR.GetID(BlockNode).Value];

for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
if (IROp->Op == OP_STOREFLAG) {
auto Op = IROp->C<IR::IROp_StoreFlag>();

BlockInfo.flag.writes |= 1UL << Op->Flag;
} else if (IROp->Op == OP_INVALIDATEFLAGS) {
auto Op = IROp->C<IR::IROp_InvalidateFlags>();

BlockInfo.flag.writes |= Op->Flags;
} else if (IROp->Op == OP_LOADFLAG) {
auto Op = IROp->C<IR::IROp_LoadFlag>();

BlockInfo.flag.reads |= 1UL << Op->Flag;
} else if (IROp->Op == OP_LOADDF) {
BlockInfo.flag.reads |= 1UL << X86State::RFLAG_DF_RAW_LOC;
} else if (IROp->Op == OP_STOREREGISTER) {
if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->C<IR::IROp_StoreRegister>();
BlockInfo.reg.writes |= RegBit(Op->Class, Op->Reg);
} else if (IROp->Op == OP_LOADREGISTER) {
Expand All @@ -111,11 +96,9 @@ void DeadStoreElimination::Run(IREmitter* IREmit) {
auto& TargetInfo = InfoMap[Op->Header.Args[0].ID().Value];

// stores to remove are written by the next block but not read
BlockInfo.flag.kill = TargetInfo.flag.writes & ~(TargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.reg.kill = TargetInfo.reg.writes & ~(TargetInfo.reg.reads) & ~BlockInfo.reg.reads;

// Flags that are written by the next block can be considered as written by this block, if not read
BlockInfo.flag.writes |= BlockInfo.flag.kill & ~BlockInfo.flag.reads;
// Regs that are written by the next block can be considered as written by this block, if not read
BlockInfo.reg.writes |= BlockInfo.reg.kill & ~BlockInfo.reg.reads;
} else if (IROp->Op == OP_CONDJUMP) {
auto Op = IROp->C<IR::IROp_CondJump>();
Expand All @@ -125,14 +108,10 @@ void DeadStoreElimination::Run(IREmitter* IREmit) {
auto& FalseTargetInfo = InfoMap[Op->FalseBlock.ID().Value];

// stores to remove are written by the next blocks but not read
BlockInfo.flag.kill = TrueTargetInfo.flag.writes & ~(TrueTargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.reg.kill = TrueTargetInfo.reg.writes & ~(TrueTargetInfo.reg.reads) & ~BlockInfo.reg.reads;

BlockInfo.flag.kill &= FalseTargetInfo.flag.writes & ~(FalseTargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.reg.kill &= FalseTargetInfo.reg.writes & ~(FalseTargetInfo.reg.reads) & ~BlockInfo.reg.reads;

// Flags that are written by the next blocks can be considered as written by this block, if not read
BlockInfo.flag.writes |= BlockInfo.flag.kill & ~BlockInfo.flag.reads;
// Regs that are written by the next blocks can be considered as written by this block, if not read
BlockInfo.reg.writes |= BlockInfo.reg.kill & ~BlockInfo.reg.reads;
}
}
Expand All @@ -145,14 +124,7 @@ void DeadStoreElimination::Run(IREmitter* IREmit) {
auto& BlockInfo = InfoMap[CurrentIR.GetID(BlockNode).Value];

for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
if (IROp->Op == OP_STOREFLAG) {
auto Op = IROp->C<IR::IROp_StoreFlag>();

// If this StoreFlag is never read, remove it
if (BlockInfo.flag.kill & (1UL << Op->Flag)) {
IREmit->Remove(CodeNode);
}
} else if (IROp->Op == OP_STOREREGISTER) {
if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->C<IR::IROp_StoreRegister>();

// If this OP_STOREREGISTER is never read, remove it
Expand Down
21 changes: 10 additions & 11 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,7 @@
]
},
"xrstor [rax]": {
"ExpectedInstructionCount": 167,
"ExpectedInstructionCount": 166,
"Comment": "GROUP15 0x0F 0xAE /5",
"ExpectedArm64ASM": [
"sub sp, sp, #0x40 (64)",
Expand Down Expand Up @@ -1542,17 +1542,16 @@
"str q4, [x28, #1072]",
"str q3, [x28, #1056]",
"str q2, [x28, #1040]",
"b #+0x4c",
"mov w20, #0x0",
"mov w21, #0x37f",
"strh w21, [x28, #1296]",
"strb w20, [x28, #1019]",
"strb w20, [x28, #1016]",
"strb w20, [x28, #1017]",
"strb w20, [x28, #1018]",
"strb w20, [x28, #1022]",
"b #+0x48",
"mov w20, #0x37f",
"strh w20, [x28, #1296]",
"strb wzr, [x28, #1019]",
"strb wzr, [x28, #1016]",
"strb wzr, [x28, #1017]",
"strb wzr, [x28, #1018]",
"strb wzr, [x28, #1022]",
"movi v2.2d, #0x0",
"strb w20, [x28, #1298]",
"strb wzr, [x28, #1298]",
"str q2, [x28, #1152]",
"str q2, [x28, #1136]",
"str q2, [x28, #1120]",
Expand Down
Loading

0 comments on commit 77ec950

Please sign in to comment.