Skip to content

Commit

Permalink
Ensure predicate cache is reset when control flow leaves block
Browse files Browse the repository at this point in the history
Whenever the control flow leaves the block, it might clobber the
predicate register, so we reset the cache whenever that happens.

The difficulty here is that the cache is valid only during IR generation
so we need to make sure we catch all the cases during this pass where
the execution might leave the block.

Fixes FEX-Emu#4264
  • Loading branch information
pmatos committed Jan 17, 2025
1 parent 48c03d7 commit dccae72
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 79 deletions.
12 changes: 11 additions & 1 deletion FEXCore/Scripts/json_ir_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class OpDefinition:
NonSSAArgNum: int
DynamicDispatch: bool
LoweredX87: bool
MaybeClobbersPredRegs: bool
JITDispatch: bool
JITDispatchOverride: str
TiedSource: int
Expand All @@ -79,6 +80,7 @@ def __init__(self):
self.NonSSAArgNum = 0
self.DynamicDispatch = False
self.LoweredX87 = False
self.MaybeClobbersPredRegs = False
self.JITDispatch = True
self.JITDispatchOverride = None
self.TiedSource = -1
Expand Down Expand Up @@ -223,7 +225,7 @@ def parse_ops(ops):
(OpArg.Type == "GPR" or
OpArg.Type == "GPRPair" or
OpArg.Type == "FPR" or
OpArg.Type == "PR")):
OpArg.Type == "PRED")):
OpDef.EmitValidation.append(f"GetOpRegClass({ArgName}) == InvalidClass || WalkFindRegClass({ArgName}) == {OpArg.Type}Class")

OpArg.Name = ArgName
Expand Down Expand Up @@ -277,6 +279,10 @@ def parse_ops(ops):
assert("JITDispatch" not in op_val)
OpDef.JITDispatch = False

if "MaybeClobbersPredRegs" in op_val:
OpDef.MaybeClobbersPredRegs = op_val["MaybeClobbersPredRegs"]
# TODO: Does this imply !JITDispatch?

if "TiedSource" in op_val:
OpDef.TiedSource = op_val["TiedSource"]

Expand Down Expand Up @@ -506,6 +512,7 @@ def print_ir_hassideeffects():
("HasSideEffects", "bool"),
("ImplicitFlagClobber", "bool"),
("LoweredX87", "bool"),
("MaybeClobbersPredRegs", "bool"),
("TiedSource", "int8_t"),
]:
output_file.write(
Expand Down Expand Up @@ -707,6 +714,9 @@ def print_ir_allocator_helpers():
"\t\tif(MMXState == MMXState_MMX) ChgStateMMX_X87();\n"
)

if op.MaybeClobbersPredRegs:
output_file.write("\t\tResetInitPredicateCache();\n")

output_file.write("\t\tauto _Op = AllocateOp<IROp_{}, IROps::OP_{}>();\n".format(op.Name, op.Name.upper()))

if op.SSAArgNum != 0:
Expand Down
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4314,7 +4314,7 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T
Ref MemSrc = LoadEffectiveAddress(A, true);
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
// Using SVE we can load this with a single instruction.
auto PReg = _InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
auto PReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
return _LoadMemPredicate(OpSize::i128Bit, OpSize::i16Bit, PReg, MemSrc);
} else {
// For X87 extended doubles, Split the load.
Expand Down Expand Up @@ -4448,7 +4448,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
if (OpSize == OpSize::f80Bit) {
Ref MemStoreDst = LoadEffectiveAddress(A, true);
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
auto PReg = _InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
auto PReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
_StoreMemPredicate(OpSize::i128Bit, OpSize::i16Bit, Src, PReg, MemStoreDst);
} else {
// For X87 extended doubles, split before storing
Expand Down
6 changes: 2 additions & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class OpDispatchBuilder final : public IREmitter {
CachedNZCV = nullptr;
CFInverted = CFInvertedABI;
FlushRegisterCache();
ResetInitPredicateCache();

// New block needs to reset segment telemetry.
SegmentsNeedReadCheck = ~0U;
Expand Down Expand Up @@ -718,7 +719,6 @@ class OpDispatchBuilder final : public IREmitter {
void FNINIT(OpcodeArgs);

void X87ModifySTP(OpcodeArgs, bool Inc);
void X87SinCos(OpcodeArgs);
void X87FYL2X(OpcodeArgs, bool IsFYL2XP1);
void X87LDENV(OpcodeArgs);
void X87FLDCW(OpcodeArgs);
Expand Down Expand Up @@ -764,9 +764,6 @@ class OpDispatchBuilder final : public IREmitter {
void FTSTF64(OpcodeArgs);
void FRNDINTF64(OpcodeArgs);
void FSQRTF64(OpcodeArgs);
void X87UnaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87BinaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87SinCosF64(OpcodeArgs);
void X87FLDCWF64(OpcodeArgs);
void X87TANF64(OpcodeArgs);
void X87ATANF64(OpcodeArgs);
Expand Down Expand Up @@ -1175,6 +1172,7 @@ class OpDispatchBuilder final : public IREmitter {
}

void FlushRegisterCache(bool SRAOnly = false) {

// At block boundaries, fix up the carry flag.
if (!SRAOnly) {
RectifyCarryInvert(CFInvertedABI);
Expand Down
14 changes: 8 additions & 6 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ void OpDispatchBuilder::FILD(OpcodeArgs) {

void OpDispatchBuilder::FST(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i128Bit, true, Width);
Ref PredReg = Invalid();
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
PredReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
}
_StoreStackMemory(PredReg, Mem, OpSize::i128Bit, true, Width);
if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
}
Expand Down Expand Up @@ -267,9 +271,9 @@ void OpDispatchBuilder::FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Re

void OpDispatchBuilder::FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
if (Op->Src[0].IsNone()) {
const auto Offset = Op->OP & 7;
const auto St0 = 0;
const auto Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;
const uint8_t Offset = Op->OP & 7;
const uint8_t St0 = 0;
const uint8_t Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;

if (Reverse ^ (ResInST0 == OpResult::RES_STI)) {
_F80SubStack(Result, Offset, St0);
Expand Down Expand Up @@ -751,13 +755,11 @@ void OpDispatchBuilder::FNINIT(OpcodeArgs) {
}

// Calls _InvalidateStack with the low three bits of the opcode (Op->OP & 7).
// NOTE(review): presumably those bits are the ST(i) index encoded in the
// opcode — confirm against the instruction tables.
void OpDispatchBuilder::X87FFREE(OpcodeArgs) {

_InvalidateStack(Op->OP & 7);
}

// Calls _InvalidateStack with the constant 0xff.
// NOTE(review): 0xff looks like an "every stack slot" value, in line with the
// tag comment below — confirm against the definition of _InvalidateStack.
void OpDispatchBuilder::X87EMMS(OpcodeArgs) {
// Tags all get set to 0b11

_InvalidateStack(0xff);
}

Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void OpDispatchBuilder::FILDF64(OpcodeArgs) {

void OpDispatchBuilder::FSTF64(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i64Bit, true, Width);
_StoreStackMemory(Invalid(), Mem, OpSize::i64Bit, true, Width);

if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
Expand Down
Loading

0 comments on commit dccae72

Please sign in to comment.