Skip to content

Commit

Permalink
Ensure predicate cache is reset when control flow leaves block
Browse files Browse the repository at this point in the history
Whenever the control float leaves the block, it might clobber the
predicate register so we reset the cache whenever that happens.

The difficulty here is that the cache is valid only during IR generation
so we need to make sure we catch all the cases during this pass where
the execution might leave the block.

Fixes FEX-Emu#4264
  • Loading branch information
pmatos committed Jan 13, 2025
1 parent 8cfc016 commit ddbf71f
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 12 deletions.
13 changes: 9 additions & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,36 +129,44 @@ class OpDispatchBuilder final : public IREmitter {

IRPair<IROp_Jump> Jump() {
FlushRegisterCache();
ResetInitPredicateCache();
return _Jump();
}
IRPair<IROp_Jump> Jump(Ref _TargetBlock) {
FlushRegisterCache();
ResetInitPredicateCache();
return _Jump(_TargetBlock);
}
IRPair<IROp_CondJump> CondJump(Ref _Cmp1, Ref _Cmp2, Ref _TrueBlock, Ref _FalseBlock, CondClassType _Cond = {COND_NEQ},
IR::OpSize _CompareSize = OpSize::iInvalid) {
FlushRegisterCache();
ResetInitPredicateCache();
return _CondJump(_Cmp1, _Cmp2, _TrueBlock, _FalseBlock, _Cond, _CompareSize);
}
IRPair<IROp_CondJump> CondJump(Ref ssa0, CondClassType cond = {COND_NEQ}) {
FlushRegisterCache();
ResetInitPredicateCache();
return _CondJump(ssa0, cond);
}
IRPair<IROp_CondJump> CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClassType cond = {COND_NEQ}) {
FlushRegisterCache();
ResetInitPredicateCache();
return _CondJump(ssa0, ssa1, ssa2, cond);
}
IRPair<IROp_CondJump> CondJumpNZCV(CondClassType Cond) {
FlushRegisterCache();
ResetInitPredicateCache();
return _CondJump(InvalidNode, InvalidNode, InvalidNode, InvalidNode, Cond, OpSize::iInvalid, true);
}
IRPair<IROp_CondJump> CondJumpBit(Ref Src, unsigned Bit, bool Set) {
FlushRegisterCache();
ResetInitPredicateCache();
auto InlineConst = _InlineConstant(Bit);
return _CondJump(Src, InlineConst, InvalidNode, InvalidNode, {Set ? COND_TSTNZ : COND_TSTZ}, OpSize::iInvalid, false);
}
IRPair<IROp_ExitFunction> ExitFunction(Ref NewRIP) {
FlushRegisterCache();
ResetInitPredicateCache();
return _ExitFunction(NewRIP);
}
IRPair<IROp_Break> Break(BreakDefinition Reason) {
Expand All @@ -167,6 +175,7 @@ class OpDispatchBuilder final : public IREmitter {
}
IRPair<IROp_Thunk> Thunk(Ref ArgPtr, SHA256Sum ThunkNameHash) {
FlushRegisterCache();
ResetInitPredicateCache();
return _Thunk(ArgPtr, ThunkNameHash);
}

Expand Down Expand Up @@ -718,7 +727,6 @@ class OpDispatchBuilder final : public IREmitter {
void FNINIT(OpcodeArgs);

void X87ModifySTP(OpcodeArgs, bool Inc);
void X87SinCos(OpcodeArgs);
void X87FYL2X(OpcodeArgs, bool IsFYL2XP1);
void X87LDENV(OpcodeArgs);
void X87FLDCW(OpcodeArgs);
Expand Down Expand Up @@ -764,9 +772,6 @@ class OpDispatchBuilder final : public IREmitter {
void FTSTF64(OpcodeArgs);
void FRNDINTF64(OpcodeArgs);
void FSQRTF64(OpcodeArgs);
void X87UnaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87BinaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
void X87SinCosF64(OpcodeArgs);
void X87FLDCWF64(OpcodeArgs);
void X87TANF64(OpcodeArgs);
void X87ATANF64(OpcodeArgs);
Expand Down
25 changes: 24 additions & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ void OpDispatchBuilder::FILD(OpcodeArgs) {

void OpDispatchBuilder::FST(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i128Bit, true, Width);
Ref PredReg = Invalid();
if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
PredReg = InitPredicateCached(OpSize::i16Bit, ARMEmitter::PredicatePattern::SVE_VL5);
}
_StoreStackMemory(PredReg, Mem, OpSize::i128Bit, true, Width);
if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
}
Expand Down Expand Up @@ -164,6 +168,9 @@ void OpDispatchBuilder::FADD(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispa
if (Op->Src[0].IsNone()) { // Implicit argument case
auto Offset = Op->OP & 7;
auto St0 = 0;
if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}
if (ResInST0 == OpResult::RES_STI) {
_F80AddStack(Offset, St0);
} else {
Expand Down Expand Up @@ -194,6 +201,9 @@ void OpDispatchBuilder::FMUL(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispa
if (Op->Src[0].IsNone()) { // Implicit argument case
auto offset = Op->OP & 7;
auto st0 = 0;
if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}
if (ResInST0 == OpResult::RES_STI) {
_F80MulStack(offset, st0);
} else {
Expand Down Expand Up @@ -230,6 +240,9 @@ void OpDispatchBuilder::FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Re
const auto St0 = 0;
const auto Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;

if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}
if (Reverse ^ (ResInST0 == OpResult::RES_STI)) {
_F80DivStack(Result, Offset, St0);
} else {
Expand Down Expand Up @@ -271,6 +284,9 @@ void OpDispatchBuilder::FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Re
const auto St0 = 0;
const auto Result = (ResInST0 == OpResult::RES_STI) ? Offset : St0;

if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}
if (Reverse ^ (ResInST0 == OpResult::RES_STI)) {
_F80SubStack(Result, Offset, St0);
} else {
Expand Down Expand Up @@ -589,6 +605,10 @@ void OpDispatchBuilder::FXCH(OpcodeArgs) {
}

void OpDispatchBuilder::X87FYL2X(OpcodeArgs, bool IsFYL2XP1) {
if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}

if (IsFYL2XP1) {
// create an add between top of stack and 1.
Ref One = ReducedPrecisionMode ? _VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, _Constant(0x3FF0000000000000)) :
Expand Down Expand Up @@ -671,6 +691,9 @@ void OpDispatchBuilder::FTST(OpcodeArgs) {

void OpDispatchBuilder::X87OpHelper(OpcodeArgs, FEXCore::IR::IROps IROp, bool ZeroC2) {
DeriveOp(Result, IROp, _F80SCALEStack());
if (!ReducedPrecisionMode) {
ResetInitPredicateCache();
}
if (ZeroC2) {
SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(_Constant(0));
}
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void OpDispatchBuilder::FILDF64(OpcodeArgs) {

void OpDispatchBuilder::FSTF64(OpcodeArgs, IR::OpSize Width) {
Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
_StoreStackMemory(Mem, OpSize::i64Bit, true, Width);
_StoreStackMemory(Invalid(), Mem, OpSize::i64Bit, true, Width);

if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
_PopStackDestroy();
Expand Down
5 changes: 3 additions & 2 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -2788,13 +2788,14 @@
"HasSideEffects": true,
"X87": true
},
"StoreStackMemory GPR:$Addr, OpSize:$SourceSize, i1:$Float, OpSize:$StoreSize": {
"StoreStackMemory PRED:$PredReg, GPR:$Addr, OpSize:$SourceSize, i1:$Float, OpSize:$StoreSize": {
"Desc": [
"Takes the top value off the x87 stack and stores it to memory.",
"SourceSize is 128bit for F80 values, 64-bit for low precision.",
"StoreSize is the store size for conversion:",
"Float: 80-bit, 64-bit, or 32-bit",
"Int: 64-bit, 32-bit, 16-bit"
"Int: 64-bit, 32-bit, 16-bit",
"If possible, it will use the PredReg for an SVE store."
],
"HasSideEffects": true,
"X87": true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -822,10 +822,11 @@ void X87StackOptimization::Run(IREmitter* Emit) {
if (Op->StoreSize != OpSize::f80Bit) { // if it's not 80bits then convert
StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode);
}
if (Op->StoreSize == OpSize::f80Bit) { // Part of code from StoreResult_WithOpSize()
if (Features.SupportsSVE128 || Features.SupportsSVE256) {
auto PReg = IREmit->_InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
IREmit->_StoreMemPredicate(OpSize::i128Bit, OpSize::i16Bit, StackNode, PReg, AddrNode);
if (Op->StoreSize == OpSize::f80Bit) {
Ref PredReg = CurrentIR.GetNode(Op->PredReg);
bool CanUsePredicateStore = (Features.SupportsSVE128 || Features.SupportsSVE256) && PredReg;
if (CanUsePredicateStore) {
IREmit->_StoreMemPredicate(OpSize::i128Bit, OpSize::i16Bit, StackNode, PredReg, AddrNode);
} else {
// For X87 extended doubles, split before storing
IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
Expand Down

0 comments on commit ddbf71f

Please sign in to comment.