Skip to content

Commit

Permalink
Merge pull request FEX-Emu#4093 from pmatos/FXtractFix
Browse files Browse the repository at this point in the history
Fix FXTRACT for 0.0 and -0.0
  • Loading branch information
Sonicadvance1 authored Oct 17, 2024
2 parents 0897cd8 + 5997030 commit a421ff1
Show file tree
Hide file tree
Showing 17 changed files with 447 additions and 84 deletions.
10 changes: 10 additions & 0 deletions FEXCore/Source/Common/SoftFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ struct FEX_PACKED X80SoftFloat {

return Result;
#else
// Zero is a special case, the significand for +/- 0 is +/- zero.
if (lhs.Exponent == 0x0 && lhs.Significand == 0x0) {
return lhs;
}
X80SoftFloat Tmp = lhs;
Tmp.Exponent = 0x3FFF;
Tmp.Sign = lhs.Sign;
Expand All @@ -256,6 +260,12 @@ struct FEX_PACKED X80SoftFloat {

return Result;
#else
// Zero is a special case, the exponent is always -inf
if (lhs.Exponent == 0x0 && lhs.Significand == 0x0) {
X80SoftFloat Result(1, 0x7FFFUL, 0x8000'0000'0000'0000UL);
return Result;
}

int32_t TrueExp = lhs.Exponent - ExponentBias;
return i32_to_extF80(TrueExp);
#endif
Expand Down
8 changes: 8 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,14 @@ DEF_OP(NZCVSelect) {
}
}

// FPR flavour of NZCVSelect: Dest = Cond ? TrueVal : FalseVal, where the
// condition is evaluated against the NZCV flags and both inputs are vector
// registers. Emitted as one scalar FCSEL at the sub-register size of the IR op.
DEF_OP(NZCVSelectV) {
  const auto Op = IROp->C<IR::IROp_NZCVSelectV>();

  const auto Cond = MapCC(Op->Cond);
  const auto ElementSize = ConvertSubRegSizePair248(IROp);

  const auto Dst = GetVReg(Node);
  const auto OnTrue = GetVReg(Op->TrueVal.ID());
  const auto OnFalse = GetVReg(Op->FalseVal.ID());

  fcsel(ElementSize.Scalar, Dst, OnTrue, OnFalse, Cond);
}

DEF_OP(NZCVSelectIncrement) {
auto Op = IROp->C<IR::IROp_NZCVSelectIncrement>();

Expand Down
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5535,7 +5535,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
{OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>},
{OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>},
{OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>},
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80XTRACTSTACK, false>},
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACTF64},
{OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>},
{OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>},
{OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
Expand Down Expand Up @@ -5765,7 +5765,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
{OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>},
{OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>},
{OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>},
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80XTRACTSTACK, false>},
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACT},
{OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>},
{OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>},
{OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
Expand Down
3 changes: 2 additions & 1 deletion FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ class OpDispatchBuilder final : public IREmitter {
void X87FNSAVE(OpcodeArgs);
void X87FRSTOR(OpcodeArgs);
void X87FXAM(OpcodeArgs);
void X87FXTRACT(OpcodeArgs);
void X87FCMOV(OpcodeArgs);
void X87EMMS(OpcodeArgs);
void X87FFREE(OpcodeArgs);
Expand Down Expand Up @@ -768,7 +769,6 @@ class OpDispatchBuilder final : public IREmitter {
void FABSF64(OpcodeArgs);
void FTSTF64(OpcodeArgs);
void FRNDINTF64(OpcodeArgs);
void FXTRACTF64(OpcodeArgs);
void FNINITF64(OpcodeArgs);
void FSQRTF64(OpcodeArgs);
void X87UnaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp);
Expand All @@ -780,6 +780,7 @@ class OpDispatchBuilder final : public IREmitter {
void X87FNSAVEF64(OpcodeArgs);
void X87FRSTORF64(OpcodeArgs);
void X87FXAMF64(OpcodeArgs);
void X87FXTRACTF64(OpcodeArgs);
void X87LDENVF64(OpcodeArgs);

void FCOMIF64(OpcodeArgs, size_t width, bool Integer, FCOMIFlags whichflags, bool poptwice);
Expand Down
10 changes: 10 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -804,4 +804,14 @@ void OpDispatchBuilder::X87FXAM(OpcodeArgs) {
SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(C3);
}

// FXTRACT on the 80-bit x87 path.
//
// Reads ST0, removes it from the stack, then pushes its exponent followed by
// its significand, so the significand ends up on top of the stack with the
// exponent directly below it. The actual split is done by the F80XTRACT_EXP /
// F80XTRACT_SIG IR ops.
void OpDispatchBuilder::X87FXTRACT(OpcodeArgs) {
  Ref St0 = _ReadStackValue(0);
  _PopStackDestroy();

  Ref Exponent = _F80XTRACT_EXP(St0);
  Ref Significand = _F80XTRACT_SIG(St0);

  // Push order matters: exponent first, significand last (new top of stack).
  _PushStack(Exponent, Exponent, 80, true);
  _PushStack(Significand, Significand, 80, true);
}

} // namespace FEXCore::IR
34 changes: 34 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ desc: Handles x86/64 x87 to IR

#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
Expand Down Expand Up @@ -519,4 +520,37 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) {
_StoreContextIndexed(Reg, Top, 8, MMBaseOffset(), 16, FPRClass);
}

// FXTRACT for the path where x87 stack values are held as 64-bit doubles.
//
// Replaces ST0 with its exponent and then pushes its significand:
//   val == +0.0 -> sig = +0.0, exp = -inf
//   val == -0.0 -> sig = -0.0, exp = -inf
//   otherwise   -> exp = (11-bit exponent field - 1023) converted to double,
//                  sig = val with its exponent field replaced by 0x3ff.
// Only +/-0 is special-cased here; every other bit pattern takes the generic
// path. Both candidate results are computed unconditionally and the final
// values are picked with a flag-based select.
void OpDispatchBuilder::X87FXTRACTF64(OpcodeArgs) {
  // Raw binary64 bit patterns / masks. `auto` keeps the exact literal types
  // that are handed to _Constant().
  constexpr auto NegInfinityBits = 0xfff0'0000'0000'0000UL;     // sign + all-ones exponent
  constexpr auto SignAndMantissaMask = 0x800f'ffff'ffff'ffffLL; // everything but the exponent field
  constexpr auto ExponentOfOneBits = 0x3ff0'0000'0000'0000LL;   // exponent field equal to the bias
  constexpr auto MagnitudeMask = 0x7fff'ffff'ffff'ffffUL;       // everything but the sign bit
  constexpr auto ExponentBias = 1023;

  Ref St0 = _ReadStackValue(0);

  Ref St0Bits = _VExtractToGPR(8, 8, St0, 0);

  // Result for +/-0: the exponent is -inf and the significand is the input
  // itself, which keeps the sign of the zero.
  Ref ZeroExp = _VCastFromGPR(8, 8, _Constant(NegInfinityBits));

  // Generic result: unbias the 11-bit exponent field that starts at bit 52 and
  // convert it to a double.
  Ref BiasedExp = _Bfe(OpSize::i64Bit, 11, 52, St0Bits);
  Ref UnbiasedExp = _Sub(OpSize::i64Bit, BiasedExp, _Constant(ExponentBias));
  Ref GenericExp = _Float_FromGPR_S(8, 8, UnbiasedExp);

  // Generic significand: keep sign and mantissa, overwrite the exponent field.
  Ref SignAndMantissa = _And(OpSize::i64Bit, St0Bits, _Constant(SignAndMantissaMask));
  Ref GenericSigBits = _Or(OpSize::i64Bit, SignAndMantissa, _Constant(ExponentOfOneBits));
  Ref GenericSig = _VCastFromGPR(8, 8, GenericSigBits);

  // The test below sets NZCV for the selects, so SaveNZCV() runs first
  // (presumably to preserve the current flag state - confirm against its definition).
  // EQ holds when all bits except the sign are clear, i.e. the input is +/-0.
  SaveNZCV();
  _TestNZ(OpSize::i64Bit, St0Bits, _Constant(MagnitudeMask));

  Ref Sig = _NZCVSelectV(8, {COND_EQ}, St0, GenericSig);
  Ref Exp = _NZCVSelectV(8, {COND_EQ}, ZeroExp, GenericExp);

  // Exponent replaces the old ST0, significand becomes the new top of stack.
  _PopStackDestroy();
  _PushStack(Exp, Exp, 64, true);
  _PushStack(Sig, Sig, 64, true);
}
} // namespace FEXCore::IR
12 changes: 8 additions & 4 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,14 @@
"ResultSize == FEXCore::IR::OpSize::i32Bit || ResultSize == FEXCore::IR::OpSize::i64Bit"
]
},
"FPR = NZCVSelectV u8:#ResultSize, CondClass:$Cond, FPR:$TrueVal, FPR:$FalseVal": {
"Desc": [
"Select based on value in NZCV flags, where TrueVal and FalseVal are both FPRs.",
"op:",
"Dest = Cond ? TrueVal : FalseVal"
],
"DestSize": "ResultSize"
},
"GPR = NZCVSelectIncrement OpSize:#ResultSize, CondClass:$Cond, GPR:$TrueVal, GPR:$FalseVal": {
"Desc": ["Select and increment based on value in NZCV flags",
"op:",
Expand Down Expand Up @@ -3043,10 +3051,6 @@
"DestSize": "16",
"JITDispatch": false
},
"F80XTRACTStack": {
"X87": true,
"HasSideEffects": true
},
"FPR = F80XTRACT_EXP FPR:$X80Src": {
"DestSize": "16",
"JITDispatch": false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ constexpr FlagInfo ClassifyConst(IROps Op) {
case OP_STOREAF: return FlagInfo::Pack({.Write = FLAG_A, .CanEliminate = true});

case OP_NZCVSELECT:
case OP_NZCVSELECTV:
case OP_NZCVSELECTINCREMENT:
case OP_NEG:
case OP_CONDJUMP:
Expand Down Expand Up @@ -353,6 +354,11 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)});
}

case OP_NZCVSELECTV: {
auto Op = IROp->CW<IR::IROp_NZCVSelectV>();
return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)});
}

case OP_NEG: {
auto Op = IROp->CW<IR::IROp_Neg>();
return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)});
Expand Down
39 changes: 0 additions & 39 deletions FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ class X87StackOptimization final : public Pass {
bool ReducedPrecisionMode;

// Helpers
std::tuple<Ref, Ref> SplitF64SigExp(Ref Node);
Ref RotateRight8(uint32_t V, Ref Amount);

// Handles a Unary operation.
Expand Down Expand Up @@ -518,20 +517,6 @@ Ref X87StackOptimization::SynchronizeStackValues() {
return TopValue;
}

// Splits a 64-bit double held in `Node` into its exponent and significand.
//
// Returns {Exp, Sig}:
//   Exp - the 11-bit exponent field minus 1023, converted to a double.
//   Sig - the input with its exponent field replaced by 0x3ff (sign and
//         mantissa bits are kept).
// No input value is special-cased here; a zero input goes through the same
// bit manipulation as any other value.
std::tuple<Ref, Ref> X87StackOptimization::SplitF64SigExp(Ref Node) {
  Ref Bits = IREmit->_VExtractToGPR(8, 8, Node, 0);

  // Exponent: isolate the field, shift it down to bit 0, remove the bias.
  Ref ExpField = IREmit->_And(OpSize::i64Bit, Bits, GetConstant(0x7ff0000000000000LL));
  Ref BiasedExp = IREmit->_Lshr(OpSize::i64Bit, ExpField, GetConstant(52));
  Ref UnbiasedExp = IREmit->_Sub(OpSize::i64Bit, BiasedExp, GetConstant(1023));
  Ref ExpValue = IREmit->_Float_FromGPR_S(8, 8, UnbiasedExp);

  // Significand: keep sign + mantissa and overwrite the exponent field.
  Ref SignAndMantissa = IREmit->_And(OpSize::i64Bit, Bits, GetConstant(0x800fffffffffffffLL));
  Ref SigBits = IREmit->_Or(OpSize::i64Bit, SignAndMantissa, GetConstant(0x3ff0000000000000LL));
  Ref SigValue = IREmit->_VCastFromGPR(8, 8, SigBits);

  return std::make_tuple(ExpValue, SigValue);
}

void X87StackOptimization::Run(IREmitter* Emit) {
FEXCORE_PROFILE_SCOPED("PassManager::x87StackOpt");

Expand Down Expand Up @@ -964,30 +949,6 @@ void X87StackOptimization::Run(IREmitter* Emit) {
break;
}

case OP_F80XTRACTSTACK: {
Ref St0 = LoadStackValue();

Ref Exp {};
Ref Sig {};
if (ReducedPrecisionMode) {
std::tie(Exp, Sig) = SplitF64SigExp(St0);
} else {
Exp = IREmit->_F80XTRACT_EXP(St0);
Sig = IREmit->_F80XTRACT_SIG(St0);
}

if (SlowPath) {
// Write exp to top, update top for a push and set sig at new top.
StoreStackValueAtOffset_Slow(Exp, 0, false);
UpdateTopForPush_Slow();
StoreStackValueAtOffset_Slow(Sig);
} else {
StackData.setTop(StackMemberInfo {Exp});
StackData.push(StackMemberInfo {Sig});
}
break;
}

case OP_SYNCSTACKTOSLOW: {
// This synchronizes stack values but doesn't necessarily moves us off the FastPath!
Ref NewTop = SynchronizeStackValues();
Expand Down
30 changes: 30 additions & 0 deletions unittests/ASM/X87/D9_F4_02.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
%ifdef CONFIG
{
"RegData": {
"MM7": ["0x8000000000000000", "0xFFFF"],
"MM6": ["0x0000000000000000", "0x0000"],
"MM5": ["0x8000000000000000", "0xFFFF"],
"MM4": ["0x0000000000000000", "0x8000"]
},
"MemoryRegions": {
"0x100000000": "4096"
}
}
%endif

; FXTRACT zero-input test: runs FXTRACT on +0.0 and on -0.0.
; The expected register contents are listed in the CONFIG RegData block:
; the exponent result is -inf in both cases, and the significand result is
; a zero that carries the sign of the input.
section .data
; Negative zero as a 64-bit double; the positive zero comes from fldz below.
nzer: dq -0.0

section .text
global _start
_start:
finit
; +0.0 case
fldz
fxtract ; MM7 is -inf, MM6 is 0.0

; -0.0 case, loaded from memory
lea rdx, [rel nzer]
fld qword [rdx]
fxtract ; MM5 is -inf, MM4 is -0.0

hlt

90 changes: 90 additions & 0 deletions unittests/ASM/X87/FScaleFXtract.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
%ifdef CONFIG
{
"RegData": {
"R8": "1"
}
}
%endif
; ,
; "R9": "1",
; "R10": "1",
; "R11": "1",
; "R12": "1"
; Inputs for the round-trip checks. Only num0 is exercised by the active
; code below; num1..num4 are referenced solely by the commented-out cases.
section .data
num0: dq 0.0
num1: dq 125.78
num2: dq 1023.12
num3: dq -23487.152
num4: dq -1230192.123

;; Tests that FSCALE undoes FXTRACT: splitting a value with FXTRACT and
;; recombining it with FSCALE should give back the original value.
;; Each case leaves 1 in its result register when the round trip compares equal.
section .text
global _start
_start:

; num0 == 0.0
finit
fld qword [rel num0]
; keep a copy of the original value for the final comparison
fld st0
fxtract
fscale
; overwrite the exponent left below the result and pop it
fstp st1 ; at this point st0 and st1 should be the same
fcom
fnstsw ax
; 0x4500 keeps C3 (bit 14), C2 (bit 10) and C0 (bit 8) of the status word;
; 0x4000 means only C3 is set, which is FCOM's "equal" result.
and ax, 0x4500
cmp ax, 0x4000
setz r8b

; The remaining cases are disabled (commented out), matching the
; commented-out R9..R12 entries next to the CONFIG block.

; ; num1 == 125.78
; finit
; fld qword [rel num1]
; fld st0
; fxtract
; fscale
; fstp st1 ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r9b

; ; num2 == 1023.12
; finit
; fld qword [rel num2]
; fld st0
; fxtract
; fscale
; fstp st1 ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r10b

; ; num3 == -23487.152
; finit
; fld qword [rel num3]
; fld st0
; fxtract
; fscale
; fstp st1 ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r11b

; ; num4 == -1230192.123
; finit
; fld qword [rel num4]
; fld st0
; fxtract
; fscale
; fstp st1 ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r12b

hlt
Loading

0 comments on commit a421ff1

Please sign in to comment.