diff --git a/FEXCore/Source/Common/SoftFloat.h b/FEXCore/Source/Common/SoftFloat.h index 101c810b8b..c36439b051 100644 --- a/FEXCore/Source/Common/SoftFloat.h +++ b/FEXCore/Source/Common/SoftFloat.h @@ -233,6 +233,10 @@ struct FEX_PACKED X80SoftFloat { return Result; #else + // Zero is a special case, the significand for +/- 0 is +/- zero. + if (lhs.Exponent == 0x0 && lhs.Significand == 0x0) { + return lhs; + } X80SoftFloat Tmp = lhs; Tmp.Exponent = 0x3FFF; Tmp.Sign = lhs.Sign; @@ -256,6 +260,12 @@ struct FEX_PACKED X80SoftFloat { return Result; #else + // Zero is a special case, the exponent is always -inf + if (lhs.Exponent == 0x0 && lhs.Significand == 0x0) { + X80SoftFloat Result(1, 0x7FFFUL, 0x8000'0000'0000'0000UL); + return Result; + } + int32_t TrueExp = lhs.Exponent - ExponentBias; return i32_to_extF80(TrueExp); #endif diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index c1511f1418..4493671d99 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -1535,6 +1535,14 @@ DEF_OP(NZCVSelect) { } } +DEF_OP(NZCVSelectV) { + auto Op = IROp->C(); + + auto cc = MapCC(Op->Cond); + const auto SubRegSize = ConvertSubRegSizePair248(IROp); + fcsel(SubRegSize.Scalar, GetVReg(Node), GetVReg(Op->TrueVal.ID()), GetVReg(Op->FalseVal.ID()), cc); +} + DEF_OP(NZCVSelectIncrement) { auto Op = IROp->C(); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 31c36ee425..b3f2408823 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -5535,7 +5535,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>}, {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>}, {OPD(0xD9, 
0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>}, - {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80XTRACTSTACK, false>}, + {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACTF64}, {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>}, {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>}, {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>}, @@ -5765,7 +5765,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>}, {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>}, {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>}, - {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80XTRACTSTACK, false>}, + {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACT}, {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>}, {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>}, {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 4c66d3ebac..502c0acc8b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -735,6 +735,7 @@ class OpDispatchBuilder final : public IREmitter { void X87FNSAVE(OpcodeArgs); void X87FRSTOR(OpcodeArgs); void X87FXAM(OpcodeArgs); + void X87FXTRACT(OpcodeArgs); void X87FCMOV(OpcodeArgs); void X87EMMS(OpcodeArgs); void X87FFREE(OpcodeArgs); @@ -768,7 +769,6 @@ class OpDispatchBuilder final : public IREmitter { void FABSF64(OpcodeArgs); void 
FTSTF64(OpcodeArgs); void FRNDINTF64(OpcodeArgs); - void FXTRACTF64(OpcodeArgs); void FNINITF64(OpcodeArgs); void FSQRTF64(OpcodeArgs); void X87UnaryOpF64(OpcodeArgs, FEXCore::IR::IROps IROp); @@ -780,6 +780,7 @@ class OpDispatchBuilder final : public IREmitter { void X87FNSAVEF64(OpcodeArgs); void X87FRSTORF64(OpcodeArgs); void X87FXAMF64(OpcodeArgs); + void X87FXTRACTF64(OpcodeArgs); void X87LDENVF64(OpcodeArgs); void FCOMIF64(OpcodeArgs, size_t width, bool Integer, FCOMIFlags whichflags, bool poptwice); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index 5b7344f92b..296dfe013d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -804,4 +804,14 @@ void OpDispatchBuilder::X87FXAM(OpcodeArgs) { SetRFLAG(C3); } +void OpDispatchBuilder::X87FXTRACT(OpcodeArgs) { + auto Top = _ReadStackValue(0); + + _PopStackDestroy(); + auto Exp = _F80XTRACT_EXP(Top); + auto Sig = _F80XTRACT_SIG(Top); + _PushStack(Exp, Exp, 80, true); + _PushStack(Sig, Sig, 80, true); +} + } // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp index e5bca53d99..5e0da4daff 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp @@ -8,6 +8,7 @@ desc: Handles x86/64 x87 to IR #include "Interface/Core/OpcodeDispatcher.h" #include "Interface/Core/X86Tables/X86Tables.h" +#include "Interface/IR/IR.h" #include #include @@ -519,4 +520,37 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) { _StoreContextIndexed(Reg, Top, 8, MMBaseOffset(), 16, FPRClass); } +void OpDispatchBuilder::X87FXTRACTF64(OpcodeArgs) { + // Split node into SIG and EXP while handling the special zero case. + // i.e. 
if val == 0.0, then sig = 0.0, exp = -inf + // if val == -0.0, then sig = -0.0, exp = -inf + // otherwise we just extract the 64-bit sig and exp as normal. + Ref Node = _ReadStackValue(0); + + Ref Gpr = _VExtractToGPR(8, 8, Node, 0); + + // zero case + Ref ExpZV = _VCastFromGPR(8, 8, _Constant(0xfff0'0000'0000'0000UL)); + Ref SigZV = Node; + + // non zero case + Ref ExpNZ = _Bfe(OpSize::i64Bit, 11, 52, Gpr); + ExpNZ = _Sub(OpSize::i64Bit, ExpNZ, _Constant(1023)); + Ref ExpNZV = _Float_FromGPR_S(8, 8, ExpNZ); + + Ref SigNZ = _And(OpSize::i64Bit, Gpr, _Constant(0x800f'ffff'ffff'ffffLL)); + SigNZ = _Or(OpSize::i64Bit, SigNZ, _Constant(0x3ff0'0000'0000'0000LL)); + Ref SigNZV = _VCastFromGPR(8, 8, SigNZ); + + // Comparison and select to push onto stack + SaveNZCV(); + _TestNZ(OpSize::i64Bit, Gpr, _Constant(0x7fff'ffff'ffff'ffffUL)); + + Ref Sig = _NZCVSelectV(8, {COND_EQ}, SigZV, SigNZV); + Ref Exp = _NZCVSelectV(8, {COND_EQ}, ExpZV, ExpNZV); + + _PopStackDestroy(); + _PushStack(Exp, Exp, 64, true); + _PushStack(Sig, Sig, 64, true); +} } // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index c061f407d9..25e2e2219b 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -1540,6 +1540,14 @@ "ResultSize == FEXCore::IR::OpSize::i32Bit || ResultSize == FEXCore::IR::OpSize::i64Bit" ] }, + "FPR = NZCVSelectV u8:#ResultSize, CondClass:$Cond, FPR:$TrueVal, FPR:$FalseVal": { + "Desc": [ + "Select based on value in NZCV flags, where TrueVal and FalseVal are both FPRs.", + "op:", + "Dest = Cond ? 
TrueVal : FalseVal" + ], + "DestSize": "ResultSize" + }, "GPR = NZCVSelectIncrement OpSize:#ResultSize, CondClass:$Cond, GPR:$TrueVal, GPR:$FalseVal": { "Desc": ["Select and increment based on value in NZCV flags", "op:", @@ -3043,10 +3051,6 @@ "DestSize": "16", "JITDispatch": false }, - "F80XTRACTStack": { - "X87": true, - "HasSideEffects": true - }, "FPR = F80XTRACT_EXP FPR:$X80Src": { "DestSize": "16", "JITDispatch": false diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp index 19d6b137bc..29ee822f10 100644 --- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -319,6 +319,7 @@ constexpr FlagInfo ClassifyConst(IROps Op) { case OP_STOREAF: return FlagInfo::Pack({.Write = FLAG_A, .CanEliminate = true}); case OP_NZCVSELECT: + case OP_NZCVSELECTV: case OP_NZCVSELECTINCREMENT: case OP_NEG: case OP_CONDJUMP: @@ -353,6 +354,11 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) { return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)}); } + case OP_NZCVSELECTV: { + auto Op = IROp->CW(); + return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)}); + } + case OP_NEG: { auto Op = IROp->CW(); return FlagInfo::Pack({.Read = FlagsForCondClassType(Op->Cond)}); diff --git a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp index ccab50edaa..e2c2bcdbcb 100644 --- a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp @@ -156,7 +156,6 @@ class X87StackOptimization final : public Pass { bool ReducedPrecisionMode; // Helpers - std::tuple SplitF64SigExp(Ref Node); Ref RotateRight8(uint32_t V, Ref Amount); // Handles a Unary operation. 
@@ -518,20 +517,6 @@ Ref X87StackOptimization::SynchronizeStackValues() { return TopValue; } -std::tuple X87StackOptimization::SplitF64SigExp(Ref Node) { - Ref Gpr = IREmit->_VExtractToGPR(8, 8, Node, 0); - - Ref Exp = IREmit->_And(OpSize::i64Bit, Gpr, GetConstant(0x7ff0000000000000LL)); - Exp = IREmit->_Lshr(OpSize::i64Bit, Exp, GetConstant(52)); - Exp = IREmit->_Sub(OpSize::i64Bit, Exp, GetConstant(1023)); - Exp = IREmit->_Float_FromGPR_S(8, 8, Exp); - Ref Sig = IREmit->_And(OpSize::i64Bit, Gpr, GetConstant(0x800fffffffffffffLL)); - Sig = IREmit->_Or(OpSize::i64Bit, Sig, GetConstant(0x3ff0000000000000LL)); - Sig = IREmit->_VCastFromGPR(8, 8, Sig); - - return std::tuple {Exp, Sig}; -} - void X87StackOptimization::Run(IREmitter* Emit) { FEXCORE_PROFILE_SCOPED("PassManager::x87StackOpt"); @@ -964,30 +949,6 @@ void X87StackOptimization::Run(IREmitter* Emit) { break; } - case OP_F80XTRACTSTACK: { - Ref St0 = LoadStackValue(); - - Ref Exp {}; - Ref Sig {}; - if (ReducedPrecisionMode) { - std::tie(Exp, Sig) = SplitF64SigExp(St0); - } else { - Exp = IREmit->_F80XTRACT_EXP(St0); - Sig = IREmit->_F80XTRACT_SIG(St0); - } - - if (SlowPath) { - // Write exp to top, update top for a push and set sig at new top. - StoreStackValueAtOffset_Slow(Exp, 0, false); - UpdateTopForPush_Slow(); - StoreStackValueAtOffset_Slow(Sig); - } else { - StackData.setTop(StackMemberInfo {Exp}); - StackData.push(StackMemberInfo {Sig}); - } - break; - } - case OP_SYNCSTACKTOSLOW: { // This synchronizes stack values but doesn't necessarily moves us off the FastPath! 
Ref NewTop = SynchronizeStackValues(); diff --git a/unittests/ASM/X87/D9_F4_02.asm b/unittests/ASM/X87/D9_F4_02.asm new file mode 100644 index 0000000000..4c6290e62f --- /dev/null +++ b/unittests/ASM/X87/D9_F4_02.asm @@ -0,0 +1,30 @@ +%ifdef CONFIG +{ + "RegData": { + "MM7": ["0x8000000000000000", "0xFFFF"], + "MM6": ["0x0000000000000000", "0x0000"], + "MM5": ["0x8000000000000000", "0xFFFF"], + "MM4": ["0x0000000000000000", "0x8000"] + }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +section .data + nzer: dq -0.0 + +section .text +global _start +_start: +finit +fldz +fxtract ; MM7 is -inf, MM6 is 0.0 + +lea rdx, [rel nzer] +fld qword [rdx] +fxtract ; MM5 is -inf, MM4 is -0.0 + +hlt + diff --git a/unittests/ASM/X87/FScaleFXtract.asm b/unittests/ASM/X87/FScaleFXtract.asm new file mode 100644 index 0000000000..b82df0d21d --- /dev/null +++ b/unittests/ASM/X87/FScaleFXtract.asm @@ -0,0 +1,90 @@ +%ifdef CONFIG +{ + "RegData": { + "R8": "1" + } +} +%endif +; , +; "R9": "1", +; "R10": "1", +; "R11": "1", +; "R12": "1" +section .data + num0: dq 0.0 + num1: dq 125.78 + num2: dq 1023.12 + num3: dq -23487.152 + num4: dq -1230192.123 + +;; Tests the FScale / FXtract inverse behaviour +section .text + global _start +_start: + +; num0 == 0.0 +finit +fld qword [rel num0] +fld st0 +fxtract +fscale +fstp st1 ; at this point st0 and st1 should be the same +fcom +fnstsw ax +and ax, 0x4500 +cmp ax, 0x4000 +setz r8b + +; ; num1 == 125.78 +; finit +; fld qword [rel num1] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r9b + +; ; num2 == 1023.12 +; finit +; fld qword [rel num2] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r10b + +; ; num3 == -23487.152 +; finit +; fld qword [rel num3] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 
should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r11b + +; ; num4 == -1230192.123 +; finit +; fld qword [rel num4] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r12b + +hlt diff --git a/unittests/ASM/X87_F64/D9_F4_02_F64.asm b/unittests/ASM/X87_F64/D9_F4_02_F64.asm new file mode 100644 index 0000000000..e363c8d46e --- /dev/null +++ b/unittests/ASM/X87_F64/D9_F4_02_F64.asm @@ -0,0 +1,50 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": ["0xFFF0000000000000"], + "RBX": ["0x0000000000000000"], + "RCX": ["0xFFF0000000000000"], + "RDX": ["0x8000000000000000"] + }, + "Env": { "FEX_X87REDUCEDPRECISION" : "1" }, + "MemoryRegions": { + "0x100000000": "4096" + } +} +%endif + +; Instead of checking MMX registers, +; move results to general purpose registers and check them there +; so that hostrunner tests work properly. + +section .data + nzer: dq -0.0 + +section .bss + expz: resq 1 + sigz: resq 1 + expnz: resq 1 + signz: resq 1 + +section .text +global _start +_start: +finit +fldz +fxtract +fstp qword [rel sigz] +fstp qword [rel expz] + +lea rdx, [rel nzer] +fld qword [rdx] +fxtract +fstp qword [rel signz] +fstp qword [rel expnz] + +mov rax, [rel expz] +mov rbx, [rel sigz] +mov rcx, [rel expnz] +mov rdx, [rel signz] + +hlt + diff --git a/unittests/ASM/X87_F64/FScaleFXtract_F64.asm b/unittests/ASM/X87_F64/FScaleFXtract_F64.asm new file mode 100644 index 0000000000..80d462a3c8 --- /dev/null +++ b/unittests/ASM/X87_F64/FScaleFXtract_F64.asm @@ -0,0 +1,91 @@ +%ifdef CONFIG +{ + "Env": { "FEX_X87REDUCEDPRECISION" : "1" }, + "RegData": { + "R8": "1" + } +} +%endif + ; , + ; "R9": "1", + ; "R10": "1", + ; "R11": "1", + ; "R12": "1" +section .data + num0: dq 0.0 + num1: dq 125.78 + num2: dq 1023.12 + num3: dq -23487.152 + num4: dq -1230192.123 + +;; Tests the FScale / FXtract inverse behaviour +section .text + global _start +_start: + +; num0 == 
0.0 +finit +fld qword [rel num0] +fld st0 +fxtract +fscale +fstp st1 ; at this point st0 and st1 should be the same +fcom +fnstsw ax +and ax, 0x4500 +cmp ax, 0x4000 +setz r8b + +; ; num1 == 125.78 +; finit +; fld qword [rel num1] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r9b + +; ; num2 == 1023.12 +; finit +; fld qword [rel num2] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r10b + +; ; num3 == -23487.152 +; finit +; fld qword [rel num3] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r11b + +; ; num4 == -1230192.123 +; finit +; fld qword [rel num4] +; fld st0 +; fxtract +; fscale +; fstp st1 ; at this point st0 and st1 should be the same +; fcom +; fnstsw ax +; and ax, 0x4500 +; cmp ax, 0x4000 +; setz r12b + +hlt diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index 2b9b8b1185..8d4974144b 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -4797,7 +4797,7 @@ ] }, "fxtract": { - "ExpectedInstructionCount": 71, + "ExpectedInstructionCount": 85, "Comment": [ "0xd9 11b 0xf4 /6" ], @@ -4805,6 +4805,14 @@ "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "mrs x0, nzcv", "str w0, [x28, #1000]", "stp x4, x7, [x28, #280]", @@ -4861,17 +4869,23 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", + "sub w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "add x0, x28, x20, 
lsl #4", "str q3, [x0, #1040]", - "mov w21, #0x1", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "orr w21, w21, w23", + "strb w21, [x28, #1298]", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", "strb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "str q2, [x0, #1040]", - "ldrb w22, [x28, #1298]", - "lsl w20, w21, w20", - "orr w20, w22, w20", + "ldrb w21, [x28, #1298]", + "lsl w20, w22, w20", + "orr w20, w21, w20", "strb w20, [x28, #1298]" ] }, diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json index 77a203b7ce..4ee3e6c663 100644 --- a/unittests/InstructionCountCI/FlagM/x87_f64.json +++ b/unittests/InstructionCountCI/FlagM/x87_f64.json @@ -2536,7 +2536,7 @@ ] }, "fxtract": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 43, "Comment": [ "0xd9 11b 0xf4 /6" ], @@ -2545,25 +2545,45 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #1040]", "mov x21, v2.d[0]", - "and x22, x21, #0x7ff0000000000000", - "lsr x22, x22, #52", + "mov x22, #0xfff0000000000000", + "fmov d3, x22", + "ubfx x22, x21, #52, #11", "sub x22, x22, #0x3ff (1023)", - "scvtf d2, x22", - "and x21, x21, #0x800fffffffffffff", - "orr x21, x21, #0x3ff0000000000000", - "fmov d3, x21", - "add x0, x28, x20, lsl #4", - "str d2, [x0, #1040]", - "mov w21, #0x1", + "scvtf d4, x22", + "and x22, x21, #0x800fffffffffffff", + "orr x22, x22, #0x3ff0000000000000", + "fmov d5, x22", + "mrs x22, nzcv", + "tst x21, #0x7fffffffffffffff", + "fcsel d2, d2, d5, eq", + "fcsel d3, d3, d4, eq", + "ldrb w21, [x28, #1298]", + "mov w23, #0x1", + "lsl w24, w23, w20", + "bic w21, w21, w24", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", "strb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "str d3, [x0, #1040]", - "ldrb w22, [x28, #1298]", - "lsl w20, w21, w20", - "orr w20, w22, w20", - "strb w20, [x28, #1298]" + "ldrb w21, [x28, #1298]", + "lsl w24, 
w23, w20", + "orr w21, w21, w24", + "strb w21, [x28, #1298]", + "sub w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "str d2, [x0, #1040]", + "ldrb w21, [x28, #1298]", + "lsl w20, w23, w20", + "orr w20, w21, w20", + "strb w20, [x28, #1298]", + "msr nzcv, x22" ] }, "fprem1": { diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index f6f1067a8f..665ec21db4 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -4796,7 +4796,7 @@ ] }, "fxtract": { - "ExpectedInstructionCount": 71, + "ExpectedInstructionCount": 85, "Comment": [ "0xd9 11b 0xf4 /6" ], @@ -4804,6 +4804,14 @@ "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", + "ldrb w21, [x28, #1298]", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w21, w21, w23", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "mrs x0, nzcv", "str w0, [x28, #1000]", "stp x4, x7, [x28, #280]", @@ -4860,17 +4868,23 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", + "sub w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "str q3, [x0, #1040]", - "mov w21, #0x1", + "ldrb w21, [x28, #1298]", + "lsl w23, w22, w20", + "orr w21, w21, w23", + "strb w21, [x28, #1298]", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", "strb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "str q2, [x0, #1040]", - "ldrb w22, [x28, #1298]", - "lsl w20, w21, w20", - "orr w20, w22, w20", + "ldrb w21, [x28, #1298]", + "lsl w20, w22, w20", + "orr w20, w21, w20", "strb w20, [x28, #1298]" ] }, diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json index b91037d209..5f598f0fb8 100644 --- a/unittests/InstructionCountCI/x87_f64.json +++ b/unittests/InstructionCountCI/x87_f64.json @@ -2554,7 +2554,7 @@ ] }, "fxtract": { - 
"ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 43, "Comment": [ "0xd9 11b 0xf4 /6" ], @@ -2563,25 +2563,45 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #1040]", "mov x21, v2.d[0]", - "and x22, x21, #0x7ff0000000000000", - "lsr x22, x22, #52", + "mov x22, #0xfff0000000000000", + "fmov d3, x22", + "ubfx x22, x21, #52, #11", "sub x22, x22, #0x3ff (1023)", - "scvtf d2, x22", - "and x21, x21, #0x800fffffffffffff", - "orr x21, x21, #0x3ff0000000000000", - "fmov d3, x21", - "add x0, x28, x20, lsl #4", - "str d2, [x0, #1040]", - "mov w21, #0x1", + "scvtf d4, x22", + "and x22, x21, #0x800fffffffffffff", + "orr x22, x22, #0x3ff0000000000000", + "fmov d5, x22", + "mrs x22, nzcv", + "tst x21, #0x7fffffffffffffff", + "fcsel d2, d2, d5, eq", + "fcsel d3, d3, d4, eq", + "ldrb w21, [x28, #1298]", + "mov w23, #0x1", + "lsl w24, w23, w20", + "bic w21, w21, w24", + "strb w21, [x28, #1298]", + "add w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", "strb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "str d3, [x0, #1040]", - "ldrb w22, [x28, #1298]", - "lsl w20, w21, w20", - "orr w20, w22, w20", - "strb w20, [x28, #1298]" + "ldrb w21, [x28, #1298]", + "lsl w24, w23, w20", + "orr w21, w21, w24", + "strb w21, [x28, #1298]", + "sub w20, w20, #0x1 (1)", + "and w20, w20, #0x7", + "strb w20, [x28, #1019]", + "add x0, x28, x20, lsl #4", + "str d2, [x0, #1040]", + "ldrb w21, [x28, #1298]", + "lsl w20, w23, w20", + "orr w20, w21, w20", + "strb w20, [x28, #1298]", + "msr nzcv, x22" ] }, "fprem1": {