Skip to content

Commit

Permalink
Merge pull request FEX-Emu#3901 from pmatos/TopUsage
Browse files Browse the repository at this point in the history
Reuse Top in ReconstructFSW_Helper
  • Loading branch information
alyssarosenzweig authored Jul 31, 2024
2 parents dd26b0c + 3332220 commit 941fd9c
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 167 deletions.
20 changes: 13 additions & 7 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,24 +681,30 @@ void OpDispatchBuilder::X87ModifySTP(OpcodeArgs, bool Inc) {
// Optionally we can pass a pre calculated value for Top, otherwise we calculate it
// during the function runtime.
Ref OpDispatchBuilder::ReconstructFSW_Helper(Ref T) {
  // Builds the x87 status word (FSW) value in IR from the separately tracked
  // pieces of state and returns the resulting Ref.
  //
  // T: optional pre-calculated TOP value. When it is null the TOP value is
  //    loaded here via GetX87Top(); passing it lets a caller that already
  //    holds TOP avoid a second load.
  //
  // Bit positions produced by this helper:
  //   C0 -> bit 8, C1 -> bit 9, C2 -> bit 10, TOP -> bits 11..13, C3 -> bit 14.

  // Start with the top value. It seeds the result with a plain shift, so no
  // separate bitfield insert is required once the flags are merged in.
  // NOTE(review): the shift does not mask TOP to 3 bits - this assumes the
  // value is already in the range [0, 7]; confirm against callers passing T.
  auto Top = T ? T : GetX87Top();
  Ref FSW = _Lshl(OpSize::i64Bit, Top, _Constant(11));

  // We must construct the FSW from our various bits. Each condition flag is
  // loaded right before it is OR'd into place at its bit position.
  auto C0 = GetRFLAG(FEXCore::X86State::X87FLAG_C0_LOC);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C0, 8);

  auto C1 = GetRFLAG(FEXCore::X86State::X87FLAG_C1_LOC);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C1, 9);

  auto C2 = GetRFLAG(FEXCore::X86State::X87FLAG_C2_LOC);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C2, 10);

  auto C3 = GetRFLAG(FEXCore::X86State::X87FLAG_C3_LOC);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C3, 14);

  return FSW;
}

// Store Status Word
// There's no load Status Word instruction but you can load it through frstor
// or fldenv.
void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) {

Ref TopValue = _SyncStackToSlow();
Expand Down
32 changes: 16 additions & 16 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -1191,16 +1191,16 @@
"ExpectedArm64ASM": [
"ldrh w20, [x28, #1296]",
"strh w20, [x4]",
"ldrb w20, [x28, #1016]",
"ldrb w20, [x28, #1019]",
"lsl x20, x20, #11",
"ldrb w21, [x28, #1016]",
"orr x20, x20, x21, lsl #8",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1018]",
"orr x20, x20, x21, lsl #10",
"ldrb w21, [x28, #1022]",
"orr x20, x20, x21, lsl #14",
"strh w20, [x4, #2]",
"ldrb w20, [x28, #1298]",
"strb w20, [x4, #4]",
Expand Down Expand Up @@ -1400,16 +1400,16 @@
"b #+0x80",
"ldrh w20, [x28, #1296]",
"strh w20, [x4]",
"ldrb w20, [x28, #1016]",
"ldrb w20, [x28, #1019]",
"lsl x20, x20, #11",
"ldrb w21, [x28, #1016]",
"orr x20, x20, x21, lsl #8",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1018]",
"orr x20, x20, x21, lsl #10",
"ldrb w21, [x28, #1022]",
"orr x20, x20, x21, lsl #14",
"strh w20, [x4, #2]",
"ldrb w20, [x28, #1298]",
"strb w20, [x4, #4]",
Expand Down
236 changes: 204 additions & 32 deletions unittests/InstructionCountCI/FlagM/x87.json
Original file line number Diff line number Diff line change
Expand Up @@ -3942,16 +3942,16 @@
"ExpectedArm64ASM": [
"ldrh w20, [x28, #1296]",
"str w20, [x4]",
"ldrb w20, [x28, #1016]",
"ldrb w20, [x28, #1019]",
"lsl x20, x20, #11",
"ldrb w21, [x28, #1016]",
"orr x20, x20, x21, lsl #8",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1018]",
"orr x20, x20, x21, lsl #10",
"ldrb w21, [x28, #1022]",
"orr x20, x20, x21, lsl #14",
"str w20, [x4, #4]",
"mov w20, #0x0",
"ldrb w21, [x28, #1298]",
Expand Down Expand Up @@ -11595,16 +11595,16 @@
"ldrb w20, [x28, #1019]",
"ldrh w21, [x28, #1296]",
"str w21, [x4]",
"ldrb w21, [x28, #1016]",
"ldrb w21, [x28, #1019]",
"lsl x21, x21, #11",
"ldrb w22, [x28, #1016]",
"orr x21, x21, x22, lsl #8",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"lsl x21, x21, #8",
"orr x21, x21, x22, lsl #9",
"orr x21, x21, x23, lsl #10",
"orr x21, x21, x24, lsl #14",
"ldrb w22, [x28, #1019]",
"bfi x21, x22, #11, #3",
"ldrb w22, [x28, #1018]",
"orr x21, x21, x22, lsl #10",
"ldrb w22, [x28, #1022]",
"orr x21, x21, x22, lsl #14",
"str w21, [x4, #4]",
"mov w21, #0x0",
"ldrb w22, [x28, #1298]",
Expand Down Expand Up @@ -11711,19 +11711,191 @@
"0xdd !11b /7"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1016]",
"ldrb w20, [x28, #1019]",
"lsl x20, x20, #11",
"ldrb w21, [x28, #1016]",
"orr x20, x20, x21, lsl #8",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1018]",
"orr x20, x20, x21, lsl #10",
"ldrb w21, [x28, #1022]",
"orr x20, x20, x21, lsl #14",
"strh w20, [x4]"
]
},
"fld and fnstsw": {
"ExpectedInstructionCount": 160,
"x86Insts": [
"fld dword [rax]",
"fld dword [rax + 4]",
"fld dword [rax + 8]",
"fld dword [rax + 12]",
"fnstsw [rbx]"
],
"ExpectedArm64ASM": [
"ldr s2, [x4]",
"mrs x0, nzcv",
"str w0, [x28, #1000]",
"stp x4, x5, [x28, #280]",
"stp x6, x7, [x28, #296]",
"str x8, [x28, #312]",
"stp x16, x17, [x28, #376]",
"sub sp, sp, #0x70 (112)",
"mov x0, sp",
"st1 {v2.2d, v3.2d}, [x0], #32",
"st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
"str x30, [x0], #16",
"fmov s0, s2",
"ldrh w0, [x28, #1296]",
"ldr x1, [x28, #1424]",
"blr x1",
"ldr w4, [x28, #1000]",
"msr nzcv, x4",
"ldp x4, x5, [x28, #280]",
"ldp x6, x7, [x28, #296]",
"ldr x8, [x28, #312]",
"ldp x16, x17, [x28, #376]",
"ld1 {v2.2d, v3.2d}, [sp], #32",
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
"ldr x30, [sp], #16",
"eor v2.16b, v2.16b, v2.16b",
"mov v2.d[0], x0",
"mov v2.h[4], w1",
"ldr s3, [x4, #4]",
"mrs x0, nzcv",
"str w0, [x28, #1000]",
"stp x4, x5, [x28, #280]",
"stp x6, x7, [x28, #296]",
"str x8, [x28, #312]",
"stp x16, x17, [x28, #376]",
"sub sp, sp, #0x70 (112)",
"mov x0, sp",
"st1 {v2.2d, v3.2d}, [x0], #32",
"st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
"str x30, [x0], #16",
"fmov s0, s3",
"ldrh w0, [x28, #1296]",
"ldr x1, [x28, #1424]",
"blr x1",
"ldr w4, [x28, #1000]",
"msr nzcv, x4",
"ldp x4, x5, [x28, #280]",
"ldp x6, x7, [x28, #296]",
"ldr x8, [x28, #312]",
"ldp x16, x17, [x28, #376]",
"ld1 {v2.2d, v3.2d}, [sp], #32",
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
"ldr x30, [sp], #16",
"eor v3.16b, v3.16b, v3.16b",
"mov v3.d[0], x0",
"mov v3.h[4], w1",
"mov w20, #0x8",
"ldr s4, [x4, #8]",
"mrs x0, nzcv",
"str w0, [x28, #1000]",
"stp x4, x5, [x28, #280]",
"stp x6, x7, [x28, #296]",
"str x8, [x28, #312]",
"stp x16, x17, [x28, #376]",
"sub sp, sp, #0x70 (112)",
"mov x0, sp",
"st1 {v2.2d, v3.2d}, [x0], #32",
"st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
"str x30, [x0], #16",
"fmov s0, s4",
"ldrh w0, [x28, #1296]",
"ldr x1, [x28, #1424]",
"blr x1",
"ldr w4, [x28, #1000]",
"msr nzcv, x4",
"ldp x4, x5, [x28, #280]",
"ldp x6, x7, [x28, #296]",
"ldr x8, [x28, #312]",
"ldp x16, x17, [x28, #376]",
"ld1 {v2.2d, v3.2d}, [sp], #32",
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
"ldr x30, [sp], #16",
"eor v4.16b, v4.16b, v4.16b",
"mov v4.d[0], x0",
"mov v4.h[4], w1",
"ldr s5, [x4, #12]",
"mrs x0, nzcv",
"str w0, [x28, #1000]",
"stp x4, x5, [x28, #280]",
"stp x6, x7, [x28, #296]",
"str x8, [x28, #312]",
"stp x16, x17, [x28, #376]",
"sub sp, sp, #0x70 (112)",
"mov x0, sp",
"st1 {v2.2d, v3.2d}, [x0], #32",
"st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
"str x30, [x0], #16",
"fmov s0, s5",
"ldrh w0, [x28, #1296]",
"ldr x1, [x28, #1424]",
"blr x1",
"ldr w4, [x28, #1000]",
"msr nzcv, x4",
"ldp x4, x5, [x28, #280]",
"ldp x6, x7, [x28, #296]",
"ldr x8, [x28, #312]",
"ldp x16, x17, [x28, #376]",
"ld1 {v2.2d, v3.2d}, [sp], #32",
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
"ldr x30, [sp], #16",
"eor v5.16b, v5.16b, v5.16b",
"mov v5.d[0], x0",
"mov v5.h[4], w1",
"ldrb w21, [x28, #1019]",
"sub w21, w21, #0x4 (4)",
"and w21, w21, #0x7",
"strb w21, [x28, #1019]",
"add x0, x28, x21, lsl #4",
"str q5, [x0, #1040]",
"add w22, w21, #0x1 (1)",
"and w22, w22, #0x7",
"add x0, x28, x22, lsl #4",
"str q4, [x0, #1040]",
"add w23, w21, #0x2 (2)",
"and w23, w23, #0x7",
"add x0, x28, x23, lsl #4",
"str q3, [x0, #1040]",
"add w24, w21, #0x3 (3)",
"and w24, w24, #0x7",
"add x0, x28, x24, lsl #4",
"str q2, [x0, #1040]",
"sub w25, w20, w21",
"ldrb w30, [x28, #1298]",
"mov w18, #0xf0f",
"lsr w25, w18, w25",
"orr w25, w30, w25",
"strb w25, [x28, #1298]",
"lsl x25, x21, #11",
"ldrb w30, [x28, #1016]",
"orr x25, x25, x30, lsl #8",
"ldrb w30, [x28, #1017]",
"orr x25, x25, x30, lsl #9",
"ldrb w30, [x28, #1018]",
"orr x25, x25, x30, lsl #10",
"ldrb w30, [x28, #1022]",
"orr x25, x25, x30, lsl #14",
"strh w25, [x7]",
"add x0, x28, x21, lsl #4",
"str q5, [x0, #1040]",
"add x0, x28, x22, lsl #4",
"str q4, [x0, #1040]",
"add x0, x28, x23, lsl #4",
"str q3, [x0, #1040]",
"add x0, x28, x24, lsl #4",
"str q2, [x0, #1040]",
"sub w20, w20, w21",
"ldrb w21, [x28, #1298]",
"lsr w20, w18, w20",
"orr w20, w21, w20",
"strb w20, [x28, #1298]"
]
},
"ffree st0": {
"ExpectedInstructionCount": 6,
"Comment": [
Expand Down Expand Up @@ -16758,16 +16930,16 @@
"0xdf 11b 0xe0 /4"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1016]",
"ldrb w20, [x28, #1019]",
"lsl x20, x20, #11",
"ldrb w21, [x28, #1016]",
"orr x20, x20, x21, lsl #8",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1018]",
"orr x20, x20, x21, lsl #10",
"ldrb w21, [x28, #1022]",
"orr x20, x20, x21, lsl #14",
"bfxil x4, x20, #0, #16"
]
},
Expand Down
Loading

0 comments on commit 941fd9c

Please sign in to comment.