Skip to content

Commit

Permalink
Merge pull request #3874 from alyssarosenzweig/opt/reconstructftw
Browse files Browse the repository at this point in the history
X87: save uop in ReconstructFTW
  • Loading branch information
Sonicadvance1 authored Jul 17, 2024
2 parents 09c4a55 + 19e58ca commit f72cee4
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 239 deletions.
9 changes: 4 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,18 @@ void OpDispatchBuilder::SetX87Top(Ref Value) {

Ref OpDispatchBuilder::ReconstructFSW() {
  // Rebuild the x87 status word (FSW) from the pieces that are tracked
  // separately: the four condition-code flags and the top-of-stack index.
  // Returns the IR value holding the assembled word.
  //
  // Bit positions written here:
  //   C0 -> bit 8, C1 -> bit 9, C2 -> bit 10, TOP -> bits 11..13, C3 -> bit 14
  auto C0 = GetRFLAG(FEXCore::X86State::X87FLAG_C0_LOC);
  auto C1 = GetRFLAG(FEXCore::X86State::X87FLAG_C1_LOC);
  auto C2 = GetRFLAG(FEXCore::X86State::X87FLAG_C2_LOC);
  auto C3 = GetRFLAG(FEXCore::X86State::X87FLAG_C3_LOC);

  // Seed the word with C0 already shifted into place instead of OR-ing it
  // into a zero constant; this drops the constant materialisation.
  // NOTE(review): assumes each flag value is 0 or 1 - confirm against GetRFLAG.
  Ref FSW = _Lshl(OpSize::i64Bit, C0, _Constant(8));
  FSW = _Orlshl(OpSize::i64Bit, FSW, C1, 9);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C2, 10);
  FSW = _Orlshl(OpSize::i64Bit, FSW, C3, 14);

  // Insert the 3-bit TOP field at bit 11 last. None of the flag ORs above
  // touch bits 11..13, so the resulting value does not depend on this order.
  auto Top = GetX87Top();
  FSW = _Bfi(OpSize::i64Bit, 3, 11, FSW, Top);
  return FSW;
}

Expand Down
40 changes: 19 additions & 21 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -1186,22 +1186,21 @@
]
},
"fxsave [rax]": {
"ExpectedInstructionCount": 57,
"ExpectedInstructionCount": 56,
"Comment": "GROUP15 0x0F 0xAE /0",
"ExpectedArm64ASM": [
"ldrh w20, [x28, #1296]",
"strh w20, [x4]",
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"strh w20, [x4, #2]",
"ldrb w20, [x28, #1298]",
"strb w20, [x4, #4]",
Expand Down Expand Up @@ -1397,25 +1396,24 @@
]
},
"xsave [rax]": {
"ExpectedInstructionCount": 103,
"ExpectedInstructionCount": 102,
"Comment": "GROUP15 0x0F 0xAE /4",
"ExpectedArm64ASM": [
"ubfx x20, x4, #0, #1",
"cbnz x20, #+0x8",
"b #+0x84",
"b #+0x80",
"ldrh w20, [x28, #1296]",
"strh w20, [x4]",
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"strh w20, [x4, #2]",
"ldrb w20, [x28, #1298]",
"strb w20, [x4, #4]",
Expand Down
90 changes: 42 additions & 48 deletions unittests/InstructionCountCI/FlagM/x87.json
Original file line number Diff line number Diff line change
Expand Up @@ -3967,27 +3967,25 @@
]
},
"fnstenv [rax]": {
"ExpectedInstructionCount": 64,
"ExpectedInstructionCount": 62,
"Comment": [
"0xd9 !11b /6"
],
"ExpectedArm64ASM": [
"ldrh w20, [x28, #1296]",
"str w20, [x4]",
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"mov x0, x20",
"bfi x0, x21, #11, #3",
"mov x21, x0",
"ldrb w22, [x28, #1016]",
"ldrb w23, [x28, #1017]",
"ldrb w24, [x28, #1018]",
"ldrb w25, [x28, #1022]",
"orr x21, x21, x22, lsl #8",
"orr x21, x21, x23, lsl #9",
"orr x21, x21, x24, lsl #10",
"orr x21, x21, x25, lsl #14",
"str w21, [x4, #4]",
"bfi x20, x21, #11, #3",
"str w20, [x4, #4]",
"mov w20, #0x0",
"ldrb w21, [x28, #1298]",
"and w22, w21, #0x1",
"mov w23, #0x3",
Expand Down Expand Up @@ -11688,28 +11686,26 @@
]
},
"fnsave [rax]": {
"ExpectedInstructionCount": 113,
"ExpectedInstructionCount": 111,
"Comment": [
"0xdd !11b /6"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"ldrh w21, [x28, #1296]",
"str w21, [x4]",
"mov w21, #0x0",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"lsl x21, x21, #8",
"orr x21, x21, x22, lsl #9",
"orr x21, x21, x23, lsl #10",
"orr x21, x21, x24, lsl #14",
"ldrb w22, [x28, #1019]",
"mov x0, x21",
"bfi x0, x22, #11, #3",
"mov x22, x0",
"ldrb w23, [x28, #1016]",
"ldrb w24, [x28, #1017]",
"ldrb w25, [x28, #1018]",
"ldrb w30, [x28, #1022]",
"orr x22, x22, x23, lsl #8",
"orr x22, x22, x24, lsl #9",
"orr x22, x22, x25, lsl #10",
"orr x22, x22, x30, lsl #14",
"str w22, [x4, #4]",
"bfi x21, x22, #11, #3",
"str w21, [x4, #4]",
"mov w21, #0x0",
"ldrb w22, [x28, #1298]",
"and w23, w22, #0x1",
"mov w24, #0x3",
Expand Down Expand Up @@ -11809,22 +11805,21 @@
]
},
"fnstsw [rax]": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": [
"0xdd !11b /7"
],
"ExpectedArm64ASM": [
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"strh w20, [x4]"
]
},
Expand Down Expand Up @@ -16897,22 +16892,21 @@
]
},
"fnstsw ax": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": [
"0xdf 11b 0xe0 /4"
],
"ExpectedArm64ASM": [
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"bfxil x4, x20, #0, #16"
]
},
Expand Down
90 changes: 42 additions & 48 deletions unittests/InstructionCountCI/FlagM/x87_f64.json
Original file line number Diff line number Diff line change
Expand Up @@ -1631,27 +1631,25 @@
]
},
"fnstenv [rax]": {
"ExpectedInstructionCount": 64,
"ExpectedInstructionCount": 62,
"Comment": [
"0xd9 !11b /6"
],
"ExpectedArm64ASM": [
"ldrh w20, [x28, #1296]",
"str w20, [x4]",
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"mov x0, x20",
"bfi x0, x21, #11, #3",
"mov x21, x0",
"ldrb w22, [x28, #1016]",
"ldrb w23, [x28, #1017]",
"ldrb w24, [x28, #1018]",
"ldrb w25, [x28, #1022]",
"orr x21, x21, x22, lsl #8",
"orr x21, x21, x23, lsl #9",
"orr x21, x21, x24, lsl #10",
"orr x21, x21, x25, lsl #14",
"str w21, [x4, #4]",
"bfi x20, x21, #11, #3",
"str w20, [x4, #4]",
"mov w20, #0x0",
"ldrb w21, [x28, #1298]",
"and w22, w21, #0x1",
"mov w23, #0x3",
Expand Down Expand Up @@ -6744,28 +6742,26 @@
]
},
"fnsave [rax]": {
"ExpectedInstructionCount": 329,
"ExpectedInstructionCount": 327,
"Comment": [
"0xdd !11b /6"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"ldrh w21, [x28, #1296]",
"str w21, [x4]",
"mov w21, #0x0",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"lsl x21, x21, #8",
"orr x21, x21, x22, lsl #9",
"orr x21, x21, x23, lsl #10",
"orr x21, x21, x24, lsl #14",
"ldrb w22, [x28, #1019]",
"mov x0, x21",
"bfi x0, x22, #11, #3",
"mov x22, x0",
"ldrb w23, [x28, #1016]",
"ldrb w24, [x28, #1017]",
"ldrb w25, [x28, #1018]",
"ldrb w30, [x28, #1022]",
"orr x22, x22, x23, lsl #8",
"orr x22, x22, x24, lsl #9",
"orr x22, x22, x25, lsl #10",
"orr x22, x22, x30, lsl #14",
"str w22, [x4, #4]",
"bfi x21, x22, #11, #3",
"str w21, [x4, #4]",
"mov w21, #0x0",
"ldrb w22, [x28, #1298]",
"and w23, w22, #0x1",
"mov w24, #0x3",
Expand Down Expand Up @@ -7081,22 +7077,21 @@
]
},
"fnstsw [rax]": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": [
"0xdd !11b /7"
],
"ExpectedArm64ASM": [
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"strh w20, [x4]"
]
},
Expand Down Expand Up @@ -9830,22 +9825,21 @@
]
},
"fnstsw ax": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": [
"0xdf 11b 0xe0 /4"
],
"ExpectedArm64ASM": [
"mov w20, #0x0",
"ldrb w20, [x28, #1016]",
"ldrb w21, [x28, #1017]",
"ldrb w22, [x28, #1018]",
"ldrb w23, [x28, #1022]",
"lsl x20, x20, #8",
"orr x20, x20, x21, lsl #9",
"orr x20, x20, x22, lsl #10",
"orr x20, x20, x23, lsl #14",
"ldrb w21, [x28, #1019]",
"bfi x20, x21, #11, #3",
"ldrb w21, [x28, #1016]",
"ldrb w22, [x28, #1017]",
"ldrb w23, [x28, #1018]",
"ldrb w24, [x28, #1022]",
"orr x20, x20, x21, lsl #8",
"orr x20, x20, x22, lsl #9",
"orr x20, x20, x23, lsl #10",
"orr x20, x20, x24, lsl #14",
"bfxil x4, x20, #0, #16"
]
},
Expand Down
Loading

0 comments on commit f72cee4

Please sign in to comment.