Skip to content

Commit

Permalink
instcountci: Implements explicit state switch between X87 and MMX
Browse files Browse the repository at this point in the history
  • Loading branch information
pmatos committed Oct 11, 2024
1 parent 66ab7a8 commit 2b486a4
Show file tree
Hide file tree
Showing 10 changed files with 722 additions and 360 deletions.
150 changes: 100 additions & 50 deletions unittests/InstructionCountCI/DDD.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"Instructions": {
"pi2fw mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": [
"0x0f 0x0f 0x0c"
],
Expand All @@ -25,22 +25,26 @@
"uzp1 v2.4h, v2.4h, v2.4h",
"sxtl v2.4s, v2.4h",
"scvtf v2.2s, v2.2s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pi2fd mm0, mm1": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 5,
"Comment": [
"0x0f 0x0f 0x0d"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"scvtf v2.2s, v2.2s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pf2iw mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": [
"0x0f 0x0f 0x1c"
],
Expand All @@ -49,34 +53,40 @@
"fcvtzs v2.2s, v2.2s",
"uzp1 v2.4h, v2.4h, v2.4h",
"sxtl v2.4s, v2.4h",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pf2id mm0, mm1": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 5,
"Comment": [
"0x0f 0x0f 0x1d"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"fcvtzs v2.2s, v2.2s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcpv mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": [
"0x0f 0x0f 0x86"
],
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"fmov v0.4s, #0x70 (1.0000)",
"fdiv v2.4s, v0.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrsqrtv mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": [
"0x0f 0x0f 0x87"
],
Expand All @@ -85,23 +95,27 @@
"fmov v0.4s, #0x70 (1.0000)",
"fsqrt v1.4s, v2.4s",
"fdiv v2.4s, v0.4s, v1.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfnacc mm0, mm1": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 8,
"Comment": "0x0f 0x0f 0x8a",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1040]",
"ldr d3, [x28, #1056]",
"uzp1 v4.2s, v2.2s, v3.2s",
"uzp2 v2.2s, v2.2s, v3.2s",
"fsub v2.4s, v4.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfpnacc mm0, mm1": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 9,
"Comment": "0x0f 0x0f 0x8e",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1040]",
Expand All @@ -110,32 +124,38 @@
"fsub s2, s2, s4",
"faddp v3.4s, v3.4s, v3.4s",
"mov v2.s[1], v3.s[0]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfcmpge mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0x90",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fcmge v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfmin mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": "0x0f 0x0f 0x94",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fcmgt v0.4s, v3.4s, v2.4s",
"bif v2.16b, v3.16b, v0.16b",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcp mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": [
"0x0f 0x0f 0x96"
],
Expand All @@ -144,11 +164,13 @@
"fmov s0, #0x70 (1.0000)",
"fdiv s2, s0, s2",
"dup v2.2s, v2.s[0]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrsqrt mm0, mm1": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 8,
"Comment": [
"0x0f 0x0f 0x97"
],
Expand All @@ -158,56 +180,68 @@
"fsqrt s1, s2",
"fdiv s2, s0, s1",
"dup v2.2s, v2.s[0]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfsub mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0x9a",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fsub v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfadd mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0x9e",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fadd v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfcmpgt mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0xa0",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fcmgt v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfmax mm0, mm1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 7,
"Comment": "0x0f 0x0f 0xa4",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fcmgt v0.4s, v3.4s, v2.4s",
"bit v2.16b, v3.16b, v0.16b",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcpit1 mm0, mm1": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": "0x0f 0x0f 0xa6",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcpit1 mm0, mm0": {
Expand All @@ -216,11 +250,13 @@
"ExpectedArm64ASM": []
},
"pfrsqit1 mm0, mm1": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": "0x0f 0x0f 0xa7",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrsqit1 mm0, mm0": {
Expand All @@ -229,41 +265,49 @@
"ExpectedArm64ASM": []
},
"pfsubr mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0xaa",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fsub v2.4s, v2.4s, v3.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfcmpeq mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0xb0",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fcmeq v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfmul mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0xb4",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"fmul v2.4s, v3.4s, v2.4s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcpit2 mm0, mm1": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 4,
"Comment": "0x0f 0x0f 0xb6",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pfrcpit2 mm0, mm0": {
Expand All @@ -272,7 +316,7 @@
"ExpectedArm64ASM": []
},
"db 0x0f, 0x0f, 0xc1, 0xb7": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 9,
"Comment": [
"nasm doesn't support emitting this instruction",
"pmulhrw mm0, mm1",
Expand All @@ -285,26 +329,32 @@
"movi v3.4s, #0x80, lsl #8",
"add v2.4s, v2.4s, v3.4s",
"shrn v2.4h, v2.4s, #16",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pswapd mm0, mm1": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 5,
"Comment": "0x0f 0x0f 0xbb",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"rev64 v2.2s, v2.2s",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
},
"pavgusb mm0, mm1": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 6,
"Comment": "0x0f 0x0f 0xbf",
"ExpectedArm64ASM": [
"ldr d2, [x28, #1056]",
"ldr d3, [x28, #1040]",
"urhadd v2.16b, v3.16b, v2.16b",
"str d2, [x28, #1040]"
"str d2, [x28, #1040]",
"mov w20, #0xffff",
"strh w20, [x28, #1048]"
]
}
}
Expand Down
Loading

0 comments on commit 2b486a4

Please sign in to comment.