From 143ce80a45c221e3b02a3415f784afb6f946af7e Mon Sep 17 00:00:00 2001 From: "Lv, Tao A" Date: Thu, 2 Jan 2025 00:39:59 -0800 Subject: [PATCH] benchdnn: inputs: graph: rm case sdpa-compressed-kv-int8-gs128.json rewrite sdpa-compressed-kv-int4-gs32.json for it. --- .../graph/complex_fusion/harness_mha_all | 1 - .../graph/complex_fusion/harness_mha_ci | 3 +- .../mha/sdpa-compressed-kv-int8-gs128.json | 548 ------------------ 3 files changed, 2 insertions(+), 550 deletions(-) delete mode 100644 tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json diff --git a/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all b/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all index 80199e56b07..0b0691e2cf5 100644 --- a/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all +++ b/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all @@ -24,7 +24,6 @@ --reset --expected-n-partitions=0 --case=complex_fusion/mha/dynamic_quantized_mha-Bert_large-inf-int8-bs1-fake.json --reset --case=complex_fusion/mha/sdpa-plain-wo-scale-int8-bs1.json --reset --case=complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json ---reset --case=complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json --reset --case=complex_fusion/mha/sdpa-compressed-k-int8-gs32.json --reset --case=complex_fusion/mha/sdpa-compressed-v-int8-gs32.json diff --git a/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci b/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci index 8b86b687abc..63c0fe01ddf 100644 --- a/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci +++ b/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_ci @@ -21,5 +21,6 @@ --reset --expected-n-partitions=0 --case=complex_fusion/mha/MHA-starcoder-inf-int8-bs1.json --reset --expected-n-partitions=0 --case=complex_fusion/mha/dynamic_quantized_mha-Bert_large-inf-int8-bs1-fake.json --reset --case=complex_fusion/mha/sdpa-plain-wo-scale-int8-bs1.json ---reset --case=complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json --reset --case=complex_fusion/mha/sdpa-compressed-v-int8-gs32.json +--reset --case=complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json +--reset --dt=0:s8+2:s8+6:s8+8:s8 --case=complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json diff --git a/tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json b/tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json deleted file mode 100644 index ec71b67c8c9..00000000000 --- a/tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int8-gs128.json +++ /dev/null @@ -1,548 +0,0 @@ -{ - "version": "3.2.0", - "engine_kind": "cpu", - "fpmath_mode": "f16", - "fpmath_mode_apply_to_int": "true", - "input_ports": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8 - ], - "output_ports": [ - 50 - ], - "graph": [ - { - "id": 34107656704, - "name": "aten::dequantize", - "kind": "DynamicDequantize", - "attrs": { - "qtype": { - "type": "string", - "value": "per_group" - }, - "group_shape": { - "type": "s64[]", - "value": [ - 1, - 1, - 128, - 1 - ] - }, - "axis": { - "type": "s64", - "value": 2 - } - }, - "inputs": [ - { - "id": 0, - "dtype": "s8", - "shape": [ - 1, - 32, - 128, - 32 - ], - "stride": [ - 131072, - 4096, - 1, - 128 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 1, - "dtype": "f16", - "shape": [ - 1, - 32, - 1, - 32 - ], - "stride": [ - 4096, - 128, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "undef" - }, - { - "id": 2, - "dtype": "s8", - "shape": [ - 1, - 32, - 1, - 32 - ], - "stride": [ - 4096, - 128, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "undef" - } - ], - "outputs": [ - { - "id": 10, - "dtype": "f16", - "shape": [ - 1, - 32, - 128, - 32 - ], - "stride": [ - 131072, - 4096, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34107654464, - "name": "aten::matmul", - "kind": "MatMul", - "attrs": { - "transpose_a": { - "type": "bool", - "value": 0 - }, - "transpose_b": { - "type": "bool", - "value": 0 - } - }, - "inputs": [ - { - "id": 3, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 128 - ], - "stride": [ - 131072, - 4096, - 128, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 10, - "dtype": "f16", - "shape": [ - 1, - 32, - 128, - 32 - ], - "stride": [ - 131072, - 4096, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ], - "outputs": [ - { - "id": 15, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34107661824, - "name": "aten::div", - "kind": "Divide", - "attrs": { - "auto_broadcast": { - "type": "string", - "value": "numpy" - } - }, - "inputs": [ - { - "id": 15, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 4, - "dtype": "f16", - "shape": [], - "stride": [], - "layout_type": "strided", - "property_type": "undef" - } - ], - "outputs": [ - { - "id": 16, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34106997632, - "name": "aten::add", - "kind": "Add", - "attrs": { - "auto_broadcast": { - "type": "string", - "value": "numpy" - } - }, - "inputs": [ - { - "id": 16, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 5, - "dtype": "f16", - "shape": [ - 1, - 1, - 1, - 32 - ], - "stride": [ - 1024, - 32, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ], - "outputs": [ - { - "id": 18, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34426356992, - "name": "aten::softmax", - "kind": "SoftMax", - "attrs": { - "axis": { - "type": "s64", - "value": 3 - } - }, - "inputs": [ - { - "id": 18, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ], - "outputs": [ - { - "id": 27, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34107752448, - "name": "aten::dequantize", - "kind": "DynamicDequantize", - "attrs": { - "qtype": { - "type": "string", - "value": "per_group" - }, - "group_shape": { - "type": "s64[]", - "value": [ - 1, - 1, - 1, - 128 - ] - }, - "axis": { - "type": "s64", - "value": 3 - } - }, - "inputs": [ - { - "id": 6, - "dtype": "s8", - "shape": [ - 1, - 32, - 32, - 128 - ], - "stride": [ - 131072, - 4096, - 128, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 7, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 1 - ], - "stride": [ - 4096, - 128, - 1, - 1 - ], - "layout_type": "strided", - "property_type": "undef" - }, - { - "id": 8, - "dtype": "s8", - "shape": [ - 1, - 32, - 32, - 1 - ], - "stride": [ - 4096, - 128, - 1, - 1 - ], - "layout_type": "strided", - "property_type": "undef" - } - ], - "outputs": [ - { - "id": 45, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 128 - ], - "stride": [ - 131072, - 4096, - 128, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - }, - { - "id": 34105676800, - "name": "aten::matmul", - "kind": "MatMul", - "attrs": { - "transpose_a": { - "type": "bool", - "value": 0 - }, - "transpose_b": { - "type": "bool", - "value": 0 - } - }, - "inputs": [ - { - "id": 27, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 32 - ], - "stride": [ - 32768, - 1024, - 32, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - }, - { - "id": 45, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 128 - ], - "stride": [ - 131072, - 4096, - 128, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ], - "outputs": [ - { - "id": 50, - "dtype": "f16", - "shape": [ - 1, - 32, - 32, - 128 - ], - "stride": [ - 131072, - 4096, - 128, - 1 - ], - "layout_type": "strided", - "property_type": "variable" - } - ] - } - ] -} -