-
Notifications
You must be signed in to change notification settings - Fork 241
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Solver] Check all output branches are quantized before merging quantizers (#2854)

### Changes
The quantizer merge logic is updated to check that all output branches are quantized before quantizers are merged and propagated up.

### Reason for changes
To prevent merging of quantizers in the case of the ScaledDotProductAttention op, which should have quantizers on the [0, 1] input ports and shouldn't have a quantizer on the 3 input port.

### Related tickets
148211 #2766

### Tests
* Common solver test for ScaledDotProductAttention branch merging and quantization initialization
* Graph tests for torch/ov backends
- Loading branch information
1 parent
63fcb15
commit ef49c75
Showing
9 changed files
with
242 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 59 additions & 29 deletions
88
...s/openvino/native/data/2024.1/reference_graphs/quantized/scaled_dot_product_attention.dot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,63 @@ | ||
strict digraph { | ||
"0 Input_1" [id=0, type=Parameter]; | ||
"1 Input_2" [id=1, type=Parameter]; | ||
"2 Input_3" [id=2, type=Parameter]; | ||
"3 Input_4" [id=3, type=Parameter]; | ||
"4 Input_1/fq_output_0" [id=4, type=FakeQuantize]; | ||
"5 Input_2/fq_output_0" [id=5, type=FakeQuantize]; | ||
"6 ScaledDotProductAttention_5" [id=6, type=ScaledDotProductAttention]; | ||
"7 Result" [id=7, type=Result]; | ||
"8 Constant_2553" [id=8, type=Constant]; | ||
"9 Constant_2552" [id=9, type=Constant]; | ||
"10 Constant_2551" [id=10, type=Constant]; | ||
"11 Constant_2550" [id=11, type=Constant]; | ||
"12 Constant_2548" [id=12, type=Constant]; | ||
"13 Constant_2547" [id=13, type=Constant]; | ||
"14 Constant_2546" [id=14, type=Constant]; | ||
"15 Constant_2545" [id=15, type=Constant]; | ||
"0 Input_1" -> "4 Input_1/fq_output_0" [label="[1, 1, 1, 64]", style=solid]; | ||
"1 Input_2" -> "5 Input_2/fq_output_0" [label="[1, 1, 1, 64]", style=solid]; | ||
"2 Input_3" -> "6 ScaledDotProductAttention_5" [label="[1, 1, 1, 64]", style=solid]; | ||
"3 Input_4" -> "6 ScaledDotProductAttention_5" [label="[1, 1, 1, 1]", style=solid]; | ||
"4 Input_1/fq_output_0" -> "6 ScaledDotProductAttention_5" [label="[1, 1, 1, 64]", style=solid]; | ||
"5 Input_2/fq_output_0" -> "6 ScaledDotProductAttention_5" [label="[1, 1, 1, 64]", style=solid]; | ||
"6 ScaledDotProductAttention_5" -> "7 Result" [label="[1, 1, 1, 64]", style=solid]; | ||
"8 Constant_2553" -> "5 Input_2/fq_output_0" [label="[]", style=solid]; | ||
"9 Constant_2552" -> "5 Input_2/fq_output_0" [label="[]", style=solid]; | ||
"10 Constant_2551" -> "5 Input_2/fq_output_0" [label="[]", style=solid]; | ||
"11 Constant_2550" -> "5 Input_2/fq_output_0" [label="[]", style=solid]; | ||
"12 Constant_2548" -> "4 Input_1/fq_output_0" [label="[]", style=solid]; | ||
"13 Constant_2547" -> "4 Input_1/fq_output_0" [label="[]", style=solid]; | ||
"14 Constant_2546" -> "4 Input_1/fq_output_0" [label="[]", style=solid]; | ||
"15 Constant_2545" -> "4 Input_1/fq_output_0" [label="[]", style=solid]; | ||
"2 Reshape_3835" [id=2, type=Reshape]; | ||
"3 ScaledDotProductAttention_3850" [id=3, type=ScaledDotProductAttention]; | ||
"4 Reshape_3837" [id=4, type=Reshape]; | ||
"5 Result" [id=5, type=Result]; | ||
"6 Reshape_3839/fq_input_0" [id=6, type=FakeQuantize]; | ||
"7 Reshape_3843/fq_input_0" [id=7, type=FakeQuantize]; | ||
"8 Reshape_3847" [id=8, type=Reshape]; | ||
"9 Reshape_3839" [id=9, type=Reshape]; | ||
"10 Reshape_3843" [id=10, type=Reshape]; | ||
"11 Reshape_3849" [id=11, type=Reshape]; | ||
"12 Reshape_3841" [id=12, type=Reshape]; | ||
"13 Reshape_3845" [id=13, type=Reshape]; | ||
"14 Constant_3848" [id=14, type=Constant]; | ||
"15 Constant_3846" [id=15, type=Constant]; | ||
"16 Constant_3836" [id=16, type=Constant]; | ||
"17 Constant_3834" [id=17, type=Constant]; | ||
"18 Constant_3844" [id=18, type=Constant]; | ||
"19 Constant_3842" [id=19, type=Constant]; | ||
"20 Reshape_3843/fq_input_0/output_high" [id=20, type=Constant]; | ||
"21 Reshape_3843/fq_input_0/output_low" [id=21, type=Constant]; | ||
"22 Reshape_3843/fq_input_0/input_high" [id=22, type=Constant]; | ||
"23 Reshape_3843/fq_input_0/input_low" [id=23, type=Constant]; | ||
"24 Constant_3840" [id=24, type=Constant]; | ||
"25 Constant_3838" [id=25, type=Constant]; | ||
"26 Reshape_3839/fq_input_0/output_high" [id=26, type=Constant]; | ||
"27 Reshape_3839/fq_input_0/output_low" [id=27, type=Constant]; | ||
"28 Reshape_3839/fq_input_0/input_high" [id=28, type=Constant]; | ||
"29 Reshape_3839/fq_input_0/input_low" [id=29, type=Constant]; | ||
"0 Input_1" -> "2 Reshape_3835" [label="[1, 1, 1, 64]", style=solid]; | ||
"1 Input_2" -> "3 ScaledDotProductAttention_3850" [label="[1, 1, 1, 1]", style=solid]; | ||
"2 Reshape_3835" -> "4 Reshape_3837" [label="[64]", style=solid]; | ||
"3 ScaledDotProductAttention_3850" -> "5 Result" [label="[1, 1, 1, 64]", style=solid]; | ||
"4 Reshape_3837" -> "6 Reshape_3839/fq_input_0" [label="[1, 1, 1, 64]", style=solid]; | ||
"4 Reshape_3837" -> "7 Reshape_3843/fq_input_0" [label="[1, 1, 1, 64]", style=solid]; | ||
"4 Reshape_3837" -> "8 Reshape_3847" [label="[1, 1, 1, 64]", style=solid]; | ||
"6 Reshape_3839/fq_input_0" -> "9 Reshape_3839" [label="[1, 1, 1, 64]", style=solid]; | ||
"7 Reshape_3843/fq_input_0" -> "10 Reshape_3843" [label="[1, 1, 1, 64]", style=solid]; | ||
"8 Reshape_3847" -> "11 Reshape_3849" [label="[64]", style=solid]; | ||
"9 Reshape_3839" -> "12 Reshape_3841" [label="[64]", style=solid]; | ||
"10 Reshape_3843" -> "13 Reshape_3845" [label="[64]", style=solid]; | ||
"11 Reshape_3849" -> "3 ScaledDotProductAttention_3850" [label="[1, 1, 1, 64]", style=solid]; | ||
"12 Reshape_3841" -> "3 ScaledDotProductAttention_3850" [label="[1, 1, 1, 64]", style=solid]; | ||
"13 Reshape_3845" -> "3 ScaledDotProductAttention_3850" [label="[1, 1, 1, 64]", style=solid]; | ||
"14 Constant_3848" -> "11 Reshape_3849" [label="[4]", style=dashed]; | ||
"15 Constant_3846" -> "8 Reshape_3847" [label="[1]", style=dashed]; | ||
"16 Constant_3836" -> "4 Reshape_3837" [label="[4]", style=dashed]; | ||
"17 Constant_3834" -> "2 Reshape_3835" [label="[1]", style=dashed]; | ||
"18 Constant_3844" -> "13 Reshape_3845" [label="[4]", style=dashed]; | ||
"19 Constant_3842" -> "10 Reshape_3843" [label="[1]", style=dashed]; | ||
"20 Reshape_3843/fq_input_0/output_high" -> "7 Reshape_3843/fq_input_0" [label="[]", style=solid]; | ||
"21 Reshape_3843/fq_input_0/output_low" -> "7 Reshape_3843/fq_input_0" [label="[]", style=solid]; | ||
"22 Reshape_3843/fq_input_0/input_high" -> "7 Reshape_3843/fq_input_0" [label="[]", style=solid]; | ||
"23 Reshape_3843/fq_input_0/input_low" -> "7 Reshape_3843/fq_input_0" [label="[]", style=solid]; | ||
"24 Constant_3840" -> "12 Reshape_3841" [label="[4]", style=dashed]; | ||
"25 Constant_3838" -> "9 Reshape_3839" [label="[1]", style=dashed]; | ||
"26 Reshape_3839/fq_input_0/output_high" -> "6 Reshape_3839/fq_input_0" [label="[]", style=solid]; | ||
"27 Reshape_3839/fq_input_0/output_low" -> "6 Reshape_3839/fq_input_0" [label="[]", style=solid]; | ||
"28 Reshape_3839/fq_input_0/input_high" -> "6 Reshape_3839/fq_input_0" [label="[]", style=solid]; | ||
"29 Reshape_3839/fq_input_0/input_low" -> "6 Reshape_3839/fq_input_0" [label="[]", style=solid]; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.