diff --git a/python/perf-kernels/flash-attention.py b/python/perf-kernels/flash-attention.py index a6d5c16eed9f..ff4ec00e30e6 100644 --- a/python/perf-kernels/flash-attention.py +++ b/python/perf-kernels/flash-attention.py @@ -1211,7 +1211,6 @@ def test_op_fwd(Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD, causal, use_alibi, layout, (4, 12, 8192, 8192, 64), (2, 4, 16384, 16384, 128), (2, 16, 1020, 987, 128), - (2, 16, 15498, 2, 128), (2, 4, 7, 16219, 64), (4, 48, 1, 1, 64), (4, 48, 1, 1, 128), @@ -1223,7 +1222,8 @@ def test_op_fwd(Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD, causal, use_alibi, layout, (4, 4, 65, 1019, 65), (4, 4, 128, 128, 65), # TODO: This config fails. Disabled until triaged and fixed. - # (4, 4, 113, 123, 1), + # (4, 4, 113, 123, 1), + # (2, 16, 15498, 2, 128), ]) @pytest.mark.parametrize('causal', [True, False]) @pytest.mark.parametrize('use_bias', [True])