feat(optimizers): add HQQ optimizer

huggingface · Jul 10, 2024 · 9dc93ce · 9dc93ce
1 parent b8aeab4
commit 9dc93ce
Show file tree

Hide file tree

Showing 3 changed files with 141 additions and 0 deletions.
diff --git a/optimum/quanto/tensor/optimizers/__init__.py b/optimum/quanto/tensor/optimizers/__init__.py
@@ -14,6 +14,7 @@
 
 from .absmax_optimizer import *
 from .affine_optimizer import *
+from .hqq_optimizer import *
 from .max_optimizer import *
 from .optimizer import *
 from .symmetric_optimizer import *
diff --git a/optimum/quanto/tensor/optimizers/hqq_optimizer.py b/optimum/quanto/tensor/optimizers/hqq_optimizer.py
@@ -0,0 +1,88 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple, Union
+
+import torch
+
+from ..qtype import qint2, qint4
+from ..quantizers import AffineQuantizer
+from .max_optimizer import MaxOptimizer
+
+
+__all__ = ["HqqOptimizer"]
+
+
+# Shrinking operator
+def shrink_lp_op(x: torch.Tensor, beta: float, lp_norm: float) -> torch.Tensor:
+    """Apply the element-wise shrinking operator to `x`.
+
+    The result is `sign(x) * relu(|x| - threshold)`, where the threshold is
+    `1 / beta` when `lp_norm == 1` and `(1 / beta) * |x| ** (lp_norm - 1)` otherwise.
+
+    Args:
+        x: the tensor to shrink.
+        beta: the threshold is inversely proportional to this value.
+        lp_norm: the norm exponent selecting the threshold formula.
+
+    Returns:
+        A tensor where each element of `x` has its magnitude reduced by the threshold
+        (and is zero when the threshold exceeds that magnitude).
+    """
+    magnitude = torch.abs(x)
+    if lp_norm == 1:
+        # Constant threshold for every element
+        threshold = 1.0 / beta
+    else:
+        # Threshold depends on the magnitude of each element
+        threshold = (1.0 / beta) * torch.pow(magnitude, lp_norm - 1)
+    return torch.sign(x) * torch.nn.functional.relu(magnitude - threshold)
+
+
+class HqqOptimizer(MaxOptimizer):
+    """Implementation of the HQQ algorithm
+
+    This is an implementation of the algorithm described in "Half-Quadratic Quantization of Large Machine Learning Models",
+    by Hicham Badri and Appu Shaji (https://mobiusml.github.io/hqq_blog/).
+    This is an adaptation of the original implementation at https://github.com/mobiusml/hqq.
+
+    The scale and shift returned by the parent optimizer are used as a starting point:
+    the scale is returned unchanged, and the shift is iteratively refined as long as the
+    mean absolute quantization error decreases.
+
+    Args:
+        lp_norm (`Optional[float]`): the norm exponent passed to the shrinking operator. Defaults to 0.7.
+        beta (`Optional[float]`): the initial value of the shrinking parameter (the shrinking threshold
+            is proportional to `1 / beta`). Defaults to 1e1.
+        kappa (`Optional[float]`): the factor by which `beta` is multiplied after each iteration that
+            lowers the error. Defaults to 1.01.
+        iters (`Optional[int]`): the maximum number of refinement iterations. Defaults to 20.
+        verbose (`Optional[bool]`): whether to print the error at the start and at each iteration.
+            Defaults to False.
+    """
+
+    def __init__(
+        self,
+        lp_norm: Optional[float] = 0.7,
+        beta: Optional[float] = 1e1,
+        kappa: Optional[float] = 1.01,
+        iters: Optional[int] = 20,
+        verbose: Optional[bool] = False,
+    ) -> None:
+        self.lp_norm = lp_norm
+        self.beta = beta
+        self.kappa = kappa
+        self.iters = iters
+        self.verbose = verbose
+
+    def optimize(
+        self, base: torch.Tensor, bits: int, axis: int
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+        """Evaluate the scale and shift to use to quantize `base`.
+
+        Args:
+            base (`torch.Tensor`): the tensor to quantize.
+            bits (`int`): the target bitwidth: 2 selects `qint2`, any other value selects `qint4`.
+            axis (`int`): the quantization axis, forwarded to the parent optimizer and to the quantizer.
+
+        Returns:
+            The `(scale, shift)` pair, where `scale` is the one evaluated by the parent optimizer
+            and `shift` is the candidate that produced the lowest mean absolute error (the
+            parent's shift if no iteration improved on it).
+        """
+        scale, shift = super().optimize(base, bits, axis)
+        best_error = None
+        # Work on a local copy so that the configured beta is not modified between calls
+        beta = self.beta
+        qtype = qint2 if bits == 2 else qint4
+        # The shift is averaged over dim 0 when axis is -1, and over the last dim otherwise
+        mean_axis = 0 if axis == -1 else -1
+        base_q = AffineQuantizer.apply(base, qtype, axis, None, scale, shift)
+        for i in range(self.iters):
+            error = base - base_q
+            if best_error is None:
+                # First iteration: the reference is the error obtained with the parent's shift
+                best_error = float(torch.abs(error).mean())
+                if self.verbose:
+                    print(f"Start error: {best_error:.6f}")
+            e = shrink_lp_op(error, beta, self.lp_norm)
+            hqq_shift = torch.mean(base_q._data * scale - (base - e), axis=mean_axis, keepdim=True)
+            base_q = AffineQuantizer.apply(base, qtype, axis, None, scale, hqq_shift)
+            mean_error = float(torch.abs(base - base_q).mean())
+            if self.verbose:
+                print(f"HQQ error at it #{i}: {mean_error:.6f}")
+            if mean_error >= best_error:
+                # No improvement: keep the best shift found so far and stop
+                break
+            best_error = mean_error
+            shift = hqq_shift
+            beta *= self.kappa
+
+        return scale, shift
diff --git a/test/tensor/optimizers/test_hqq_optimizer.py b/test/tensor/optimizers/test_hqq_optimizer.py
@@ -0,0 +1,52 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+from helpers import random_tensor
+
+from optimum.quanto import (
+    AffineQuantizer,
+    HqqOptimizer,
+    MaxOptimizer,
+    qint2,
+    qint4,
+)
+
+
+def compare_quantized_tensor(a, qtype, axis, group_size, scale, shift):
+    """Quantize `a` with the specified parameters and compare the result to `a`.
+
+    Returns:
+        A `(mean_error, sim)` tuple: the mean absolute error between `a` and its quantized
+        version, and the cosine similarity between both tensors once flattened.
+    """
+    quantized = AffineQuantizer.apply(a, qtype, axis, group_size, scale, shift)
+    # Mean absolute error between the reference and its quantized version
+    abs_diff = torch.abs(a - quantized)
+    mean_error = torch.mean(abs_diff)
+    # Cosine similarity, evaluated on the flattened tensors
+    flat_ref = a.flatten()
+    flat_quantized = quantized.flatten()
+    sim = torch.nn.functional.cosine_similarity(flat_ref, flat_quantized, dim=0)
+    return mean_error, sim
+
+
+@pytest.mark.parametrize("input_shape", [(1024, 1024), (1024, 10, 1024)])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16], ids=["bf16", "fp16"])
+@pytest.mark.parametrize("qtype", [qint2, qint4], ids=["qint2", "qint4"])
+@pytest.mark.parametrize("axis", [0, -1], ids=["first-axis", "last-axis"])
+@pytest.mark.parametrize("group_size", [32, 64, 128])
+def test_hqq_optimizer(input_shape, dtype, qtype, axis, group_size, device):
+    """Check that the HQQ mean absolute error does not exceed the MaxOptimizer one."""
+    weights = random_tensor(input_shape, dtype=dtype).to(device)
+    # Both optimizers are invoked with the exact same quantization parameters
+    optimizer_kwargs = {"bits": qtype.bits, "axis": axis, "group_size": group_size}
+    # Baseline: scale and shift evaluated by the MaxOptimizer
+    max_scale, max_shift = MaxOptimizer()(weights, **optimizer_kwargs)
+    max_mean_error, max_sim = compare_quantized_tensor(weights, qtype, axis, group_size, max_scale, max_shift)
+    # Candidate: scale and shift evaluated by the HqqOptimizer
+    hqq_scale, hqq_shift = HqqOptimizer(verbose=True)(weights, **optimizer_kwargs)
+    hqq_mean_error, hqq_sim = compare_quantized_tensor(weights, qtype, axis, group_size, hqq_scale, hqq_shift)
+    # HQQ optimizes the mean error, so it should be lower
+    assert hqq_mean_error <= max_mean_error
+    # The cosine similarities are only reported: the corresponding check is disabled
+    print(max_sim, hqq_sim)
+    # assert torch.abs(1 - hqq_sim) <= torch.abs(1 - max_sim)