fix: remove extension load on unsupported system
fxmarty committed Jul 29, 2024
1 parent: efdd147 · commit: 07a5652
Showing 6 changed files with 6 additions and 13 deletions.
3 changes: 0 additions & 3 deletions optimum/quanto/library/extensions/extension.py
@@ -23,9 +23,6 @@ def __init__(
         self.build_directory = os.path.join(root_dir, "build")
         self._lib = None
 
-        # There is no reason not to build ahead of runtime.
-        tmp = self.lib  # noqa
-
     @property
     def lib(self):
         if self._lib is None:
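For context, the `lib` property that remains in this class makes the build lazy: with the eager `tmp = self.lib` removed from `__init__`, the C++/CUDA sources are only compiled on first access. A minimal sketch of that lazy-build pattern, assuming the property compiles via `torch.utils.cpp_extension.load` (the real class likely passes additional sources and build flags):

    import os
    from torch.utils.cpp_extension import load

    class Extension:
        def __init__(self, name: str, root_dir: str, sources: list):
            self.name = name
            self.sources = sources
            self.build_directory = os.path.join(root_dir, "build")
            self._lib = None
            # No eager `tmp = self.lib` here: nothing is compiled until `lib` is read.

        @property
        def lib(self):
            # Compile (or reload the cached build of) the extension on first access only.
            if self._lib is None:
                os.makedirs(self.build_directory, exist_ok=True)
                self._lib = load(name=self.name, sources=self.sources,
                                 build_directory=self.build_directory)
            return self._lib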
6 changes: 6 additions & 0 deletions optimum/quanto/library/ops.py
@@ -18,6 +18,12 @@
 import torch
 
 
+if torch.cuda.is_available():
+    from .extensions.cuda import ext
+
+    # This is required to be able to access `torch.ops.quanto_ext.*` members defined in C++ through `TORCH_LIBRARY`.
+    _ = ext.lib
+
 # This file contains the definitions of all operations under torch.ops.quanto
 
 
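This guard is the heart of the fix: on a CPU-only system, importing the package no longer triggers a CUDA extension build, while on a CUDA machine the C++ ops are registered exactly once, at import. Downstream modules (`qlinear.py`, `packed.py`, the test below) can then drop their own side-effect imports. A hypothetical consumer-side sketch — `gemm` is an illustrative op name, not necessarily one the extension actually defines:

    import torch
    import optimum.quanto  # pulls in ops.py, registering the C++ ops on CUDA systems

    def fused_or_fallback(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        # Dispatch to the CUDA op only if it was registered at import time;
        # on CPU-only systems the namespace lookup fails and we take the eager path.
        if a.is_cuda and hasattr(torch.ops.quanto_ext, "gemm"):
            return torch.ops.quanto_ext.gemm(a, b)
        return a @ b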
3 changes: 0 additions & 3 deletions optimum/quanto/nn/qlinear.py
@@ -16,9 +16,6 @@
 
 import torch
 
-# This is required to be able to access `torch.ops.quanto_ext.*` members defined in C++ through `TORCH_LIBRARY`.
-from optimum.quanto.library.extensions.cuda import ext  # noqa: F401
-
 from ..tensor import Optimizer, QBytesTensor, qtype
 from ..tensor.qbits.awq.qbits import AWQBitsTensor
 from ..tensor.qbits.tinygemm.qbits import TinyGemmQBitsTensor
1 change: 0 additions & 1 deletion optimum/quanto/tensor/qbytes.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import torch
 from torch.autograd import Function
 
 from .qtensor import QTensor
3 changes: 0 additions & 3 deletions optimum/quanto/tensor/weights/marlin/packed.py
@@ -17,9 +17,6 @@
 import torch
 from torch.utils import _pytree as pytree
 
-# This is required to be able to access `torch.ops.quanto_ext.*` members defined in C++ through `TORCH_LIBRARY`.
-from optimum.quanto.library.extensions.cuda import ext  # noqa: F401
-
 
 def pack_fp8_as_int32(fp8_tensor: torch.Tensor) -> torch.Tensor:
     """
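For reference, one plausible pure-PyTorch body for `pack_fp8_as_int32`, reinterpreting each fp8 element as its raw byte and packing four consecutive rows into one little-endian int32 word; this is a sketch consistent with the signature above, not necessarily the library's exact layout:

    import torch

    def pack_fp8_as_int32(fp8_tensor: torch.Tensor) -> torch.Tensor:
        # View the 1-byte fp8 values as raw bytes, widen to int32, then OR groups
        # of four consecutive rows into one word (row 4g+i lands at bits 8i..8i+7).
        assert fp8_tensor.dtype == torch.float8_e4m3fn
        assert fp8_tensor.shape[0] % 4 == 0, "rows must pack evenly into int32 words"
        b = fp8_tensor.view(torch.uint8).to(torch.int32)
        return b[0::4] | (b[1::4] << 8) | (b[2::4] << 16) | (b[3::4] << 24)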
3 changes: 0 additions & 3 deletions test/library/test_mm.py
@@ -103,9 +103,6 @@ def test_gemm_fp16_int4(batch_size, tokens, in_features, out_features):
 @pytest.mark.parametrize("in_features, out_features", [(256, 1024), (512, 2048)])
 @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16], ids=["bf16", "fp16"])
 def test_fp8_marlin(tokens, in_features, out_features, dtype):
-    # This is required to be able to access `torch.ops.quanto_ext.*` members defined in C++ through `TORCH_LIBRARY`.
-    from optimum.quanto.library.extensions.cuda import ext  # noqa: F401
-
     device = torch.device("cuda")
     input_shape = (tokens, in_features)
     inputs = torch.rand(input_shape, dtype=dtype, device=device)
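With the extension now loaded (or skipped) at package import, the test needs no per-test import of the CUDA module; a skip marker is the natural guard on machines without CUDA. An illustrative standalone version, not the file's actual decorator set:

    import pytest
    import torch

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required")
    def test_fp8_marlin_smoke():
        # Minimal smoke shape; the real test parametrizes tokens, features and dtype.
        inputs = torch.rand((16, 256), dtype=torch.float16, device="cuda")
        assert inputs.is_cuda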
