refactor(marlin): prepare the introduction of int4 kernel

huggingface · Sep 20, 2024 · c23f155 · c23f155
1 parent d184901
commit c23f155
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 4 deletions.
diff --git a/optimum/quanto/tensor/weights/marlin/__init__.py b/optimum/quanto/tensor/weights/marlin/__init__.py
@@ -1 +1 @@
-from .marlin import MarlinF8QBytesTensor
+from .fp8 import *
diff --git a/optimum/quanto/tensor/weights/marlin/fp8/__init__.py b/optimum/quanto/tensor/weights/marlin/fp8/__init__.py
@@ -0,0 +1 @@
+from .qbits import *
diff --git a/...um/quanto/tensor/weights/marlin/packed.py → ...uanto/tensor/weights/marlin/fp8/packed.py b/...um/quanto/tensor/weights/marlin/packed.py → ...uanto/tensor/weights/marlin/fp8/packed.py
diff --git a/...um/quanto/tensor/weights/marlin/marlin.py → ...quanto/tensor/weights/marlin/fp8/qbits.py b/...um/quanto/tensor/weights/marlin/marlin.py → ...quanto/tensor/weights/marlin/fp8/qbits.py
@@ -16,12 +16,15 @@
 
 import torch
 
-from ...function import QuantizedLinearFunction
-from ...qtype import qfloat8_e4m3fn, qtypes
-from ..qbytes import WeightQBytesTensor
+from ....function import QuantizedLinearFunction
+from ....qtype import qfloat8_e4m3fn, qtypes
+from ...qbytes import WeightQBytesTensor
 from .packed import MarlinF8PackedTensor, get_scale_perms
 
 
+__all__ = ["MarlinF8QBytesTensor"]
+
+
 class MarlinF8QBytesLinearFunction(QuantizedLinearFunction):
     @staticmethod
     def forward(ctx, input, other, bias=None):