Skip to content

Commit

Permalink
refactor(marlin): prepare the introduction of int4 kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
dacorvo committed Sep 20, 2024
1 parent d184901 commit c23f155
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 4 deletions.
2 changes: 1 addition & 1 deletion optimum/quanto/tensor/weights/marlin/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .marlin import MarlinF8QBytesTensor
from .fp8 import *
1 change: 1 addition & 0 deletions optimum/quanto/tensor/weights/marlin/fp8/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .qbits import *
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@

import torch

from ...function import QuantizedLinearFunction
from ...qtype import qfloat8_e4m3fn, qtypes
from ..qbytes import WeightQBytesTensor
from ....function import QuantizedLinearFunction
from ....qtype import qfloat8_e4m3fn, qtypes
from ...qbytes import WeightQBytesTensor
from .packed import MarlinF8PackedTensor, get_scale_perms


__all__ = ["MarlinF8QBytesTensor"]


class MarlinF8QBytesLinearFunction(QuantizedLinearFunction):
@staticmethod
def forward(ctx, input, other, bias=None):
Expand Down

0 comments on commit c23f155

Please sign in to comment.