fix(qbytes_mm): reshape input

huggingface · Oct 1, 2024 · 4121d1e
1 parent 3303f71
commit 4121d1e
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/optimum/quanto/tensor/weights/qbytes.py b/optimum/quanto/tensor/weights/qbytes.py
@@ -72,7 +72,11 @@ def forward(ctx, input, other, bias=None):
         if isinstance(input, QBytesTensor):
             output = torch.ops.quanto.qbytes_mm(input._data, other._data, input._scale * other._scale)
         else:
-            output = torch.ops.quanto.qbytes_mm(input, other._data, other._scale)
+            in_features = input.shape[-1]
+            out_features = other.shape[0]
+            output_shape = input.shape[:-1] + (out_features,)
+            output = torch.ops.quanto.qbytes_mm(input.view(-1, in_features), other._data, other._scale)
+            output = output.view(output_shape)
         if bias is not None:
             output = output + bias
         return output