Fix use_hqq for int4_weight_only quantize (#1707)
Pass the computed use_hqq flag through to int4_weight_only so that HQQ is actually used when an "-hqq" variant (e.g. int4wo-128-hqq) is requested; previously the flag was set but never forwarded.
jainapurva authored Feb 13, 2025
1 parent d3306b2 · commit dff29c0
Showing 1 changed file with 2 additions and 3 deletions.
torchao/_models/llama/generate.py (5 changes: 2 additions & 3 deletions)
@@ -420,10 +420,9 @@ def ffn_or_attn_only(mod, fqn):
     else:
         quantize_(model, int8_dynamic_activation_int8_weight())
     if "int4wo" in quantization:
+        use_hqq = False
         if "hqq" in quantization:
             use_hqq = True
-        else:
-            use_hqq = False
         group_size = int(quantization.split("-")[1])
         assert (
             group_size
@@ -434,7 +433,7 @@ def ffn_or_attn_only(mod, fqn):
                 256,
             ]
         ), f"int4wo group_size needs to be one of [32,64,128,256] but got {group_size}"
-        quantize_(model, int4_weight_only(group_size=group_size))
+        quantize_(model, int4_weight_only(group_size=group_size, use_hqq=use_hqq))
     elif "int8adq-int4w-symm" in quantization:
         from torchao.dtypes import CutlassInt4PackedLayout

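To make the change concrete outside the diff, here is a minimal, self-contained sketch of the fixed code path. The toy model, tensor sizes, and the quantization string are illustrative assumptions, not part of the commit; quantize_ and int4_weight_only are the torchao APIs generate.py already uses, and int4 weight-only packing generally expects a bfloat16 model on a CUDA device.

# Minimal sketch of the patched flow (assumptions: torchao installed,
# CUDA device, bfloat16 weights; this toy module stands in for the
# Llama model that generate.py actually quantizes).
import torch
from torchao.quantization import quantize_, int4_weight_only

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to(
    device="cuda", dtype=torch.bfloat16
)

quantization = "int4wo-128-hqq"  # same string format generate.py parses

if "int4wo" in quantization:
    use_hqq = "hqq" in quantization  # one-line equivalent of the patched check
    group_size = int(quantization.split("-")[1])  # "int4wo-128-hqq" -> 128
    assert group_size in [32, 64, 128, 256], f"got {group_size}"
    # The fix: use_hqq is now forwarded, so "-hqq" requests actually
    # switch the int4 weight-only quantization to the HQQ algorithm.
    quantize_(model, int4_weight_only(group_size=group_size, use_hqq=use_hqq))

Note that use_hqq = "hqq" in quantization collapses the patched if/else into one expression with identical behavior; the commit keeps the explicit form, which stays closer to the surrounding style of the file.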
