diff --git a/examples/llama7b_one_shot_quantization.md b/examples/llama7b_one_shot_quantization.md index d3ee50e1aa..af64489706 100644 --- a/examples/llama7b_one_shot_quantization.md +++ b/examples/llama7b_one_shot_quantization.md @@ -23,7 +23,8 @@ from sparseml.modifiers.quantization.gptq import GPTQModifier gptq = GPTQModifier( targets="Linear", - scheme="W4A16" + scheme="W4A16", + ignore=["lm_head"], ) ``` diff --git a/examples/llama7b_sparse_quantized/2:4_w4a16_group-128_recipe.yaml b/examples/llama7b_sparse_quantized/2:4_w4a16_group-128_recipe.yaml index aeddebb8cb..6f35f51139 100644 --- a/examples/llama7b_sparse_quantized/2:4_w4a16_group-128_recipe.yaml +++ b/examples/llama7b_sparse_quantized/2:4_w4a16_group-128_recipe.yaml @@ -23,6 +23,7 @@ quantization_stage: run_type: oneshot quantization_modifiers: GPTQModifier: + ignore: [ "lm_head" ] sequential_update: false config_groups: group_0: