diff --git a/src/sparseml/modifiers/quantization/gptq/base.py b/src/sparseml/modifiers/quantization/gptq/base.py
index b91fa2dad60..7f1ca823cc7 100644
--- a/src/sparseml/modifiers/quantization/gptq/base.py
+++ b/src/sparseml/modifiers/quantization/gptq/base.py
@@ -68,6 +68,10 @@ class GPTQModifier(Modifier):
         not be updated. Leave None to not disable observers during QAT. Default is None
     :param num_calibration_steps: Number of steps to run post training calibration for.
         When None, the entire calibration_dataloader is used
+    :param scheme: [Used if a quantization modifier is not specified] the quantization
+        scheme to apply to the model. This is a dictionary that supports all keys from
+        QuantizationScheme except targets, which will be set to the targets parameter
+        set at the modifier level.
     """
 
     sequential_update: Optional[bool] = False
@@ -79,6 +83,7 @@ class GPTQModifier(Modifier):
     ignore: List[str] = Field(default_factory=list)
     disable_quantization_observer_epoch: Optional[float] = None
     num_calibration_steps: Optional[int] = None
+    scheme: Optional[Dict[str, Any]] = None
     compressible_layers_: Optional[List] = None
     quantization_modifier_: Any = None
 
@@ -156,6 +161,14 @@ def _build_quant_modifier(self, framework):
             if getattr(self, key, False)
         }
 
+        if self.scheme is not None:
+            # takes precedence over config_groups
+            targets = self.targets or ["Linear"]
+            config_group = QuantizationScheme.model_validate(
+                {"targets": targets, **self.scheme}
+            )
+            quant_args["config_groups"] = {"config_group_0": config_group}
+
         if "config_groups" not in quant_args:
             default_quant_scheme = QuantizationScheme.default_scheme(
                 targets=self.targets
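
A minimal usage sketch of the new `scheme` parameter, assuming the nested quantization keys (`weights`, `num_bits`, `symmetric`) follow the usual `QuantizationScheme` shape; those keys are not taken from this diff. What the diff itself guarantees: `targets` falls back to `["Linear"]` when unset, the merged dict is validated with `QuantizationScheme.model_validate`, and the result is stored under `quant_args["config_groups"]["config_group_0"]`.

```python
# Hypothetical usage of the new `scheme` parameter; the keys under "weights"
# are assumptions based on a typical QuantizationScheme config, not taken
# from this diff.
from sparseml.modifiers.quantization.gptq.base import GPTQModifier

modifier = GPTQModifier(
    targets=["Linear"],  # copied into the built scheme as its `targets`
    scheme={
        # any QuantizationScheme key except `targets` may appear here
        "weights": {"num_bits": 4, "symmetric": True},
    },
)
```

Note that, per the `# takes precedence over config_groups` comment, the generated `config_group_0` overwrites any `config_groups` value already collected into `quant_args`.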