diff --git a/src/sparseml/modifiers/quantization/base.py b/src/sparseml/modifiers/quantization/base.py
index e66f5b9ea72..9b9f1569f09 100644
--- a/src/sparseml/modifiers/quantization/base.py
+++ b/src/sparseml/modifiers/quantization/base.py
@@ -17,17 +17,17 @@
 from sparseml.core import Event, Modifier


-__all__ = ["QuantizationModifier"]
+__all__ = ["LegacyQuantizationModifier"]


-class QuantizationModifier(Modifier):
+class LegacyQuantizationModifier(Modifier):
     """
     Enables quantization aware training (QAT) for a given module or its submodules
     After the start epoch, the specified module(s) forward pass will emulate
     quantized execution and the modifier will be enabled until training is completed.

     | Sample yaml:
-    |   QuantizationModifier:
+    |   LegacyQuantizationModifier:
     |       start: 0.0
     |       scheme:
     |           input_activations:
diff --git a/src/sparseml/modifiers/quantization/gptq/base.py b/src/sparseml/modifiers/quantization/gptq/base.py
index cb0023d1919..004fce2ee7a 100644
--- a/src/sparseml/modifiers/quantization/gptq/base.py
+++ b/src/sparseml/modifiers/quantization/gptq/base.py
@@ -194,7 +194,7 @@ def _build_quant_modifier(self, framework):
             )
         quant_args["config_groups"] = {"config_group_0": default_quant_scheme}
         _LOGGER.info(f"Building quantization modifier with args: {quant_args}")
-        vllm_quant_config = {"vLLMQuantizationModifier": quant_args}
+        vllm_quant_config = {"QuantizationModifier": quant_args}
         self._build_quant_modifier_from_dict(vllm_quant_config, framework)

     def compressible_layers(self) -> Dict:
diff --git a/src/sparseml/modifiers/quantization/pytorch.py b/src/sparseml/modifiers/quantization/pytorch.py
index 927d8db79d3..0bedd489e9d 100644
--- a/src/sparseml/modifiers/quantization/pytorch.py
+++ b/src/sparseml/modifiers/quantization/pytorch.py
@@ -19,7 +19,7 @@
 from torch.nn import Module

 from sparseml.core import Event, EventType, State
-from sparseml.modifiers.quantization.base import QuantizationModifier
+from sparseml.modifiers.quantization.base import LegacyQuantizationModifier
 from sparseml.modifiers.quantization.modification import modify_model
 from sparseml.modifiers.quantization.utils.helpers import (
     configure_module_bn_wrappers,
@@ -42,7 +42,7 @@
 _LOGGER = logging.getLogger(__name__)


-class QuantizationModifierPyTorch(QuantizationModifier):
+class LegacyQuantizationModifierPyTorch(LegacyQuantizationModifier):
     """
     Pytorch-specific implementation of quantization modifier

diff --git a/src/sparseml/modifiers/quantization_vllm/base.py b/src/sparseml/modifiers/quantization_vllm/base.py
index c8b2522ecee..e6af6485aa3 100644
--- a/src/sparseml/modifiers/quantization_vllm/base.py
+++ b/src/sparseml/modifiers/quantization_vllm/base.py
@@ -24,10 +24,10 @@
 from sparseml.core import Event, Modifier


-__all__ = ["vLLMQuantizationModifier"]
+__all__ = ["QuantizationModifier"]


-class vLLMQuantizationModifier(Modifier):
+class QuantizationModifier(Modifier):
     """
     Enables post training quantization (PTQ) and quantization aware training (QAT)
     for a given module or its submodules. After calibration (PTQ) or the start epoch (QAT),
diff --git a/src/sparseml/modifiers/quantization_vllm/pytorch.py b/src/sparseml/modifiers/quantization_vllm/pytorch.py
index a6e7f179525..a6b5e1bc288 100644
--- a/src/sparseml/modifiers/quantization_vllm/pytorch.py
+++ b/src/sparseml/modifiers/quantization_vllm/pytorch.py
@@ -23,16 +23,16 @@
     set_module_for_calibration,
 )
 from sparseml.core import Event, EventType, State
-from sparseml.modifiers.quantization_vllm.base import vLLMQuantizationModifier
+from sparseml.modifiers.quantization_vllm.base import QuantizationModifier
 from sparseml.modifiers.utils.pytorch_helpers import run_calibration_forward


 _LOGGER = logging.getLogger(__name__)


-class vLLMQuantizationModifierPyTorch(vLLMQuantizationModifier):
+class QuantizationModifierPyTorch(QuantizationModifier):
     """
-    PyTorch specific implementation of vLLMQuantizationModifier
+    PyTorch specific implementation of QuantizationModifier

     Enables post training quantization (PTQ) and quantization aware training (QAT)
     for a given module or its submodules. After calibration (PTQ) or the start epoch (QAT),
diff --git a/src/sparseml/transformers/sparsification/compressed_tensors_utils.py b/src/sparseml/transformers/sparsification/compressed_tensors_utils.py
index c62a1eb9bf9..0a7e914393c 100644
--- a/src/sparseml/transformers/sparsification/compressed_tensors_utils.py
+++ b/src/sparseml/transformers/sparsification/compressed_tensors_utils.py
@@ -83,10 +83,10 @@ def save_pretrained_wrapper(
         # check if we are in the old quantization framework
         if qat_active(model) and not is_model_quantized(model):
             _LOGGER.info(
-                "Compression for models quantized with QuantizationModifer is not "
-                "supported. Save will be run without compression and no sparsity "
-                "statistics will be calculated. To save a quantized model in a "
-                "compressed state please use vLLMQuantizationModifier instead."
+                "Compression for models quantized with LegacyQuantizationModifier "
+                "is not supported. Save will be run without compression and no "
+                "sparsity statistics will be calculated. To save a quantized model "
+                "in a compressed state please use QuantizationModifier instead."
             )

             original_save_pretrained.__get__(model, model_class)(
diff --git a/tests/sparseml/modifiers/quantization/test_base.py b/tests/sparseml/modifiers/quantization/test_base.py
index 064d8dcb671..d0bd316c534 100644
--- a/tests/sparseml/modifiers/quantization/test_base.py
+++ b/tests/sparseml/modifiers/quantization/test_base.py
@@ -19,7 +19,7 @@
 from sparseml.core.event import Event
 from sparseml.core.factory import ModifierFactory
 from sparseml.core.framework import Framework
-from sparseml.modifiers.quantization import QuantizationModifier
+from sparseml.modifiers.quantization import LegacyQuantizationModifier
 from tests.sparseml.modifiers.conf import setup_modifier_factory


@@ -31,14 +31,14 @@ def setUp(self):

     def test_quantization_registered(self):
         quant_obj = ModifierFactory.create(
-            type_="QuantizationModifier",
+            type_="LegacyQuantizationModifier",
             framework=Framework.general,
             allow_experimental=False,
             allow_registered=True,
             **self.kwargs,
         )

-        self.assertIsInstance(quant_obj, QuantizationModifier)
+        self.assertIsInstance(quant_obj, LegacyQuantizationModifier)


 @pytest.mark.unit
@@ -52,7 +52,7 @@ def setUp(self):

     def test_end_epochs(self):
         disable_quant_epoch, freeze_bn_epoch = None, None
-        obj_modifier = QuantizationModifier(
+        obj_modifier = LegacyQuantizationModifier(
             start=self.start,
             scheme=self.scheme,
             disable_quantization_observer_epoch=disable_quant_epoch,
@@ -68,7 +68,7 @@ def test_end_epochs(self):
             assert not obj_modifier.check_should_freeze_bn_stats(event)

         disable_quant_epoch, freeze_bn_epoch = 3.5, 5.0
-        obj_modifier = QuantizationModifier(
+        obj_modifier = LegacyQuantizationModifier(
             start=self.start,
             scheme=self.scheme,
             disable_quantization_observer_epoch=disable_quant_epoch,
diff --git a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
index e52b6e2ef23..7f962d5b017 100644
--- a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
+++ b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py
@@ -21,8 +21,8 @@
 from sparseml.core.model import ModifiableModel
 from sparseml.modifiers.obcq.pytorch import SparseGPTModifierPyTorch
 from sparseml.modifiers.quantization.gptq.pytorch import GPTQModifierPyTorch
-from sparseml.modifiers.quantization.pytorch import QuantizationModifierPyTorch
-from sparseml.modifiers.quantization_vllm.base import vLLMQuantizationModifier
+from sparseml.modifiers.quantization.pytorch import LegacyQuantizationModifierPyTorch
+from sparseml.modifiers.quantization_vllm.base import QuantizationModifier
 from tests.sparseml.modifiers.conf import LifecyleTestingHarness, setup_modifier_factory
 from tests.sparseml.pytorch.helpers import LinearNet
 from tests.testing_utils import requires_torch
@@ -92,13 +92,13 @@ def test_create_default_quant_modifier(self):
         testing_harness = LifecyleTestingHarness(model=LinearNet())
         modifier.on_initialize_structure(testing_harness.get_state())
         assert modifier.quantize
-        assert isinstance(modifier.quantization_modifier_, vLLMQuantizationModifier)
+        assert isinstance(modifier.quantization_modifier_, QuantizationModifier)
         default_config_group_name = "config_group_0"
         should_be_default_quant_scheme = modifier.quantization_modifier_.config_groups[
             default_config_group_name
         ]
         self.assertEqual(should_be_default_quant_scheme.input_activations.num_bits, 8)
-        # input activations are symmetric by default in vLLMQuantizationModifier
+        # input activations are symmetric by default in QuantizationModifier
         assert should_be_default_quant_scheme.input_activations.symmetric

         self.assertEqual(should_be_default_quant_scheme.weights.num_bits, 8)
@@ -120,7 +120,7 @@ def test_set_quant_if_modifer_already_exists(self):
             ),
         )

-        modifier = QuantizationModifierPyTorch(**kwargs)
+        modifier = LegacyQuantizationModifierPyTorch(**kwargs)
         testing_harness = LifecyleTestingHarness(model=model, start=-1)

         assert not testing_harness.get_state().model.qat_active()
@@ -159,7 +159,7 @@ def setUp(self):
                 }
             }
         }
-        self.quant_config = {"vLLMQuantizationModifier": self.quant_kwargs}
+        self.quant_config = {"QuantizationModifier": self.quant_kwargs}

     def test_set_quant_in_gptq(self):
         kwargs = dict(block_size=128, quantize=self.quant_config)
@@ -170,7 +170,7 @@ def test_set_quant_in_gptq(self):
         testing_harness = LifecyleTestingHarness(model=LinearNet())
         modifier.on_initialize_structure(testing_harness.get_state())
         assert modifier.quantize
-        self.assertIsInstance(modifier.quantization_modifier_, vLLMQuantizationModifier)
+        self.assertIsInstance(modifier.quantization_modifier_, QuantizationModifier)

         dict_scheme = dict(modifier.quantization_modifier_.config_groups)
         self._check_config(
diff --git a/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py b/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py
index 6b258b884cb..b8ece5d4180 100644
--- a/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py
+++ b/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py
@@ -21,7 +21,7 @@
 from sparseml.core.event import Event, EventType
 from sparseml.core.factory import ModifierFactory
 from sparseml.core.framework import Framework
-from sparseml.modifiers.quantization.pytorch import QuantizationModifierPyTorch
+from sparseml.modifiers.quantization.pytorch import LegacyQuantizationModifierPyTorch
 from sparseml.pytorch.sparsification.quantization.quantize import (
     is_qat_helper_module,
     is_quantizable_module,
@@ -45,14 +45,14 @@ def setUp(self):

     def test_quantization_registered(self):
         quant_obj = ModifierFactory.create(
-            type_="QuantizationModifier",
+            type_="LegacyQuantizationModifier",
             framework=Framework.pytorch,
             allow_experimental=False,
             allow_registered=True,
             **self.kwargs,
         )

-        self.assertIsInstance(quant_obj, QuantizationModifierPyTorch)
+        self.assertIsInstance(quant_obj, LegacyQuantizationModifierPyTorch)


 @pytest.mark.unit
@@ -71,7 +71,7 @@ def test_quantization_oneshot(self, model_class):
         state = State(framework=Framework.pytorch, start_event=Event())
         state.update(model=model, start=-1)

-        modifier = QuantizationModifierPyTorch(**self.kwargs)
+        modifier = LegacyQuantizationModifierPyTorch(**self.kwargs)

         modifier.initialize(state)

@@ -108,7 +108,7 @@ def setUp(self):
     def test_quantization_training(self, model_class):
         model = model_class()

-        modifier = QuantizationModifierPyTorch(**self.kwargs)
+        modifier = LegacyQuantizationModifierPyTorch(**self.kwargs)

         testing_harness = LifecyleTestingHarness(model=model)
         modifier.initialize(testing_harness.get_state())
diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml
index 48df197537c..2fa7af9d567 100644
--- a/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml
+++ b/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    vLLMQuantizationModifier:
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml
index 924dcd6e3f6..931f4e80ca5 100644
--- a/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml
+++ b/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    vLLMQuantizationModifier:
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml
index 753605fc1dd..b0c7051425d 100644
--- a/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml
+++ b/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    vLLMQuantizationModifier:
+    QuantizationModifier:
       ignore: ["lm_head"]
       config_groups:
         group_0:
diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml
index 19b9d196e6a..34e0a77e052 100644
--- a/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml
+++ b/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    vLLMQuantizationModifier:
+    QuantizationModifier:
       ignore: ["lm_head", "model.layers.0.mlp.down_proj"]
       config_groups:
         group_0:
diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml b/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml
index 350d07ce1c2..7d090943915 100644
--- a/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml
+++ b/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    QuantizationModifier:
+    LegacyQuantizationModifier:
       ignore:
         - model.layers.0.mlp.down_proj
         - lm_head
diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml b/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml
index 9d67e334fef..2540787d8dd 100644
--- a/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml
+++ b/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    QuantizationModifier:
+    LegacyQuantizationModifier:
       ignore:
         - model.layers.0.mlp.down_proj
         - lm_head
diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml b/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml
index 78e49595fe2..adab8340c2e 100644
--- a/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml
+++ b/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    QuantizationModifier:
+    LegacyQuantizationModifier:
       ignore:
         - model.layers.0.mlp.down_proj
         - lm_head
diff --git a/tests/sparseml/transformers/finetune/test_quantization.yaml b/tests/sparseml/transformers/finetune/test_quantization.yaml
index 89381c31006..eb2d4afdc39 100644
--- a/tests/sparseml/transformers/finetune/test_quantization.yaml
+++ b/tests/sparseml/transformers/finetune/test_quantization.yaml
@@ -1,6 +1,6 @@
 test_stage:
   quant_modifiers:
-    QuantizationModifier:
+    LegacyQuantizationModifier:
       ignore:
         - LlamaRotaryEmbedding
         - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml b/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml
index 5bef2cae22d..a91b7b4d56a 100644
--- a/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml
+++ b/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml
@@ -5,7 +5,7 @@ dataset: open_platypus
 first_recipe: |
   first_stage:
     quant_modifiers:
-      QuantizationModifier:
+      LegacyQuantizationModifier:
         ignore:
           - LlamaRotaryEmbedding
           - LlamaRMSNorm
@@ -17,7 +17,7 @@ first_recipe: |
 second_recipe: |
   second_stage:
     quant_modifiers:
-      QuantizationModifier:
+      LegacyQuantizationModifier:
         ignore:
           - LlamaRotaryEmbedding
           - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml b/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml
index 1b7cab983f4..64a43cbd943 100644
--- a/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml
+++ b/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml
@@ -5,7 +5,7 @@ dataset: open_platypus
 first_recipe: |
   first_stage:
     quant_modifiers:
-      QuantizationModifier:
+      LegacyQuantizationModifier:
         ignore:
           - LlamaRotaryEmbedding
           - LlamaRMSNorm
@@ -17,7 +17,7 @@ first_recipe: |
 second_recipe: |
   second_stage:
     quant_modifiers:
-      QuantizationModifier:
+      LegacyQuantizationModifier:
         ignore:
           - LlamaRotaryEmbedding
           - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml b/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml
index 42538955b5e..72ca3c08fc7 100644
--- a/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml
+++ b/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml
@@ -6,7 +6,7 @@ test_stage:
       [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
       [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
     ]
-    QuantizationModifier:
+    LegacyQuantizationModifier:
       ignore:
         - LlamaRotaryEmbedding
         - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/recipes/quant.yaml b/tests/sparseml/transformers/obcq/recipes/quant.yaml
index 756373fcf89..f5436b3873f 100644
--- a/tests/sparseml/transformers/obcq/recipes/quant.yaml
+++ b/tests/sparseml/transformers/obcq/recipes/quant.yaml
@@ -6,7 +6,7 @@ test_stage:
      [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
      [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
    ]
-    QuantizationModifier:
+    LegacyQuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml
index b8c9f3451e0..198b32f0e3c 100644
--- a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml
+++ b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml
@@ -6,7 +6,7 @@ test_stage:
      [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
      [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
    ]
-    QuantizationModifier:
+    LegacyQuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py b/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py
index 6fafab075b7..ea677db787f 100644
--- a/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py
+++ b/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py
@@ -37,7 +37,7 @@ def setUp(self):
         self.recipe = """
         first_stage:
             quant_modifiers:
-                QuantizationModifier:
+                LegacyQuantizationModifier:
                     ignore:
                         - Embedding
                     scheme_overrides:
diff --git a/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py
index 9091d28b29e..d1532378c27 100644
--- a/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py
+++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py
@@ -23,7 +23,7 @@
 def llama_recipe():
     return """test_stage:
     quant_modifiers:
-        QuantizationModifier:
+        LegacyQuantizationModifier:
             ignore:
                 - MatMulRightInput_QK
                 - MatMulLeftInput_QK
diff --git a/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py
index e71364a53e7..f47fafe0749 100644
--- a/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py
+++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py
@@ -23,7 +23,7 @@
 def mistral_recipe():
     return """test_stage:
     quant_modifiers:
-        QuantizationModifier:
+        LegacyQuantizationModifier:
             ignore:
                 - MatMulRightInput_QK
                 - MatMulLeftInput_QK
diff --git a/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py
index 411371b0bbf..7af36872500 100644
--- a/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py
+++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py
@@ -24,7 +24,7 @@
 def opt_recipe():
     return """test_stage:
     quant_modifiers:
-        QuantizationModifier:
+        LegacyQuantizationModifier:
            ignore:
                - BMMLeftInput_QK
                - BMMRightInput_QK
diff --git a/tests/sparseml/transformers/test_recipe_compatibility.py b/tests/sparseml/transformers/test_recipe_compatibility.py
index b0d303b1a4f..e0d7d2708ba 100644
--- a/tests/sparseml/transformers/test_recipe_compatibility.py
+++ b/tests/sparseml/transformers/test_recipe_compatibility.py
@@ -31,7 +31,7 @@ def model_path(tmp_path):
 def recipe():
     return """test_stage:
     obcq_modifiers:
-        QuantizationModifier:
+        LegacyQuantizationModifier:
            ignore:
                - LlamaRotaryEmbedding
                - LlamaRMSNorm
diff --git a/tests/sparseml/transformers/utils/test_initializers.py b/tests/sparseml/transformers/utils/test_initializers.py
index 4a85e286d30..f00adb3dd09 100644
--- a/tests/sparseml/transformers/utils/test_initializers.py
+++ b/tests/sparseml/transformers/utils/test_initializers.py
@@ -34,7 +34,7 @@
 def save_recipe_for_text_classification(source_path):
     recipe = """test_stage:
     quant_modifiers:
-        QuantizationModifier:
+        LegacyQuantizationModifier:
            post_oneshot_calibration: False
            scheme_overrides:
                Embedding: