From 1fcec3fb4e98bd8303164fb4b190d0b1acb19cb8 Mon Sep 17 00:00:00 2001
From: Sara Adkins
Date: Wed, 29 May 2024 11:08:49 -0400
Subject: [PATCH] Replace Quantization Modifier (#2307)

* convert old modifier to legacy
* redo folder structure
* fixing imports
* update import
* fix imports
---
 src/sparseml/modifiers/quantization/__init__.py | 2 --
 .../modifiers/quantization/gptq/base.py | 2 +-
 .../quantization}/__init__.py | 0
 .../quantization}/base.py | 4 ++--
 .../quantization}/pytorch.py | 6 +++---
 .../modifiers/quantization_legacy/__init__.py | 17 +++++++++++++++++
 .../base.py | 6 +++---
 .../modification/__init__.py | 0
 .../modification/modification_objects.py | 0
 .../modification/modify_model.py | 4 +++-
 .../modification/registry.py | 0
 .../pytorch.py | 12 ++++++------
 .../utils/__init__.py | 0
 .../utils/constants.py | 0
 .../utils/fake_quant_wrapper.py | 0
 .../utils/helpers.py | 2 +-
 .../utils/quantization_scheme.py | 4 +++-
 .../utils/quantize.py | 12 ++++++++----
 .../sparsification/compressed_tensors_utils.py | 8 ++++----
 .../modification/modifying_bert.py | 8 ++++++--
 .../modification/modifying_distilbert.py | 8 ++++++--
 .../modification/modifying_llama.py | 6 ++++--
 .../modification/modifying_mistral.py | 6 ++++--
 .../modification/modifying_mobilebert.py | 8 ++++++--
 .../modification/modifying_opt.py | 6 ++++--
 .../transformers/sparsification/sparse_model.py | 2 +-
 .../modification/test_modify_model.py | 6 ++++--
 .../modifiers/quantization/test_base.py | 10 +++++-----
 .../modifiers/pruning/sparsegpt/test_pytorch.py | 16 +++++++++-------
 .../modifiers/quantization/test_pytorch.py | 12 +++++++-----
 .../compression/recipes/new_quant_channel.yaml | 2 +-
 .../compression/recipes/new_quant_full.yaml | 2 +-
 .../compression/recipes/new_quant_simple.yaml | 2 +-
 .../compression/recipes/new_quant_weight.yaml | 2 +-
 .../compression/recipes/old_quant_channel.yaml | 2 +-
 .../compression/recipes/old_quant_full.yaml | 2 +-
 .../compression/recipes/old_quant_weight.yaml | 2 +-
 .../finetune/test_quantization.yaml | 2 +-
 .../repeat_quants/tiny_llama_repeat_quant.yaml | 4 ++--
 .../tiny_llama_separate_quant.yaml | 4 ++--
 .../recipes/additional_sparsity_with_quant.yaml | 2 +-
 .../transformers/obcq/recipes/quant.yaml | 2 +-
 .../obcq/recipes/quant_and_sparse.yaml | 2 +-
 .../obcq/test_obcq_fake_quant_wrapper.py | 2 +-
 .../sparsification/modification/conftest.py | 2 +-
 .../modification/test_modifying_llama.py | 2 +-
 .../modification/test_modifying_mistral.py | 2 +-
 .../modification/test_modifying_opt.py | 2 +-
 .../transformers/test_recipe_compatibility.py | 2 +-
 .../transformers/utils/test_initializers.py | 2 +-
 50 files changed, 129 insertions(+), 82 deletions(-)
 rename src/sparseml/modifiers/{quantization_vllm => quantization/quantization}/__init__.py (100%)
 rename src/sparseml/modifiers/{quantization_vllm => quantization/quantization}/base.py (97%)
 rename src/sparseml/modifiers/{quantization_vllm => quantization/quantization}/pytorch.py (96%)
 create mode 100644 src/sparseml/modifiers/quantization_legacy/__init__.py
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/base.py (97%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/modification/__init__.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/modification/modification_objects.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/modification/modify_model.py (96%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/modification/registry.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/pytorch.py (95%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/__init__.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/constants.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/fake_quant_wrapper.py (100%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/helpers.py (99%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/quantization_scheme.py (99%)
 rename src/sparseml/modifiers/{quantization => quantization_legacy}/utils/quantize.py (98%)

diff --git a/src/sparseml/modifiers/quantization/__init__.py b/src/sparseml/modifiers/quantization/__init__.py
index 9cdf715c135..ebdf28a6d5b 100644
--- a/src/sparseml/modifiers/quantization/__init__.py
+++ b/src/sparseml/modifiers/quantization/__init__.py
@@ -13,5 +13,3 @@
 # limitations under the License.

 # flake8: noqa
-
-from .base import *
diff --git a/src/sparseml/modifiers/quantization/gptq/base.py b/src/sparseml/modifiers/quantization/gptq/base.py
index cb0023d1919..004fce2ee7a 100644
--- a/src/sparseml/modifiers/quantization/gptq/base.py
+++ b/src/sparseml/modifiers/quantization/gptq/base.py
@@ -194,7 +194,7 @@ def _build_quant_modifier(self, framework):
         )
         quant_args["config_groups"] = {"config_group_0": default_quant_scheme}
         _LOGGER.info(f"Building quantization modifier with args: {quant_args}")
-        vllm_quant_config = {"vLLMQuantizationModifier": quant_args}
+        vllm_quant_config = {"QuantizationModifier": quant_args}
         self._build_quant_modifier_from_dict(vllm_quant_config, framework)

     def compressible_layers(self) -> Dict:
diff --git a/src/sparseml/modifiers/quantization_vllm/__init__.py b/src/sparseml/modifiers/quantization/quantization/__init__.py
similarity index 100%
rename from src/sparseml/modifiers/quantization_vllm/__init__.py
rename to src/sparseml/modifiers/quantization/quantization/__init__.py
diff --git a/src/sparseml/modifiers/quantization_vllm/base.py b/src/sparseml/modifiers/quantization/quantization/base.py
similarity index 97%
rename from src/sparseml/modifiers/quantization_vllm/base.py
rename to src/sparseml/modifiers/quantization/quantization/base.py
index c8b2522ecee..e6af6485aa3 100644
--- a/src/sparseml/modifiers/quantization_vllm/base.py
+++ b/src/sparseml/modifiers/quantization/quantization/base.py
@@ -24,10 +24,10 @@
 from sparseml.core import Event, Modifier


-__all__ = ["vLLMQuantizationModifier"]
+__all__ = ["QuantizationModifier"]


-class vLLMQuantizationModifier(Modifier):
+class QuantizationModifier(Modifier):
     """
     Enables post training quantization (PTQ) and quantization aware training (QAT)
     for a given module or its submodules.
After calibration (PTQ) or the start epoch (QAT), diff --git a/src/sparseml/modifiers/quantization_vllm/pytorch.py b/src/sparseml/modifiers/quantization/quantization/pytorch.py similarity index 96% rename from src/sparseml/modifiers/quantization_vllm/pytorch.py rename to src/sparseml/modifiers/quantization/quantization/pytorch.py index a6e7f179525..246fd3ce52a 100644 --- a/src/sparseml/modifiers/quantization_vllm/pytorch.py +++ b/src/sparseml/modifiers/quantization/quantization/pytorch.py @@ -23,16 +23,16 @@ set_module_for_calibration, ) from sparseml.core import Event, EventType, State -from sparseml.modifiers.quantization_vllm.base import vLLMQuantizationModifier +from sparseml.modifiers.quantization.quantization.base import QuantizationModifier from sparseml.modifiers.utils.pytorch_helpers import run_calibration_forward _LOGGER = logging.getLogger(__name__) -class vLLMQuantizationModifierPyTorch(vLLMQuantizationModifier): +class QuantizationModifierPyTorch(QuantizationModifier): """ - PyTorch specific implementation of vLLMQuantizationModifier + PyTorch specific implementation of QuantizationModifier Enables post training quantization (PTQ) and quantization aware training (QAT) for a given module or its submodules. After calibration (PTQ) or the start epoch (QAT), diff --git a/src/sparseml/modifiers/quantization_legacy/__init__.py b/src/sparseml/modifiers/quantization_legacy/__init__.py new file mode 100644 index 00000000000..9cdf715c135 --- /dev/null +++ b/src/sparseml/modifiers/quantization_legacy/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .base import * diff --git a/src/sparseml/modifiers/quantization/base.py b/src/sparseml/modifiers/quantization_legacy/base.py similarity index 97% rename from src/sparseml/modifiers/quantization/base.py rename to src/sparseml/modifiers/quantization_legacy/base.py index e66f5b9ea72..9b9f1569f09 100644 --- a/src/sparseml/modifiers/quantization/base.py +++ b/src/sparseml/modifiers/quantization_legacy/base.py @@ -17,17 +17,17 @@ from sparseml.core import Event, Modifier -__all__ = ["QuantizationModifier"] +__all__ = ["LegacyQuantizationModifier"] -class QuantizationModifier(Modifier): +class LegacyQuantizationModifier(Modifier): """ Enables quantization aware training (QAT) for a given module or its submodules After the start epoch, the specified module(s) forward pass will emulate quantized execution and the modifier will be enabled until training is completed. 
| Sample yaml: - | QuantizationModifier: + | LegacyQuantizationModifier: | start: 0.0 | scheme: | input_activations: diff --git a/src/sparseml/modifiers/quantization/modification/__init__.py b/src/sparseml/modifiers/quantization_legacy/modification/__init__.py similarity index 100% rename from src/sparseml/modifiers/quantization/modification/__init__.py rename to src/sparseml/modifiers/quantization_legacy/modification/__init__.py diff --git a/src/sparseml/modifiers/quantization/modification/modification_objects.py b/src/sparseml/modifiers/quantization_legacy/modification/modification_objects.py similarity index 100% rename from src/sparseml/modifiers/quantization/modification/modification_objects.py rename to src/sparseml/modifiers/quantization_legacy/modification/modification_objects.py diff --git a/src/sparseml/modifiers/quantization/modification/modify_model.py b/src/sparseml/modifiers/quantization_legacy/modification/modify_model.py similarity index 96% rename from src/sparseml/modifiers/quantization/modification/modify_model.py rename to src/sparseml/modifiers/quantization_legacy/modification/modify_model.py index 1fee2d70c3c..97a1f1022da 100644 --- a/src/sparseml/modifiers/quantization/modification/modify_model.py +++ b/src/sparseml/modifiers/quantization_legacy/modification/modify_model.py @@ -15,7 +15,9 @@ import logging import os -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) _LOGGER = logging.getLogger(__name__) diff --git a/src/sparseml/modifiers/quantization/modification/registry.py b/src/sparseml/modifiers/quantization_legacy/modification/registry.py similarity index 100% rename from src/sparseml/modifiers/quantization/modification/registry.py rename to src/sparseml/modifiers/quantization_legacy/modification/registry.py diff --git a/src/sparseml/modifiers/quantization/pytorch.py b/src/sparseml/modifiers/quantization_legacy/pytorch.py similarity index 95% rename from src/sparseml/modifiers/quantization/pytorch.py rename to src/sparseml/modifiers/quantization_legacy/pytorch.py index 927d8db79d3..34d8e9ac54e 100644 --- a/src/sparseml/modifiers/quantization/pytorch.py +++ b/src/sparseml/modifiers/quantization_legacy/pytorch.py @@ -19,18 +19,18 @@ from torch.nn import Module from sparseml.core import Event, EventType, State -from sparseml.modifiers.quantization.base import QuantizationModifier -from sparseml.modifiers.quantization.modification import modify_model -from sparseml.modifiers.quantization.utils.helpers import ( +from sparseml.modifiers.quantization_legacy.base import LegacyQuantizationModifier +from sparseml.modifiers.quantization_legacy.modification import modify_model +from sparseml.modifiers.quantization_legacy.utils.helpers import ( configure_module_bn_wrappers, freeze_bn_stats, fuse_module_conv_bn_relus, ) -from sparseml.modifiers.quantization.utils.quantization_scheme import ( +from sparseml.modifiers.quantization_legacy.utils.quantization_scheme import ( QuantizationScheme, QuantizationSchemeLoadable, ) -from sparseml.modifiers.quantization.utils.quantize import ( +from sparseml.modifiers.quantization_legacy.utils.quantize import ( convert_module_qat_from_schemes, raise_if_torch_quantization_not_available, set_quantization_schemes, @@ -42,7 +42,7 @@ _LOGGER = logging.getLogger(__name__) -class QuantizationModifierPyTorch(QuantizationModifier): +class LegacyQuantizationModifierPyTorch(LegacyQuantizationModifier): """ 
Pytorch-specific implementation of quantization modifier diff --git a/src/sparseml/modifiers/quantization/utils/__init__.py b/src/sparseml/modifiers/quantization_legacy/utils/__init__.py similarity index 100% rename from src/sparseml/modifiers/quantization/utils/__init__.py rename to src/sparseml/modifiers/quantization_legacy/utils/__init__.py diff --git a/src/sparseml/modifiers/quantization/utils/constants.py b/src/sparseml/modifiers/quantization_legacy/utils/constants.py similarity index 100% rename from src/sparseml/modifiers/quantization/utils/constants.py rename to src/sparseml/modifiers/quantization_legacy/utils/constants.py diff --git a/src/sparseml/modifiers/quantization/utils/fake_quant_wrapper.py b/src/sparseml/modifiers/quantization_legacy/utils/fake_quant_wrapper.py similarity index 100% rename from src/sparseml/modifiers/quantization/utils/fake_quant_wrapper.py rename to src/sparseml/modifiers/quantization_legacy/utils/fake_quant_wrapper.py diff --git a/src/sparseml/modifiers/quantization/utils/helpers.py b/src/sparseml/modifiers/quantization_legacy/utils/helpers.py similarity index 99% rename from src/sparseml/modifiers/quantization/utils/helpers.py rename to src/sparseml/modifiers/quantization_legacy/utils/helpers.py index 318769e22ad..dd93d46dcfb 100644 --- a/src/sparseml/modifiers/quantization/utils/helpers.py +++ b/src/sparseml/modifiers/quantization_legacy/utils/helpers.py @@ -26,7 +26,7 @@ from torch import quantization as torch_quantization from torch.nn import BatchNorm2d, Conv2d, Embedding, Module, ReLU -from sparseml.modifiers.quantization.utils.quantization_scheme import ( +from sparseml.modifiers.quantization_legacy.utils.quantization_scheme import ( QuantizationArgs, QuantizationScheme, get_observer, diff --git a/src/sparseml/modifiers/quantization/utils/quantization_scheme.py b/src/sparseml/modifiers/quantization_legacy/utils/quantization_scheme.py similarity index 99% rename from src/sparseml/modifiers/quantization/utils/quantization_scheme.py rename to src/sparseml/modifiers/quantization_legacy/utils/quantization_scheme.py index 29e64bf6477..f235cbfdf8c 100644 --- a/src/sparseml/modifiers/quantization/utils/quantization_scheme.py +++ b/src/sparseml/modifiers/quantization_legacy/utils/quantization_scheme.py @@ -30,7 +30,9 @@ except Exception: torch_quantization = None -from sparseml.modifiers.quantization.utils.fake_quant_wrapper import FakeQuantizeWrapper +from sparseml.modifiers.quantization_legacy.utils.fake_quant_wrapper import ( + FakeQuantizeWrapper, +) __all__ = [ diff --git a/src/sparseml/modifiers/quantization/utils/quantize.py b/src/sparseml/modifiers/quantization_legacy/utils/quantize.py similarity index 98% rename from src/sparseml/modifiers/quantization/utils/quantize.py rename to src/sparseml/modifiers/quantization_legacy/utils/quantize.py index 3b6d17cab65..038ae5cab92 100644 --- a/src/sparseml/modifiers/quantization/utils/quantize.py +++ b/src/sparseml/modifiers/quantization_legacy/utils/quantize.py @@ -22,17 +22,21 @@ from packaging import version from torch.nn import Identity, Module -from sparseml.modifiers.quantization.utils.constants import ( +from sparseml.modifiers.quantization_legacy.utils.constants import ( FUSED_MODULE_NAMES, NON_QUANTIZABLE_MODULE_NAMES, ) -from sparseml.modifiers.quantization.utils.fake_quant_wrapper import FakeQuantizeWrapper -from sparseml.modifiers.quantization.utils.helpers import ( +from sparseml.modifiers.quantization_legacy.utils.fake_quant_wrapper import ( + FakeQuantizeWrapper, +) +from 
sparseml.modifiers.quantization_legacy.utils.helpers import ( QATWrapper, configure_module_default_qconfigs, prepare_embeddings_qat, ) -from sparseml.modifiers.quantization.utils.quantization_scheme import QuantizationScheme +from sparseml.modifiers.quantization_legacy.utils.quantization_scheme import ( + QuantizationScheme, +) from sparseml.pytorch.utils import get_layer from sparseml.utils.fsdp.context import fix_fsdp_module_name diff --git a/src/sparseml/transformers/sparsification/compressed_tensors_utils.py b/src/sparseml/transformers/sparsification/compressed_tensors_utils.py index c62a1eb9bf9..0a7e914393c 100644 --- a/src/sparseml/transformers/sparsification/compressed_tensors_utils.py +++ b/src/sparseml/transformers/sparsification/compressed_tensors_utils.py @@ -83,10 +83,10 @@ def save_pretrained_wrapper( # check if we are in the old quantization framework if qat_active(model) and not is_model_quantized(model): _LOGGER.info( - "Compression for models quantized with QuantizationModifer is not " - "supported. Save will be run without compression and no sparsity " - "statistics will be calculated. To save a quantized model in a " - "compressed state please use vLLMQuantizationModifier instead." + "Compression for models quantized with LegacyQuantizationModifer " + "is not supported. Save will be run without compression and no " + "sparsity statistics will be calculated. To save a quantized model " + "in a compressed state please use QuantizationModifier instead." ) original_save_pretrained.__get__(model, model_class)( diff --git a/src/sparseml/transformers/sparsification/modification/modifying_bert.py b/src/sparseml/transformers/sparsification/modification/modifying_bert.py index b1c273999ba..fccb65ea885 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_bert.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_bert.py @@ -25,8 +25,12 @@ from torch import nn from transformers.models.bert.modeling_bert import BertSelfAttention -from sparseml.modifiers.quantization.modification.modification_objects import QATMatMul -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.modification_objects import ( + QATMatMul, +) +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git a/src/sparseml/transformers/sparsification/modification/modifying_distilbert.py b/src/sparseml/transformers/sparsification/modification/modifying_distilbert.py index 2cc9915b900..d2bf92dd637 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_distilbert.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_distilbert.py @@ -27,8 +27,12 @@ MultiHeadSelfAttention, ) -from sparseml.modifiers.quantization.modification.modification_objects import QATMatMul -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.modification_objects import ( + QATMatMul, +) +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git 
a/src/sparseml/transformers/sparsification/modification/modifying_llama.py b/src/sparseml/transformers/sparsification/modification/modifying_llama.py index d51827fc8f3..d7aea9ac1c6 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_llama.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_llama.py @@ -32,11 +32,13 @@ repeat_kv, ) -from sparseml.modifiers.quantization.modification.modification_objects import ( +from sparseml.modifiers.quantization_legacy.modification.modification_objects import ( QuantizableIdentity, QuantizableMatMul, ) -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git a/src/sparseml/transformers/sparsification/modification/modifying_mistral.py b/src/sparseml/transformers/sparsification/modification/modifying_mistral.py index 1a03d635027..a27a75d5992 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_mistral.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_mistral.py @@ -32,11 +32,13 @@ repeat_kv, ) -from sparseml.modifiers.quantization.modification.modification_objects import ( +from sparseml.modifiers.quantization_legacy.modification.modification_objects import ( QuantizableIdentity, QuantizableMatMul, ) -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git a/src/sparseml/transformers/sparsification/modification/modifying_mobilebert.py b/src/sparseml/transformers/sparsification/modification/modifying_mobilebert.py index 469ca36a736..2ab9d819fb5 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_mobilebert.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_mobilebert.py @@ -20,8 +20,12 @@ from torch import nn from transformers.models.mobilebert.modeling_mobilebert import MobileBertEmbeddings -from sparseml.modifiers.quantization.modification.modification_objects import QATLinear -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.modification_objects import ( + QATLinear, +) +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git a/src/sparseml/transformers/sparsification/modification/modifying_opt.py b/src/sparseml/transformers/sparsification/modification/modifying_opt.py index 5f696ee36c7..eb42dd6d686 100644 --- a/src/sparseml/transformers/sparsification/modification/modifying_opt.py +++ b/src/sparseml/transformers/sparsification/modification/modifying_opt.py @@ -23,11 +23,13 @@ from torch import nn from transformers.models.opt.modeling_opt import OPTAttention, OptFlashAttention2 -from sparseml.modifiers.quantization.modification.modification_objects import ( +from 
sparseml.modifiers.quantization_legacy.modification.modification_objects import ( QuantizableBatchMatmul, QuantizableIdentity, ) -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparseml.pytorch.utils.helpers import swap_modules from sparseml.transformers.sparsification.modification.base import ( check_transformers_version, diff --git a/src/sparseml/transformers/sparsification/sparse_model.py b/src/sparseml/transformers/sparsification/sparse_model.py index 76e75862fff..3132411d332 100644 --- a/src/sparseml/transformers/sparsification/sparse_model.py +++ b/src/sparseml/transformers/sparsification/sparse_model.py @@ -31,7 +31,7 @@ from transformers.file_utils import WEIGHTS_NAME from compressed_tensors.compressors import ModelCompressor -from sparseml.modifiers.quantization.modification import modify_model +from sparseml.modifiers.quantization_legacy.modification import modify_model from sparseml.pytorch.model_load.helpers import ( apply_recipe_structure_to_model, log_model_load, diff --git a/tests/sparseml/modifiers/quantization/modification/test_modify_model.py b/tests/sparseml/modifiers/quantization/modification/test_modify_model.py index 2bde19a5757..4ad1cb6580b 100644 --- a/tests/sparseml/modifiers/quantization/modification/test_modify_model.py +++ b/tests/sparseml/modifiers/quantization/modification/test_modify_model.py @@ -17,8 +17,10 @@ import pytest -from sparseml.modifiers.quantization.modification import modify_model -from sparseml.modifiers.quantization.modification.registry import ModificationRegistry +from sparseml.modifiers.quantization_legacy.modification import modify_model +from sparseml.modifiers.quantization_legacy.modification.registry import ( + ModificationRegistry, +) from sparsezoo.utils.registry import _ALIAS_REGISTRY, _REGISTRY, standardize_lookup_name diff --git a/tests/sparseml/modifiers/quantization/test_base.py b/tests/sparseml/modifiers/quantization/test_base.py index 064d8dcb671..491f03c1866 100644 --- a/tests/sparseml/modifiers/quantization/test_base.py +++ b/tests/sparseml/modifiers/quantization/test_base.py @@ -19,7 +19,7 @@ from sparseml.core.event import Event from sparseml.core.factory import ModifierFactory from sparseml.core.framework import Framework -from sparseml.modifiers.quantization import QuantizationModifier +from sparseml.modifiers.quantization_legacy import LegacyQuantizationModifier from tests.sparseml.modifiers.conf import setup_modifier_factory @@ -31,14 +31,14 @@ def setUp(self): def test_quantization_registered(self): quant_obj = ModifierFactory.create( - type_="QuantizationModifier", + type_="LegacyQuantizationModifier", framework=Framework.general, allow_experimental=False, allow_registered=True, **self.kwargs, ) - self.assertIsInstance(quant_obj, QuantizationModifier) + self.assertIsInstance(quant_obj, LegacyQuantizationModifier) @pytest.mark.unit @@ -52,7 +52,7 @@ def setUp(self): def test_end_epochs(self): disable_quant_epoch, freeze_bn_epoch = None, None - obj_modifier = QuantizationModifier( + obj_modifier = LegacyQuantizationModifier( start=self.start, scheme=self.scheme, disable_quantization_observer_epoch=disable_quant_epoch, @@ -68,7 +68,7 @@ def test_end_epochs(self): assert not obj_modifier.check_should_freeze_bn_stats(event) disable_quant_epoch, freeze_bn_epoch = 3.5, 5.0 - obj_modifier = QuantizationModifier( + obj_modifier = LegacyQuantizationModifier( start=self.start, 
scheme=self.scheme, disable_quantization_observer_epoch=disable_quant_epoch, diff --git a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py index e52b6e2ef23..0fcb66eee9c 100644 --- a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py +++ b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py @@ -21,8 +21,10 @@ from sparseml.core.model import ModifiableModel from sparseml.modifiers.obcq.pytorch import SparseGPTModifierPyTorch from sparseml.modifiers.quantization.gptq.pytorch import GPTQModifierPyTorch -from sparseml.modifiers.quantization.pytorch import QuantizationModifierPyTorch -from sparseml.modifiers.quantization_vllm.base import vLLMQuantizationModifier +from sparseml.modifiers.quantization.quantization.base import QuantizationModifier +from sparseml.modifiers.quantization_legacy.pytorch import ( + LegacyQuantizationModifierPyTorch, +) from tests.sparseml.modifiers.conf import LifecyleTestingHarness, setup_modifier_factory from tests.sparseml.pytorch.helpers import LinearNet from tests.testing_utils import requires_torch @@ -92,13 +94,13 @@ def test_create_default_quant_modifier(self): testing_harness = LifecyleTestingHarness(model=LinearNet()) modifier.on_initialize_structure(testing_harness.get_state()) assert modifier.quantize - assert isinstance(modifier.quantization_modifier_, vLLMQuantizationModifier) + assert isinstance(modifier.quantization_modifier_, QuantizationModifier) default_config_group_name = "config_group_0" should_be_default_quant_scheme = modifier.quantization_modifier_.config_groups[ default_config_group_name ] self.assertEqual(should_be_default_quant_scheme.input_activations.num_bits, 8) - # input activations are symmetric by default in vLLMQuantizationModifier + # input activations are symmetric by default in QuantizationModifier assert should_be_default_quant_scheme.input_activations.symmetric self.assertEqual(should_be_default_quant_scheme.weights.num_bits, 8) @@ -120,7 +122,7 @@ def test_set_quant_if_modifer_already_exists(self): ), ) - modifier = QuantizationModifierPyTorch(**kwargs) + modifier = LegacyQuantizationModifierPyTorch(**kwargs) testing_harness = LifecyleTestingHarness(model=model, start=-1) assert not testing_harness.get_state().model.qat_active() @@ -159,7 +161,7 @@ def setUp(self): } } } - self.quant_config = {"vLLMQuantizationModifier": self.quant_kwargs} + self.quant_config = {"QuantizationModifier": self.quant_kwargs} def test_set_quant_in_gptq(self): kwargs = dict(block_size=128, quantize=self.quant_config) @@ -170,7 +172,7 @@ def test_set_quant_in_gptq(self): testing_harness = LifecyleTestingHarness(model=LinearNet()) modifier.on_initialize_structure(testing_harness.get_state()) assert modifier.quantize - self.assertIsInstance(modifier.quantization_modifier_, vLLMQuantizationModifier) + self.assertIsInstance(modifier.quantization_modifier_, QuantizationModifier) dict_scheme = dict(modifier.quantization_modifier_.config_groups) self._check_config( diff --git a/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py b/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py index 6b258b884cb..2e9750c60c7 100644 --- a/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py +++ b/tests/sparseml/pytorch/modifiers/quantization/test_pytorch.py @@ -21,7 +21,9 @@ from sparseml.core.event import Event, EventType from sparseml.core.factory import ModifierFactory from sparseml.core.framework import Framework -from 
sparseml.modifiers.quantization.pytorch import QuantizationModifierPyTorch +from sparseml.modifiers.quantization_legacy.pytorch import ( + LegacyQuantizationModifierPyTorch, +) from sparseml.pytorch.sparsification.quantization.quantize import ( is_qat_helper_module, is_quantizable_module, @@ -45,14 +47,14 @@ def setUp(self): def test_quantization_registered(self): quant_obj = ModifierFactory.create( - type_="QuantizationModifier", + type_="LegacyQuantizationModifier", framework=Framework.pytorch, allow_experimental=False, allow_registered=True, **self.kwargs, ) - self.assertIsInstance(quant_obj, QuantizationModifierPyTorch) + self.assertIsInstance(quant_obj, LegacyQuantizationModifierPyTorch) @pytest.mark.unit @@ -71,7 +73,7 @@ def test_quantization_oneshot(self, model_class): state = State(framework=Framework.pytorch, start_event=Event()) state.update(model=model, start=-1) - modifier = QuantizationModifierPyTorch(**self.kwargs) + modifier = LegacyQuantizationModifierPyTorch(**self.kwargs) modifier.initialize(state) @@ -108,7 +110,7 @@ def setUp(self): def test_quantization_training(self, model_class): model = model_class() - modifier = QuantizationModifierPyTorch(**self.kwargs) + modifier = LegacyQuantizationModifierPyTorch(**self.kwargs) testing_harness = LifecyleTestingHarness(model=model) modifier.initialize(testing_harness.get_state()) diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml index 48df197537c..2fa7af9d567 100644 --- a/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml +++ b/tests/sparseml/transformers/compression/recipes/new_quant_channel.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - vLLMQuantizationModifier: + QuantizationModifier: ignore: ["lm_head", "model.layers.0.mlp.down_proj"] config_groups: group_0: diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml index 924dcd6e3f6..931f4e80ca5 100644 --- a/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml +++ b/tests/sparseml/transformers/compression/recipes/new_quant_full.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - vLLMQuantizationModifier: + QuantizationModifier: ignore: ["lm_head", "model.layers.0.mlp.down_proj"] config_groups: group_0: diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml index 753605fc1dd..b0c7051425d 100644 --- a/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml +++ b/tests/sparseml/transformers/compression/recipes/new_quant_simple.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - vLLMQuantizationModifier: + QuantizationModifier: ignore: ["lm_head"] config_groups: group_0: diff --git a/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml b/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml index 19b9d196e6a..34e0a77e052 100644 --- a/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml +++ b/tests/sparseml/transformers/compression/recipes/new_quant_weight.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - vLLMQuantizationModifier: + QuantizationModifier: ignore: ["lm_head", "model.layers.0.mlp.down_proj"] config_groups: group_0: diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml 
b/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml index 350d07ce1c2..7d090943915 100644 --- a/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml +++ b/tests/sparseml/transformers/compression/recipes/old_quant_channel.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - model.layers.0.mlp.down_proj - lm_head diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml b/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml index 9d67e334fef..2540787d8dd 100644 --- a/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml +++ b/tests/sparseml/transformers/compression/recipes/old_quant_full.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - model.layers.0.mlp.down_proj - lm_head diff --git a/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml b/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml index 78e49595fe2..adab8340c2e 100644 --- a/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml +++ b/tests/sparseml/transformers/compression/recipes/old_quant_weight.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - model.layers.0.mlp.down_proj - lm_head diff --git a/tests/sparseml/transformers/finetune/test_quantization.yaml b/tests/sparseml/transformers/finetune/test_quantization.yaml index 89381c31006..eb2d4afdc39 100644 --- a/tests/sparseml/transformers/finetune/test_quantization.yaml +++ b/tests/sparseml/transformers/finetune/test_quantization.yaml @@ -1,6 +1,6 @@ test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml b/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml index 5bef2cae22d..a91b7b4d56a 100644 --- a/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml +++ b/tests/sparseml/transformers/obcq/obcq_configs/repeat_quants/tiny_llama_repeat_quant.yaml @@ -5,7 +5,7 @@ dataset: open_platypus first_recipe: | first_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm @@ -17,7 +17,7 @@ first_recipe: | second_recipe: | second_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml b/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml index 1b7cab983f4..64a43cbd943 100644 --- a/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml +++ b/tests/sparseml/transformers/obcq/obcq_configs/separate_quants/tiny_llama_separate_quant.yaml @@ -5,7 +5,7 @@ dataset: open_platypus first_recipe: | first_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm @@ -17,7 +17,7 @@ first_recipe: | second_recipe: | second_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml 
b/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml index 42538955b5e..72ca3c08fc7 100644 --- a/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml +++ b/tests/sparseml/transformers/obcq/recipes/additional_sparsity_with_quant.yaml @@ -6,7 +6,7 @@ test_stage: [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"], [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"] ] - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/recipes/quant.yaml b/tests/sparseml/transformers/obcq/recipes/quant.yaml index 756373fcf89..f5436b3873f 100644 --- a/tests/sparseml/transformers/obcq/recipes/quant.yaml +++ b/tests/sparseml/transformers/obcq/recipes/quant.yaml @@ -6,7 +6,7 @@ test_stage: [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"], [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"] ] - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml index b8c9f3451e0..198b32f0e3c 100644 --- a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml +++ b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml @@ -6,7 +6,7 @@ test_stage: [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"], [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"] ] - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py b/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py index 6fafab075b7..ea677db787f 100644 --- a/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py +++ b/tests/sparseml/transformers/obcq/test_obcq_fake_quant_wrapper.py @@ -37,7 +37,7 @@ def setUp(self): self.recipe = """ first_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - Embedding scheme_overrides: diff --git a/tests/sparseml/transformers/sparsification/modification/conftest.py b/tests/sparseml/transformers/sparsification/modification/conftest.py index d6a9fd1c0ad..9c19cc702c2 100644 --- a/tests/sparseml/transformers/sparsification/modification/conftest.py +++ b/tests/sparseml/transformers/sparsification/modification/conftest.py @@ -18,7 +18,7 @@ from transformers import AutoConfig, AutoModel from accelerate import init_empty_weights -from sparseml.modifiers.quantization.modification import modify_model +from sparseml.modifiers.quantization_legacy.modification import modify_model from sparseml.pytorch.model_load.helpers import apply_recipe_structure_to_model from sparseml.transformers import SparseAutoConfig, SparseAutoModelForCausalLM diff --git a/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py index 9091d28b29e..d1532378c27 100644 --- a/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py +++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_llama.py @@ -23,7 +23,7 @@ def llama_recipe(): return """test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - MatMulRightInput_QK - MatMulLeftInput_QK diff --git 
a/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py index e71364a53e7..f47fafe0749 100644 --- a/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py +++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_mistral.py @@ -23,7 +23,7 @@ def mistral_recipe(): return """test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - MatMulRightInput_QK - MatMulLeftInput_QK diff --git a/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py b/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py index 411371b0bbf..7af36872500 100644 --- a/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py +++ b/tests/sparseml/transformers/sparsification/modification/test_modifying_opt.py @@ -24,7 +24,7 @@ def opt_recipe(): return """test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - BMMLeftInput_QK - BMMRightInput_QK diff --git a/tests/sparseml/transformers/test_recipe_compatibility.py b/tests/sparseml/transformers/test_recipe_compatibility.py index b0d303b1a4f..e0d7d2708ba 100644 --- a/tests/sparseml/transformers/test_recipe_compatibility.py +++ b/tests/sparseml/transformers/test_recipe_compatibility.py @@ -31,7 +31,7 @@ def model_path(tmp_path): def recipe(): return """test_stage: obcq_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: ignore: - LlamaRotaryEmbedding - LlamaRMSNorm diff --git a/tests/sparseml/transformers/utils/test_initializers.py b/tests/sparseml/transformers/utils/test_initializers.py index 4a85e286d30..f00adb3dd09 100644 --- a/tests/sparseml/transformers/utils/test_initializers.py +++ b/tests/sparseml/transformers/utils/test_initializers.py @@ -34,7 +34,7 @@ def save_recipe_for_text_classification(source_path): recipe = """test_stage: quant_modifiers: - QuantizationModifier: + LegacyQuantizationModifier: post_oneshot_calibration: False scheme_overrides: Embedding: