diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml index 35bdebf609d..582acfae351 100644 --- a/.github/workflows/test-check.yaml +++ b/.github/workflows/test-check.yaml @@ -164,6 +164,16 @@ jobs: run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - name: "Clean sparsezoo directory" run: rm -r sparsezoo/ + - uses: actions/checkout@v2 + - uses: actions/checkout@v2 + with: + repository: "neuralmagic/compressed-tensors" + path: "compressed-tensors" + ref: ${{needs.test-setup.outputs.branch}} + - name: "⚙️ Install compressed-tensors dependencies" + run: pip3 install -U pip && pip3 install setuptools compressed-tensors/ + - name: "Clean compressed-tensors directory" + run: rm -r compressed-tensors/ - name: "⚙️ Install dependencies" run: pip3 install .[dev,torchvision,onnxruntime,transformers] - name: "🔬 Running pytorch tests" @@ -193,6 +203,16 @@ jobs: run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - name: "Clean sparsezoo directory" run: rm -r sparsezoo/ + - uses: actions/checkout@v2 + - uses: actions/checkout@v2 + with: + repository: "neuralmagic/compressed-tensors" + path: "compressed-tensors" + ref: ${{needs.test-setup.outputs.branch}} + - name: "⚙️ Install compressed-tensors dependencies" + run: pip3 install -U pip && pip3 install setuptools compressed-tensors/ + - name: "Clean compressed-tensors directory" + run: rm -r compressed-tensors/ - name: "⚙️ Install dependencies" run: pip3 install .[dev,torchvision,onnxruntime,transformers] - name: "🔬 Running pytorch tests" diff --git a/src/sparseml/modifiers/quantization/gptq/pytorch.py b/src/sparseml/modifiers/quantization/gptq/pytorch.py index 4bc3a8ff953..6f1c9f40bbd 100644 --- a/src/sparseml/modifiers/quantization/gptq/pytorch.py +++ b/src/sparseml/modifiers/quantization/gptq/pytorch.py @@ -20,9 +20,9 @@ from sparseml.core.model import ModifiableModel from sparseml.core.state import State from sparseml.modifiers.quantization.gptq.base import GPTQModifier +from sparseml.modifiers.quantization.gptq.utils.gptq_wrapper import GPTQWrapper from sparseml.modifiers.utils.layer_compressor import LayerCompressor from sparseml.modifiers.utils.pytorch_helpers import run_calibration_forward -from src.sparseml.modifiers.quantization.gptq.utils.gptq_wrapper import GPTQWrapper __all__ = ["GPTQModifierPyTorch"] @@ -117,13 +117,7 @@ def initialize_compression( for idx, (name, layer) in enumerate(self.compressible_layers_.items()): _LOGGER.info(f"Preparing {name} for compression") - if isinstance(self.sparsity, Dict): - layer_sparsity = self.sparsity[name] - elif isinstance(self.sparsity, List): - layer_sparsity = self.sparsity[idx] - else: # float - layer_sparsity = self.sparsity - args = self._pruning_arguments(layer_sparsity) + args = self._pruning_arguments() comp_cls = self._compression_class() compressor = LayerCompressor(comp_cls, self.model, layer, idx, name, args) if not self.sequential_update: diff --git a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py index b673c887c60..e52b6e2ef23 100644 --- a/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py +++ b/tests/sparseml/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py @@ -98,7 +98,9 @@ def test_create_default_quant_modifier(self): default_config_group_name ] self.assertEqual(should_be_default_quant_scheme.input_activations.num_bits, 8) - assert not should_be_default_quant_scheme.input_activations.symmetric + # input activations are symmetric by default in vLLMQuantizationModifier + assert should_be_default_quant_scheme.input_activations.symmetric + self.assertEqual(should_be_default_quant_scheme.weights.num_bits, 8) assert should_be_default_quant_scheme.weights.symmetric diff --git a/tests/sparseml/transformers/obcq/recipes/quant.yaml b/tests/sparseml/transformers/obcq/recipes/quant.yaml index 0de93074d63..756373fcf89 100644 --- a/tests/sparseml/transformers/obcq/recipes/quant.yaml +++ b/tests/sparseml/transformers/obcq/recipes/quant.yaml @@ -23,12 +23,10 @@ test_stage: weights: num_bits: 8 symmetric: False - SparseGPTModifier: - sparsity: 0.0 + GPTQModifier: block_size: 128 sequential_update: False percdamp: 0.01 - mask_structure: "0:0" targets: [ "model.layers.0", "model.layers.1", @@ -36,16 +34,4 @@ test_stage: "model.layers.3", "model.layers.4", "model.layers.5" - ] - GPTQModifier: - block_size: 128 - sequential_update: False - percdamp: 0.01 - targets: [ - "model.layers.0", - "model.layers.1", - "model.layers.2", - "model.layers.3", - "model.layers.4", - "model.layers.5" - ] \ No newline at end of file + ] \ No newline at end of file diff --git a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml index 7af58d32815..b8c9f3451e0 100644 --- a/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml +++ b/tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml @@ -24,12 +24,10 @@ test_stage: weights: num_bits: 8 symmetric: False - SparseGPTModifier: - sparsity: 0.5 + GPTQModifier: block_size: 128 sequential_update: False percdamp: 0.01 - mask_structure: "0:0" targets: [ "model.layers.0", "model.layers.1", @@ -38,10 +36,12 @@ test_stage: "model.layers.4", "model.layers.5" ] - GPTQModifier: + SparseGPTModifier: + sparsity: 0.5 block_size: 128 sequential_update: False percdamp: 0.01 + mask_structure: "0:0" targets: [ "model.layers.0", "model.layers.1", diff --git a/tests/sparseml/transformers/obcq/test_sgpt_defaults.py b/tests/sparseml/transformers/obcq/test_sgpt_defaults.py index 3612e91c69a..76a7183f76e 100644 --- a/tests/sparseml/transformers/obcq/test_sgpt_defaults.py +++ b/tests/sparseml/transformers/obcq/test_sgpt_defaults.py @@ -21,7 +21,7 @@ @pytest.mark.integration @requires_torch -class TestSGPTDefualts(unittest.TestCase): +class TestSGPTDefaults(unittest.TestCase): def test_sgpt_defaults(self): from sparseml.core.framework import Framework from sparseml.core.state import State @@ -31,18 +31,9 @@ def test_sgpt_defaults(self): sparsegpt_modifier_only_sparsity = SparseGPTModifier( framework=Framework.pytorch, **kwargs ) - assert not sparsegpt_modifier_only_sparsity.quantize self.assertEqual(sparsegpt_modifier_only_sparsity.block_size, 128) self.assertEqual(sparsegpt_modifier_only_sparsity.sparsity, 0.5) - kwargs = {"quantize": True} - sparsegpt_modifier_only_quant = SparseGPTModifier( - framework=Framework.pytorch, **kwargs - ) - assert sparsegpt_modifier_only_quant.quantize - self.assertEqual(sparsegpt_modifier_only_quant.block_size, 128) - self.assertEqual(sparsegpt_modifier_only_quant.sparsity, 0.0) - # fail if we don't pass a sparsity or enable quantization kwargs = {} sparsegpt_invalid = SparseGPTModifier(framework=Framework.pytorch, **kwargs)