Skip to content

Commit

Permalink
[Experimental][TorchFX] quantize_pt2e + X86Quantizer introduction (#3121)
Browse files Browse the repository at this point in the history

### Changes

Introduction of `quantize_pt2e` method

### Reason for changes



### Related tickets

#2766 

### Tests
graph tests: `tests/torch/fx/test_quantizer.py`
  • Loading branch information
daniil-lyakhov authored Jan 21, 2025
1 parent 0b80812 commit d1b5229
Show file tree
Hide file tree
Showing 20 changed files with 10,263 additions and 65 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
106 changes: 106 additions & 0 deletions nncf/experimental/quantization/algorithms/post_training/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
from typing import Callable, List, Optional, TypeVar

from nncf import Dataset
from nncf.common.graph.graph import NNCFGraph
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.common.utils.backend import BackendType
from nncf.experimental.quantization.algorithms.post_training.pipeline import experimental_create_ptq_pipeline
from nncf.experimental.quantization.quantizers.quantizer import Quantizer
from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from nncf.quantization.advanced_parameters import RangeEstimatorParameters
from nncf.quantization.algorithms.algorithm import Algorithm

# Generic type variable used below to annotate model arguments and return values.
TModel = TypeVar("TModel")
# Callable alias: takes a model and returns a model of the same type.
TPass = Callable[[TModel], TModel]


class ExperimentalPostTrainingQuantization(Algorithm):
    """
    Experimental Post-Training Quantization algorithm.

    The class is a thin wrapper around the pipeline produced by
    `experimental_create_ptq_pipeline`, which is assembled from:
        1) SmoothQuant (optional)
        2) MinMaxRangeEstimator
        3) FastBiasCorrection or BiasCorrection (optional)
    Statistic collection and model transformation are delegated to that pipeline.
    """

    def __init__(
        self,
        quantizer: Quantizer,
        subset_size: int = 300,
        fast_bias_correction: Optional[bool] = True,
        smooth_quant: bool = False,
        bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
        smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
        activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
        weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
        batchwise_statistics: bool = False,
    ):
        """
        :param quantizer: Quantizer forwarded to the pipeline factory, where it is used
            by the MinMax range estimation algorithm.
        :param subset_size: Size of a subset to calculate activations
            statistics used for quantization.
        :param fast_bias_correction: `True` selects the fast bias correction method.
            `False` selects a different bias correction method which is more accurate,
            in general, and takes more time but requires less memory.
            `None` disables the bias correction algorithm.
        :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
        :param bias_correction_params: Advanced parameters for fine-tuning
            the bias correction algorithm.
        :param smooth_quant_params: Advanced alpha parameters for the SmoothQuant algorithm.
        :param activations_range_estimator_params: Parameters for estimating the range
            of activations of the model.
        :param weights_range_estimator_params: Parameters for estimating the range
            of weights of the model.
        :param batchwise_statistics: Determines whether quantizer statistics should be calculated
            for each item of the batch or for the entire batch, default is False.
        """
        # All arguments are handed over unchanged; the pipeline factory owns the defaults logic.
        pipeline_options = dict(
            quantizer=quantizer,
            subset_size=subset_size,
            fast_bias_correction=fast_bias_correction,
            smooth_quant=smooth_quant,
            bias_correction_params=bias_correction_params,
            smooth_quant_params=smooth_quant_params,
            activations_range_estimator_params=activations_range_estimator_params,
            weights_range_estimator_params=weights_range_estimator_params,
            batchwise_statistics=batchwise_statistics,
        )
        self._pipeline = experimental_create_ptq_pipeline(**pipeline_options)

    @property
    def available_backends(self) -> List[BackendType]:
        """
        Backends supported by every algorithm in every step of the pipeline.

        :return: Intersection of all `BackendType` members with the `available_backends`
            of each algorithm in the pipeline.
        """
        all_algorithms = itertools.chain.from_iterable(self._pipeline.pipeline_steps)
        supported = set(BackendType).intersection(*(algo.available_backends for algo in all_algorithms))
        return list(supported)

    def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
        """
        Returns the statistic points required by the first step of the pipeline.

        :param model: Model to collect statistic points for.
        :param graph: NNCFGraph of the given model.
        :return: Statistic points of the pipeline step with index 0.
        """
        first_step_index = 0
        return self._pipeline.get_statistic_points_for_step(first_step_index, model, graph)

    def apply(
        self,
        model: TModel,
        graph: NNCFGraph,
        statistic_points: Optional[StatisticPointsContainer] = None,
        dataset: Optional[Dataset] = None,
    ) -> TModel:
        """
        Runs the whole pipeline on the given model, starting from the first step.

        :param model: Model to be quantized.
        :param graph: NNCFGraph of the given model.
        :param statistic_points: Optional statistics for the first pipeline step;
            an empty or missing container means no precomputed statistics are passed on.
        :param dataset: Dataset handed to the pipeline; mandatory when the pipeline
            has more than one step.
        :raises ValueError: If no dataset is given for a pipeline with more than one step.
        :return: The model returned by the pipeline.
        """
        if dataset is None and len(self._pipeline.pipeline_steps) > 1:
            error_message = (
                "A dataset is required for the post-training quantization "
                "algorithm to collect statistics for intermediate models."
            )
            raise ValueError(error_message)

        # Supplied statistics can only belong to the first step (index 0).
        step_index_to_statistics = {0: statistic_points} if statistic_points else None

        return self._pipeline.run_from_step(model, dataset, graph, 0, step_index_to_statistics)
117 changes: 117 additions & 0 deletions nncf/experimental/quantization/algorithms/post_training/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional, TypeVar

from nncf.experimental.quantization.algorithms.range_estimator.algorithm import MinMaxRangeEstimator
from nncf.experimental.quantization.quantizers.quantizer import Quantizer
from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from nncf.quantization.advanced_parameters import RangeEstimatorParameters
from nncf.quantization.algorithms.bias_correction.algorithm import BIAS_CORRECTION_THRESHOLD
from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
from nncf.quantization.algorithms.fast_bias_correction.algorithm import FAST_BIAS_CORRECTION_THRESHOLD
from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
from nncf.quantization.algorithms.pipeline import Pipeline
from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant

# Generic type variable for a model object; not referenced by the code visible in this module.
TModel = TypeVar("TModel")


def experimental_create_ptq_pipeline(
    quantizer: Quantizer,
    subset_size: int = 300,
    fast_bias_correction: Optional[bool] = True,
    smooth_quant: bool = False,
    bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
    smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
    activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
    weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
    batchwise_statistics: bool = False,
) -> Pipeline:
    """
    Creates an experimental post-training quantization pipeline.

    The pipeline consists of up to two steps:
        1) SmoothQuant (optional, placed in its own step before quantization);
        2) MinMaxRangeEstimator, optionally followed in the same step by
           FastBiasCorrection or BiasCorrection.

    :param quantizer: Quantizer to use in the MinMaxRangeEstimator algorithm.
    :param subset_size: Size of a subset to calculate activations
        statistics used for quantization.
    :param fast_bias_correction: `True` adds FastBiasCorrection. `False` adds BiasCorrection,
        a different bias correction method which is more accurate, in general, and takes
        more time but requires less memory. `None` disables the bias correction algorithm.
    :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
    :param bias_correction_params: Advanced parameters for fine-tuning the bias correction algorithm.
    :param smooth_quant_params: Advanced alpha parameters for the SmoothQuant algorithm.
    :param activations_range_estimator_params: Parameters for estimating the range
        of activations of the model.
    :param weights_range_estimator_params: Parameters for estimating the range
        of weights of the model.
    :param batchwise_statistics: Determines whether quantizer statistics should be calculated
        for each item of the batch or for the entire batch, default is False.
    :return: An experimental post-training quantization pipeline.
    """
    steps = []

    if smooth_quant_params is None:
        smooth_quant_params = AdvancedSmoothQuantParameters()

    # SmoothQuant gets a step of its own, but only when it is requested and
    # at least one of its alpha values is non-negative.
    if smooth_quant:
        convolution_alpha = smooth_quant_params.convolution
        matmul_alpha = smooth_quant_params.matmul
        if convolution_alpha >= 0 or matmul_alpha >= 0:
            alpha_map = {"convolution": convolution_alpha, "matmul": matmul_alpha}
            steps.append([SmoothQuant(subset_size, False, alpha_map=alpha_map)])

    # The quantization step always contains the range estimator; a bias correction
    # algorithm may be appended to this same step below.
    quantization_step = [
        MinMaxRangeEstimator(
            quantizer=quantizer,
            subset_size=subset_size,
            inplace_statistics=False,
            batchwise_statistics=batchwise_statistics,
            activations_range_estimator_params=activations_range_estimator_params,
            weights_range_estimator_params=weights_range_estimator_params,
        )
    ]
    steps.append(quantization_step)

    # `None` means no bias correction at all.
    if fast_bias_correction is None:
        return Pipeline(steps)

    if fast_bias_correction:
        correction_cls = FastBiasCorrection
        correction_subset_size = subset_size
        default_threshold = FAST_BIAS_CORRECTION_THRESHOLD
    else:
        correction_cls = BiasCorrection
        # BiasCorrection is given 20% of the subset, but never fewer than one sample.
        correction_subset_size = max(int(subset_size * 0.2), 1)
        default_threshold = BIAS_CORRECTION_THRESHOLD

    if bias_correction_params is None:
        bias_correction_params = AdvancedBiasCorrectionParameters()

    # An explicitly configured threshold overrides the algorithm-specific default.
    configured_threshold = bias_correction_params.threshold
    threshold = default_threshold if configured_threshold is None else configured_threshold

    quantization_step.append(
        correction_cls(
            correction_subset_size,
            threshold,
            bias_correction_params.apply_for_all_nodes,
        )
    )

    return Pipeline(steps)
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional, TypeVar

from nncf import Dataset
from nncf.common.graph.graph import NNCFGraph
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
from nncf.common.utils.backend import BackendType
from nncf.experimental.quantization.quantizers.quantizer import Quantizer
from nncf.quantization.algorithms.algorithm import Algorithm
from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
from nncf.quantization.range_estimator import RangeEstimatorParameters

# Generic type variable used below to annotate model arguments and return values.
TModel = TypeVar("TModel")


class MinMaxRangeEstimator(Algorithm):
    """
    Range estimation algorithm driven by an external Quantizer.

    The quantization setup is obtained from the given Quantizer, while statistic
    point collection and model transformation are delegated to a wrapped
    `MinMaxQuantization` instance.
    """

    def __init__(
        self,
        quantizer: Quantizer,
        subset_size: int = 300,
        inplace_statistics: bool = True,
        batchwise_statistics: bool = False,
        activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
        weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
    ):
        """
        :param quantizer: Instance of Quantizer to retrieve a quantization config
            for the given model.
        :param subset_size: Size of a subset to calculate activations statistics used
            for quantization, defaults to 300.
        :param inplace_statistics: Defines whether to calculate quantizers statistics
            by backend graph operations or by default Python implementation, defaults
            to True.
        :param batchwise_statistics: Determines whether quantizer statistics should be calculated
            for each item of the batch or for the entire batch, default is False.
        :param activations_range_estimator_params: Quantization range estimation
            parameters for activation.
        :param weights_range_estimator_params: Quantization range estimation parameters
            for weights.
        """
        self._quantizer = quantizer
        # Every option except the quantizer is forwarded to the wrapped MinMax algorithm.
        min_max_options = dict(
            subset_size=subset_size,
            inplace_statistics=inplace_statistics,
            batchwise_statistics=batchwise_statistics,
            activations_range_estimator_params=activations_range_estimator_params,
            weights_range_estimator_params=weights_range_estimator_params,
        )
        self._min_max_algo = MinMaxQuantization(**min_max_options)

    @property
    def available_backends(self) -> List[BackendType]:
        """
        :return: List with the single supported backend, `BackendType.TORCH_FX`.
        """
        return [BackendType.TORCH_FX]

    def apply(
        self,
        model: TModel,
        graph: NNCFGraph,
        statistic_points: Optional[StatisticPointsContainer] = None,
        dataset: Optional[Dataset] = None,
    ) -> TModel:
        """
        Applies the wrapped MinMax algorithm to the model.

        :param model: Model to transform.
        :param graph: NNCFGraph of the given model.
        :param statistic_points: Statistic points passed on to the wrapped algorithm.
        :param dataset: Accepted for interface compatibility; not used by this method.
        :raises RuntimeError: If the wrapped algorithm has no quantization target points,
            i.e. its `_quantization_target_points_to_qconfig` is None.
        :return: The model returned by the wrapped MinMax algorithm.
        """
        min_max = self._min_max_algo
        if min_max._quantization_target_points_to_qconfig is None:
            error_message = (
                "Statistic points are not available."
                " Please call `get_statistic_points` before calling the `apply` method."
            )
            raise RuntimeError(error_message)
        return min_max.apply(model=model, graph=graph, statistic_points=statistic_points)

    def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
        """
        Prepares the wrapped MinMax algorithm and returns its statistic points.

        The quantization setup is requested from the quantizer first; then the backend
        entity is set, the cache is initialized and the quantization target points are
        filled from that setup.

        :param model: Model to collect statistic points for.
        :param graph: NNCFGraph of the given model.
        :return: Cached statistic points of the wrapped MinMax algorithm.
        """
        setup = self._quantizer.get_quantization_setup(model, graph)
        min_max = self._min_max_algo
        min_max._set_backend_entity(model)
        min_max._init_cache()
        min_max.fill_quantization_target_points(setup, graph)
        return min_max.get_cached_statistic_points(model, graph)
10 changes: 10 additions & 0 deletions nncf/experimental/quantization/quantizers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
45 changes: 45 additions & 0 deletions nncf/experimental/quantization/quantizers/quantizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2025 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from abc import abstractmethod
from typing import TypeVar

from nncf.common.graph.graph import NNCFGraph
from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup

# Generic type variable used below to annotate model arguments and return values.
TModel = TypeVar("TModel")


class Quantizer(ABC):
    """
    Abstract interface consumed by the range estimation algorithm.

    Implementations declare everything needed to prepare a model for quantization
    and to retrieve the quantization setup for it.
    """

    @abstractmethod
    def transform_prior_quantization(self, model: TModel) -> TModel:
        """
        Applies, in-place, the modifications the model needs before it is quantized.

        :param model: Backend-specific model to be transformed.
        :return: Transformed backend-specific model.
        """

    @abstractmethod
    def get_quantization_setup(self, model: TModel, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
        """
        Builds a SingleConfigQuantizerSetup for the given model.

        :param model: Backend-specific model, for which quantization target points are being sought.
        :param nncf_graph: NNCFGraph instance.
        :return: SingleConfigQuantizerSetup for the given model.
        """
Loading

0 comments on commit d1b5229

Please sign in to comment.