From dbb0094fd0cb936469e35320bf37e866ef7a1da4 Mon Sep 17 00:00:00 2001 From: DawerG Date: Wed, 1 Nov 2023 19:12:19 +0530 Subject: [PATCH] 7.1 release (#2027) CI: https://gitlab.com/coremltools1/coremltools/-/pipelines/1056684725 --- CMakeLists.txt | 12 + coremltools/_deps/__init__.py | 21 +- coremltools/converters/_converters_entry.py | 173 ++++-- .../converters/mil/backend/mil/load.py | 50 +- coremltools/converters/mil/converter.py | 7 +- .../converters/mil/frontend/milproto/load.py | 40 +- .../mil/frontend/tensorflow/test/test_ops.py | 27 + .../tensorflow2/test/test_v2_ops_tf_keras.py | 5 + .../mil/frontend/torch/converter.py | 410 +++++--------- .../mil/frontend/torch/dialect_ops.py | 55 +- .../mil/frontend/torch/edgeir_utils.py | 34 ++ .../mil/frontend/torch/internal_graph.py | 412 +++++++++----- .../converters/mil/frontend/torch/load.py | 73 +-- .../converters/mil/frontend/torch/ops.py | 491 +++++++++++------ .../mil/frontend/torch/quantization_ops.py | 48 +- .../ssa_passes/torch_tensor_assign_to_core.py | 18 +- .../mil/frontend/torch/test/test_api.py | 72 --- .../frontend/torch/test/test_custom_ops.py | 8 +- .../torch/test/test_executorch_e2e.py | 158 ++++++ .../torch/test/test_internal_graph.py | 4 +- .../torch/test/test_torch_conversion_api.py | 249 ++++++++- .../mil/frontend/torch/test/test_torch_ops.py | 348 +++++++++--- .../torch/test/test_torch_quantization_ops.py | 55 +- .../mil/frontend/torch/test/testing_utils.py | 64 ++- .../mil/frontend/torch/torch_op_registry.py | 115 +++- .../mil/frontend/torch/torchir_passes.py | 25 +- .../mil/frontend/torch/torchscript_utils.py | 201 +++++++ coremltools/converters/mil/input_types.py | 1 + coremltools/converters/mil/mil/__init__.py | 31 +- coremltools/converters/mil/mil/block.py | 31 +- coremltools/converters/mil/mil/builder.py | 112 ++-- coremltools/converters/mil/mil/input_type.py | 23 +- coremltools/converters/mil/mil/operation.py | 12 +- .../converters/mil/mil/ops/defs/_utils.py | 154 +++++- .../mil/ops/defs/iOS15/elementwise_unary.py | 2 +- .../mil/mil/ops/defs/iOS15/linear.py | 4 +- .../converters/mil/mil/ops/defs/iOS15/pool.py | 30 +- .../mil/mil/ops/defs/iOS15/random.py | 54 +- .../mil/ops/defs/iOS15/tensor_operation.py | 4 + .../ops/defs/iOS15/tensor_transformation.py | 76 +-- .../mil/mil/ops/defs/iOS16/scatter_gather.py | 81 ++- .../mil/mil/ops/defs/iOS17/scatter_gather.py | 73 ++- .../converters/mil/mil/ops/registry.py | 5 +- .../mil/mil/ops/tests/iOS14/test_conv.py | 38 ++ .../mil/mil/ops/tests/iOS14/test_linear.py | 36 +- .../mil/mil/ops/tests/iOS14/test_pool.py | 38 +- .../ops/tests/iOS14/test_scatter_gather.py | 71 ++- .../ops/tests/iOS14/test_tensor_operation.py | 11 + .../tests/iOS14/test_tensor_transformation.py | 40 +- .../mil/mil/ops/tests/iOS16/test_conv.py | 9 +- .../ops/tests/iOS16/test_scatter_gather.py | 102 +++- .../mil/mil/ops/tests/iOS17/test_linear.py | 24 + .../ops/tests/iOS17/test_scatter_gather.py | 55 +- .../mil/passes/defs/optimize_quantization.py | 130 +++++ .../passes/defs/optimize_tensor_operation.py | 77 +++ .../mil/mil/passes/defs/quantization.py | 324 +++++++---- .../mil/mil/passes/pass_pipeline.py | 10 + .../mil/passes/tests/test_pass_pipeline.py | 6 + .../mil/mil/passes/tests/test_passes.py | 85 ++- .../passes/tests/test_quantization_passes.py | 510 +++++++++++++++++- coremltools/converters/mil/mil/program.py | 121 ++++- .../converters/mil/mil/tests/test_block.py | 1 - .../converters/mil/mil/tests/test_programs.py | 155 +++++- coremltools/converters/mil/testing_utils.py | 7 + 
coremltools/models/__init__.py | 1 + coremltools/models/model.py | 2 +- coremltools/optimize/coreml/_config.py | 5 +- .../optimize/coreml/_quantization_passes.py | 205 ++++--- .../torch/pruning/magnitude_pruner.py | 2 +- .../test/ml_program/test_compression.py | 39 +- .../neural_network/test_numpy_nn_layers.py | 1 + .../test/neural_network/test_tf_numeric.py | 5 + .../coreml/test_post_training_quantization.py | 9 +- coremltools/version.py | 2 +- reqs/test.pip | 6 +- scripts/build.sh | 2 +- 76 files changed, 4511 insertions(+), 1486 deletions(-) create mode 100644 coremltools/converters/mil/frontend/torch/edgeir_utils.py delete mode 100644 coremltools/converters/mil/frontend/torch/test/test_api.py create mode 100644 coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py create mode 100644 coremltools/converters/mil/frontend/torch/torchscript_utils.py diff --git a/CMakeLists.txt b/CMakeLists.txt index e64d03104..d4625252a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,6 +191,7 @@ else() message(STATUS "CoreML.framework and dependent frameworks not found. Skipping libcoremlpython build.") endif() + # Build kmeans-1d set(KMEANS_DIR "${PROJECT_SOURCE_DIR}/deps/kmeans1d") execute_process( @@ -198,12 +199,23 @@ execute_process( WORKING_DIRECTORY ${KMEANS_DIR} ) +# Somehow Python's setuptools is building this shared object file so that it tries to load the C++ +# standard library using an rpath that only exist on the build machine. Change that so it gets +# loaded from the standard location. +if(APPLE) + file(GLOB SO_FILE "${PROJECT_SOURCE_DIR}/deps/kmeans1d/kmeans1d/_core.*.so") + execute_process( + COMMAND install_name_tool -change @rpath/libc++.1.dylib /usr/lib/libc++.1.dylib ${SO_FILE} + ) +endif() + # Copy kmeans-1d to Python deps folder execute_process( COMMAND cp -r kmeans1d ../../coremltools/_deps WORKING_DIRECTORY ${KMEANS_DIR} ) + set(PYTHON_TAG "cp${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") if(APPLE) execute_process(COMMAND uname -m OUTPUT_VARIABLE HARDWARE_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py index 01e1f3eb4..4d352840f 100644 --- a/coremltools/_deps/__init__.py +++ b/coremltools/_deps/__init__.py @@ -154,10 +154,15 @@ def __get_sklearn_version(version): # --------------------------------------------------------------------------------------- _HAS_TORCH = True -_TORCH_MAX_VERSION = "2.0.0" +_TORCH_MAX_VERSION = "2.1.0" +_HAS_TORCH_EXPORT_API = False try: import torch _warn_if_above_max_supported_version("Torch", torch.__version__, _TORCH_MAX_VERSION) + + if _get_version(torch.__version__) >= _StrictVersion("2.1.0"): + _HAS_TORCH_EXPORT_API = True + except: _HAS_TORCH = False MSG_TORCH_NOT_FOUND = "PyTorch not found." @@ -170,6 +175,20 @@ def __get_sklearn_version(version): _HAS_TORCH_VISION = False MSG_TORCH_VISION_NOT_FOUND = "TorchVision not found." +_HAS_TORCH_AUDIO = True +try: + import torchaudio +except: + _HAS_TORCH_AUDIO = False +MSG_TORCH_AUDIO_NOT_FOUND = "TorchAudio not found." + + +_HAS_EXECUTORCH = True +try: + import executorch +except: + _HAS_EXECUTORCH = False +MSG_EXECUTORCH_NOT_FOUND = "Executorch not found." 
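The new availability flags above follow the same guarded-import pattern as the existing `_HAS_TORCH` / `_HAS_TORCH_VISION` checks. A minimal sketch of how they are meant to be consumed downstream; the helper names here are hypothetical, only the imported flags and message come from this patch:

from coremltools._deps import (
    _HAS_EXECUTORCH,
    _HAS_TORCH_EXPORT_API,
    MSG_EXECUTORCH_NOT_FOUND,
)

def _is_exported_program(model) -> bool:
    # torch.export exists only for torch >= 2.1, so the import itself is gated on the flag.
    if _HAS_TORCH_EXPORT_API:
        from torch.export import ExportedProgram
        return isinstance(model, ExportedProgram)
    return False

def _require_executorch() -> None:
    # ExecuTorch-specific code paths fail early with a readable message instead of an ImportError.
    if not _HAS_EXECUTORCH:
        raise RuntimeError(MSG_EXECUTORCH_NOT_FOUND)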
# --------------------------------------------------------------------------------------- try: diff --git a/coremltools/converters/_converters_entry.py b/coremltools/converters/_converters_entry.py index d808c03da..b00fa3b99 100644 --- a/coremltools/converters/_converters_entry.py +++ b/coremltools/converters/_converters_entry.py @@ -15,7 +15,7 @@ from coremltools import ComputeUnit as _ComputeUnit from coremltools import __version__ as _ct_version from coremltools import _logger as logger -from coremltools._deps import _HAS_TF_1, _HAS_TF_2, _HAS_TORCH +from coremltools._deps import _HAS_TF_1, _HAS_TF_2, _HAS_TORCH, _HAS_TORCH_EXPORT_API from coremltools.converters._profile_utils import _profile from coremltools.converters.mil._deployment_compatibility import ( AvailableTarget, @@ -36,7 +36,7 @@ from coremltools.converters.mil.mil.passes.defs.quantization import FP16ComputePrecision from coremltools.converters.mil.mil.passes.graph_pass import PassOption as _PassOption from coremltools.converters.mil.mil.passes.pass_pipeline import PassPipeline -from coremltools.models import _METADATA_SOURCE, _METADATA_VERSION +from coremltools.models import _METADATA_SOURCE, _METADATA_SOURCE_DIALECT, _METADATA_VERSION from coremltools.models.utils import _MLPACKAGE_EXTENSION if _HAS_TF_1: @@ -51,8 +51,13 @@ if _HAS_TORCH: import torch - from coremltools.converters.mil.frontend.torch.load import \ - _torchscript_from_model as pytorch_load + from coremltools.converters.mil.frontend.torch.load import ( + _torchscript_from_spec as try_load_torchscript, + ) + + if _HAS_TORCH_EXPORT_API: + from torch.export import ExportedProgram + @_profile @@ -102,8 +107,12 @@ def convert( * PyTorch - - A `TorchScript `_ object - - Path to a ``.pt`` file + - TorchScript Models: + - A `TorchScript `_ object + - Path to a ``.pt`` file + + - Torch Exported Models: + - A `ExportedProgram ` object with `EDGE` dialect source : str (optional) @@ -161,18 +170,23 @@ def convert( When ``inputs`` not provided or ``dtype`` not specified. The float 32 inputs defaults to float 16. * PyTorch: - - The ``inputs`` parameter is required. - - Number of elements in ``inputs`` must match the number of inputs - of the PyTorch model. - - ``inputs`` may be a nested list or tuple. - - ``TensorType`` and ``ImageType`` must have the ``shape`` specified. - - If the ``name`` argument is specified with ``TensorType`` or - ``ImageType``, the converted Core ML model will have inputs with - the same name. - - If ``dtype`` is missing: - * For ``minimum_deployment_target <= ct.target.macOS12``, it defaults to float 32. - * For ``minimum_deployment_target >= ct.target.macOS13``, and with ``compute_precision`` in float 16 precision. - It defaults to float 16. + + - TorchScript Models: + - The ``inputs`` parameter is required. + - Number of elements in ``inputs`` must match the number of inputs + of the PyTorch model. + - ``inputs`` may be a nested list or tuple. + - ``TensorType`` and ``ImageType`` must have the ``shape`` specified. + - If the ``name`` argument is specified with ``TensorType`` or + ``ImageType``, the converted Core ML model will have inputs with + the same name. + - If ``dtype`` is missing: + * For ``minimum_deployment_target <= ct.target.macOS12``, it defaults to float 32. + * For ``minimum_deployment_target >= ct.target.macOS13``, and with ``compute_precision`` in float 16 precision. + It defaults to float 16. + + - Torch Exported Models: + - The ``inputs`` parameter is not supported. ``inputs`` parameter is inferred from Torch ExportedProgram. 
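              A hedged usage sketch for the exported-program path (``edge_program`` below is
              assumed to be an ``ExportedProgram`` already lowered to the ``EDGE`` dialect,
              for example through the ExecuTorch export flow; producing it is outside the
              scope of this patch):

              >>> mlmodel = ct.convert(edge_program)  # no ``inputs``/``outputs`` for ExportedProgram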
outputs : list of ``TensorType`` or ``ImageType`` (optional) @@ -218,13 +232,17 @@ def convert( * PyTorch: - - If specified, the length of the list must match the number of - outputs returned by the PyTorch model. - - If ``name`` is specified, it is applied to the output names of the - converted Core ML model. - - For ``minimum_deployment_target >= ct.target.macOS13``, and with ``compute_precision`` in float 16 precision. - If ``dtype`` not specified, the outputs inferred of type float 32 - defaults to float 16. + - TorchScript Models: + - If specified, the length of the list must match the number of + outputs returned by the PyTorch model. + - If ``name`` is specified, it is applied to the output names of the + converted Core ML model. + - For ``minimum_deployment_target >= ct.target.macOS13``, and with ``compute_precision`` in float 16 precision. + If ``dtype`` not specified, the outputs inferred of type float 32 + defaults to float 16. + + - Torch Exported Models: + - The ``outputs`` parameter is not supported. ``outputs`` parameter is inferred from Torch ExportedProgram. classifier_config : ClassifierConfig class (optional) @@ -308,7 +326,7 @@ def convert( The above transform iterates through all the ops, looking at each op's inputs and outputs. If they are of type float 32, ``cast`` ops are injected to convert those tensors (also known as `vars`) to - type float 16. + type float 16. Similarly, int32 vars will also be cast to int16. - ``coremltools.precision.FLOAT32`` enum: No transform is applied. @@ -489,15 +507,17 @@ def skip_real_div_ops(op): PyTorch: - >>> model = torchvision.models.mobilenet_v2() - >>> model.eval() - >>> example_input = torch.rand(1, 3, 256, 256) - >>> traced_model = torch.jit.trace(model, example_input) + TorchScript Models: - >>> input = ct.TensorType(name='input_name', shape=(1, 3, 256, 256)) - >>> mlmodel = ct.convert(traced_model, inputs=[input]) - >>> results = mlmodel.predict({"input": example_input.numpy()}) - >>> print(results['1651']) # 1651 is the node name given by PyTorch's JIT + >>> model = torchvision.models.mobilenet_v2() + >>> model.eval() + >>> example_input = torch.rand(1, 3, 256, 256) + >>> traced_model = torch.jit.trace(model, example_input) + + >>> input = ct.TensorType(name='input_name', shape=(1, 3, 256, 256)) + >>> mlmodel = ct.convert(traced_model, inputs=[input]) + >>> results = mlmodel.predict({"input": example_input.numpy()}) + >>> print(results['1651']) # 1651 is the node name given by PyTorch's JIT See `Conversion Options `_ for more advanced options. @@ -508,6 +528,7 @@ def skip_real_div_ops(op): outputs_as_strings, outputs_as_tensor_or_image_types, outputs) + source_dialect = _determine_source_dialect(model, exact_source) exact_target = _determine_target(convert_to, minimum_deployment_target) _validate_conversion_arguments( model, @@ -525,7 +546,7 @@ def skip_real_div_ops(op): if pass_pipeline is None: pass_pipeline = PassPipeline() if not need_fp16_cast_pass: - pass_pipeline.remove_passes({"common::add_fp16_cast"}) + pass_pipeline.remove_passes({"common::add_fp16_cast", "common::add_int16_cast"}) if isinstance(compute_precision, FP16ComputePrecision): # For backward compatibility with the `op_selector` param in FP16ComputePrecision. 
pass_pipeline._pass_options["common::add_fp16_cast"] = [ @@ -584,7 +605,7 @@ def skip_real_div_ops(op): gc.collect() - mlmodel = _record_build_metadata(mlmodel, exact_source) + mlmodel = _record_build_metadata(mlmodel, exact_source, source_dialect=source_dialect) return mlmodel @@ -819,16 +840,45 @@ def _flatten_list(_inputs): raise ValueError("Input should be a list of TensorType or ImageType") elif exact_source == "pytorch": - if inputs is None: - raise ValueError('Expected argument for pytorch "inputs" not provided') + if _HAS_TORCH_EXPORT_API and isinstance(model, ExportedProgram): + if model.dialect != "EDGE": + raise NotImplementedError( + f"Conversion for models with only EDGE dialect is supported/tested. Provided Dialect: {model.dialect}" + ) - raise_if_duplicated(flat_inputs) - if inputs is not None and not all( - [isinstance(_input, InputType) for _input in flat_inputs] - ): - raise ValueError( - "Input should be a list/tuple (or nested lists/tuples) of TensorType or ImageType" - ) + # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API) + if inputs is not None: + raise AssertionError("'inputs' argument should be None for ExportedProgram") + + if outputs is not None: + raise AssertionError("'outputs' argument should be None for ExportedProgram") + + else: + is_torch_load_successful = False + try: + try_load_torchscript(model) + is_torch_load_successful = True + except: + pass + if is_torch_load_successful: + if inputs is None: + raise ValueError( + 'Expected argument "inputs" for TorchScript models not provided' + ) + + raise_if_duplicated(flat_inputs) + if inputs is not None and not all( + [isinstance(_input, InputType) for _input in flat_inputs] + ): + raise ValueError( + "Input should be a list/tuple (or nested lists/tuples) of TensorType or ImageType" + ) + else: + raise TypeError( + "@model must either be a TorchScript object (or .pt or .pth file) or an ExportedProgram object (if using torch.export based API), received: {}".format( + type(model) + ) + ) elif exact_source == "milinternal": if not isinstance(model, Program): @@ -837,6 +887,19 @@ def _flatten_list(_inputs): ) +def _determine_source_dialect(model, exact_source): + + source_dialect = None + if exact_source == "pytorch": + + if _HAS_TORCH_EXPORT_API and isinstance(model, ExportedProgram): + return f"TorchExport::{model.dialect}" + else: + return "TorchScript" + + return source_dialect + + def _determine_source(model, source, output_names, outputs_as_tensor_or_image_types, @@ -875,9 +938,13 @@ def _determine_source(model, source, pass if source == "auto" and _HAS_TORCH: + + if _HAS_TORCH_EXPORT_API and isinstance(model, ExportedProgram): + return "pytorch" + is_torch_load_successful = False try: - pytorch_load(model) + try_load_torchscript(model) is_torch_load_successful = True except: pass @@ -953,6 +1020,12 @@ def _get_metadata_from_mlmodel(mlmodel): src_pkg_version = mlmodel.user_defined_metadata[_METADATA_SOURCE] coremltools_version = mlmodel.user_defined_metadata[_METADATA_VERSION] + src_dialect = ( + None + if _METADATA_SOURCE_DIALECT not in mlmodel.user_defined_metadata + else mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] + ) + src_pkg_version_list = src_pkg_version.split("==") if len(src_pkg_version_list) == 0: src_pkg, pkg_ver = None, None @@ -969,10 +1042,13 @@ def _get_metadata_from_mlmodel(mlmodel): if src_pkg is not None and pkg_ver is not None: build_info['coremltools-component-' + src_pkg] = str(pkg_ver) + if src_dialect is not None: + 
build_info["coremltools-source-dialect"] = src_dialect + return build_info -def _record_build_metadata(mlmodel, exact_source): +def _record_build_metadata(mlmodel, exact_source, source_dialect=None): # recording metadata: coremltools version, source framework and version if exact_source in {"tensorflow", "tensorflow2"} and (_HAS_TF_1 or _HAS_TF_2): src_pkg_version = "tensorflow=={0}".format(tf.__version__) @@ -986,6 +1062,9 @@ def _record_build_metadata(mlmodel, exact_source): mlmodel.user_defined_metadata[_METADATA_SOURCE] = src_pkg_version mlmodel.user_defined_metadata[_METADATA_VERSION] = _ct_version + if source_dialect is not None: + mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] = source_dialect + build_info = _get_metadata_from_mlmodel(mlmodel) mlmodel._set_build_info_mil_attributes(build_info) diff --git a/coremltools/converters/mil/backend/mil/load.py b/coremltools/converters/mil/backend/mil/load.py index 8f2c9d2ed..216eba527 100644 --- a/coremltools/converters/mil/backend/mil/load.py +++ b/coremltools/converters/mil/backend/mil/load.py @@ -5,6 +5,7 @@ import os import warnings +from typing import Optional import numpy as np @@ -22,7 +23,7 @@ from coremltools.converters.mil.backend.nn.load import _set_optional_inputs from coremltools.converters.mil.input_types import EnumeratedShapes, ImageType, RangeDim, TensorType from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, mil_list, types +from coremltools.converters.mil.mil import Function, Program, mil_list, types from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry from coremltools.converters.mil.mil.types.symbolic import any_symbolic, any_variadic, is_symbolic from coremltools.models.neural_network.flexible_shape_utils import ( @@ -282,21 +283,17 @@ def remove_output(block, prob_var): return out[0].name, out[1].name -def load(prog, weights_dir, resume_on_errors=False, specification_version=_SPECIFICATION_VERSION_IOS_15, **kwargs): +def _pymil_to_milproto( + prog: Program, + weights_dir: str, + specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15, +) -> pm.Program: + """ + Convert a pymil program into mil proto. 
+ """ if BlobWriter is None: raise RuntimeError("BlobWriter not loaded") - if "main" not in prog.functions: - raise ValueError("main function not found in program") - # if user has specified "ClassifierConfig", then add the "classify" op to the prog - classifier_config = kwargs.get("classifier_config", None) - predicted_feature_name = None - predicted_probabilities_name = None - if classifier_config is not None: - predicted_feature_name, predicted_probabilities_name = _add_classify_op(prog, classifier_config) - - input_types = prog.main_input_types - output_types = prog.main_output_types weight_path = os.path.join(weights_dir, _WEIGHTS_FILE_NAME) blob_writer = BlobWriter(weight_path) @@ -310,6 +307,33 @@ def load(prog, weights_dir, resume_on_errors=False, specification_version=_SPECI version=1, functions=function_protos, ) + return proto + + +def load( + prog: Program, + weights_dir: str, + resume_on_errors: Optional[bool] = False, + specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15, + **kwargs, +): + if "main" not in prog.functions: + raise ValueError("main function not found in program") + + # if user has specified "ClassifierConfig", then add the "classify" op to the prog + classifier_config = kwargs.get("classifier_config", None) + predicted_feature_name = None + predicted_probabilities_name = None + if classifier_config is not None: + predicted_feature_name, predicted_probabilities_name = _add_classify_op( + prog, classifier_config + ) + + # convert pymil program into mil proto + proto = _pymil_to_milproto(prog, weights_dir, specification_version) + + input_types = prog.main_input_types + output_types = prog.main_output_types desc = kwargs.get("model_description", None) if desc and not isinstance(desc, ml.ModelDescription): diff --git a/coremltools/converters/mil/converter.py b/coremltools/converters/mil/converter.py index 3fae1cb94..72f11769c 100644 --- a/coremltools/converters/mil/converter.py +++ b/coremltools/converters/mil/converter.py @@ -277,7 +277,7 @@ def mil_convert_to_proto( # behaviour same as before, the quantization pass is removed in this situation. # TODO: rdar://106111553 ([Infra] Quantization Pass is skipped when `mil_convert` is called directly.) main_pipeline = PassPipeline() - main_pipeline.remove_passes({"common::add_fp16_cast"}) + main_pipeline.remove_passes({"common::add_fp16_cast", "common::add_int16_cast"}) frontend_pipeline, backend_pipeline = _construct_other_pipelines( main_pipeline, convert_from, convert_to ) @@ -288,12 +288,13 @@ def mil_convert_to_proto( PassPipelineManager.apply_pipeline(prog, main_pipeline) - prog._check_invalid_program() - if convert_to == 'milinternal': return None, prog PassPipelineManager.apply_pipeline(prog, backend_pipeline) + + prog._check_early_error_out_for_invalid_program() + backend_converter_type = converter_registry.backends.get(convert_to.lower()) if not backend_converter_type: raise NotImplementedError( diff --git a/coremltools/converters/mil/frontend/milproto/load.py b/coremltools/converters/mil/frontend/milproto/load.py index 054ef871c..87da83595 100644 --- a/coremltools/converters/mil/frontend/milproto/load.py +++ b/coremltools/converters/mil/frontend/milproto/load.py @@ -411,25 +411,10 @@ def _load_function(context, func_spec, spec_version): return pymil_func -def load(model_spec, specification_version, file_weights_dir="", **kwargs): +def load_mil_proto(program_spec, specification_version, file_weights_dir=""): """ - Load MILProto to Pymil. 
- - Set force_spec_version to force override the spec version. + Load in-memory Proto specification of MILSpec.Program(.Proto) object to PyMIL """ - if not isinstance(model_spec, ml.Model): - raise TypeError("Invalid Model sepc object") - - if specification_version < model_spec.specificationVersion: - if not kwargs.get("force_spec_version", False): - raise ValueError( - "specification_version must be greater or equal to the input model spec version" - ) - - if model_spec.WhichOneof("Type") != "mlProgram": - raise ValueError("Only MIL proto based mlmodels can be loaded") - - program_spec = model_spec.mlProgram if not isinstance(program_spec, pm.Program): raise TypeError("Invalid Program spec object") @@ -451,3 +436,24 @@ def load(model_spec, specification_version, file_weights_dir="", **kwargs): raise ValueError("Invalid attribute for program") return pymil_program + + +def load(model_spec, specification_version, file_weights_dir="", **kwargs): + """ + Load in-memory Proto specification of Model(.Proto) object to PyMIL + + Set force_spec_version to force override the spec version. + """ + if not isinstance(model_spec, ml.Model): + raise TypeError("Invalid Model sepc object") + + if specification_version < model_spec.specificationVersion: + if not kwargs.get("force_spec_version", False): + raise ValueError( + "specification_version must be greater or equal to the input model spec version" + ) + + if model_spec.WhichOneof("Type") != "mlProgram": + raise ValueError("Only MIL proto based mlmodels can be loaded") + + return load_mil_proto(model_spec.mlProgram, specification_version, file_weights_dir) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py index da22af453..33795f7e9 100644 --- a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py @@ -2622,6 +2622,15 @@ def test_ios17_resize_bilinear_dynamic_shape( target_shape, align_corners, ): + if ( + backend == ("mlprogram", "fp16") + and input_shape == (2, 5, 2, 3) + and target_shape == (20, 60) + ): + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + """ Since iOS17, dynamic shape is supported by lowering to `resize` MIL op. """ @@ -2723,6 +2732,15 @@ def test_ios17_resize_nearest_neighbor_dynamic_shape( input_shape, target_shape, ): + if ( + backend == ("mlprogram", "fp16") + and input_shape == (2, 5, 2, 3) + and target_shape == (20, 60) + ): + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + """ Since iOS17, dynamic shape is supported by lowering to `resize` MIL op. 
""" @@ -6742,6 +6760,15 @@ def build_model(x): def test_programmatic( self, compute_unit, backend, input_block_rank, dynamic_input, dynamic_crops ): + if ( + backend == ("mlprogram", "fp16") + and input_block_rank == (3, 1) + and dynamic_input + and not dynamic_crops + ): + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) input_rank, block_rank = input_block_rank diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py index 47c8247c5..ee673e810 100644 --- a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py +++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py @@ -1389,6 +1389,11 @@ def test_lstm_time_distributed_dense(self, compute_unit, backend): "compute_unit, backend", itertools.product(compute_units, backends) ) def test_lstm_dynamic_batch(self, compute_unit, backend): + if backend == ("mlprogram", "fp16"): + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + input_shape = (1, 1280) inp = tf.keras.layers.Input(shape=input_shape) out, hn, cn = tf.keras.layers.LSTM(512, diff --git a/coremltools/converters/mil/frontend/torch/converter.py b/coremltools/converters/mil/frontend/torch/converter.py index 9e0be95c3..0ae3e5218 100644 --- a/coremltools/converters/mil/frontend/torch/converter.py +++ b/coremltools/converters/mil/frontend/torch/converter.py @@ -4,17 +4,20 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from collections import OrderedDict +from typing import List, Optional, Union import numpy as np import torch as torch +from torch.jit._script import RecursiveScriptModule from coremltools import _logger as logger -from coremltools._deps import version_lt +from coremltools._deps import _HAS_TORCH_EXPORT_API from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target -from coremltools.converters.mil.input_types import ImageType, TensorType +from coremltools.converters.mil.input_types import ImageType, InputType, TensorType from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, Program, types +from coremltools.converters.mil.mil import Function, Placeholder, Program, types from coremltools.converters.mil.mil.types import is_float +from coremltools.converters.mil.mil.var import Var from .._utils import get_output_names from .internal_graph import InternalTorchIRGraph, InternalTorchIRNode @@ -28,26 +31,37 @@ remove_getattr_nodes, transform_inplace_ops, ) - -torch_to_mil_types = { - torch.bool: types.bool, - torch.float16: types.fp16, - torch.float32: types.fp32, - torch.float64: types.fp32, - torch.int32: types.int32, - torch.int64: types.int32, -} - - -mil_to_torch_types = {v: k for k, v in torch_to_mil_types.items()} - +from .torchscript_utils import torch_to_mil_types + +if _HAS_TORCH_EXPORT_API: + from torch.export import ExportedProgram + + +def _convert_to_torch_inputtype(inputs: List[TensorType]) -> List[TensorType]: + input_type = [] + for _input in inputs: + if isinstance(_input, (list, tuple)): + input_type.append(_convert_to_torch_inputtype(_input)) + elif isinstance(_input, InputType): + if _input.shape is None: + raise ValueError( + "'shape' must be provided in the 'inputs' argument for pytorch conversion" + ) + input_type.append(_input) + elif isinstance(_input, torch.Tensor): + 
input_type.append( + TensorType(shape=_input.shape, dtype=torch_to_mil_types[_input.dtype]) + ) + else: + raise ValueError("Unknown type {} for conversion to InputType.".format(type(_input))) + return input_type class QuantizationContext: """ Utilities to manage information pertaining to quantization of tensors in a PyTorch graph. """ - def __init__(self, context): + def __init__(self, context: "TranscriptionContext") -> None: self._context = context # Maps var name to tuple of (torch dtype, scale, zero_point) @@ -71,7 +85,7 @@ def add_quantization_info(self, name, torch_dtype, scale, zero_point, axis=None) """ self._quant_param_map[name] = (torch_dtype, scale, zero_point, axis) - def get_quantization_info(self, name): + def get_quantization_info(self, name: str) -> None: """ Retrieves the information added via add_quantization_info, if applicable. Returns None if quantization parameters could not be found. @@ -80,7 +94,7 @@ def get_quantization_info(self, name): return None return self._quant_param_map[name] - def maybe_handle_quantized_inputs(self, node: InternalTorchIRNode): + def maybe_handle_quantized_inputs(self, node: InternalTorchIRNode) -> None: """ If a node's op doesn't support quantized inputs but gets one, this will wire it to receive a dequantized version of it. @@ -91,14 +105,15 @@ def maybe_handle_quantized_inputs(self, node: InternalTorchIRNode): # Op can handle quantized inputs. Nothing to do here. return - for input_name in node.inputs: - if self.get_quantization_info(input_name) is None: + for input in node.inputs: + # In Edge IR, input can be a literal and thus have no name + if not isinstance(input, str) or self.get_quantization_info(input) is None: # Not a quantized tensor continue # We need a dequantized version of the input to feed to the op. - dequantized_var, _ = self.get_dequantized_var(input_name) - node.replace_name(input_name, dequantized_var.name) + dequantized_var, _ = self.get_dequantized_var(input) + node.replace_name(input, dequantized_var.name) def get_quantized_per_tensor(self, name, torch_dtype, scale, zero_point, quantized_name): """ @@ -179,7 +194,7 @@ class TranscriptionContext: context when stepping out. """ - def __init__(self, name=None): + def __init__(self, name: Optional[str] = None) -> None: self.name = name if name else "" self._current_graph = [{}] self._torch_graph = None @@ -192,21 +207,24 @@ def torch_graph(self): return self._torch_graph @property - def quant_context(self): + def quant_context(self) -> QuantizationContext: return self._quant_context @torch_graph.setter def torch_graph(self, graph: InternalTorchIRGraph): self._torch_graph = graph - def prepare_for_conversion(self, node: InternalTorchIRNode): + def prepare_for_conversion(self, node: InternalTorchIRNode) -> None: """ Perform any preparation necessary before node-specific frontend conversion is invoked. """ - self.quant_context.maybe_handle_quantized_inputs(node) + return - def add(self, ssa_var, torch_name=None): + def process_inplace_op(self, node: InternalTorchIRNode): + return + + def add(self, ssa_var: Var, torch_name: Optional[str] = None, override=False) -> None: """ Arguments: ssa_var: Variable to add to the graph being constructed. 
@@ -215,12 +233,12 @@ def add(self, ssa_var, torch_name=None): """ if torch_name is None: torch_name = ssa_var.name - if torch_name in self._current_graph[-1]: - print(f"Torch var {torch_name} is added again.") + if torch_name in self._current_graph[-1] and not override: + logger.warning(f"Torch var {torch_name} is added again.") return self._current_graph[-1][torch_name] = ssa_var - def __getitem__(self, torch_name): + def __getitem__(self, torch_name: str) -> Var: """ Lookup a name in the context. Note that since nested blocks must be able to access anything that was defined before them, we have to @@ -276,26 +294,26 @@ def __repr__(self): class TorchConverter: """ - Class that handles conversion of pytorch models represented in TorchScript - format to the MIL format. + Class that handles conversion of pytorch models to the MIL format. Models passed to the @TorchConverter go from: - TorchScript -> Expanded/Optimized Torch IR -> Internal Graph -> CoreML SSA - The internal graph representation was added to make testing easier. + Loaded-Torch Model -> Internal Graph -> PyMIL """ def __init__( self, - torchscript, - inputs, - outputs=None, - cut_at_symbols=None, - opset_version=None, - use_default_fp16_io=False, - ): + loaded_model: Union[RecursiveScriptModule, "ExportedProgram"], + inputs: Optional[List[TensorType]] = None, + outputs: Optional[List[TensorType]] = None, + cut_at_symbols: Optional[List[str]] = None, + opset_version: Optional[int] = None, + use_default_fp16_io: bool = False, + ) -> None: """ Arguments: - torchscript: torch.jit.ScriptModule object representing the model to convert. + loaded_model: It could be one of the following: + - In-memory TorchScript model of type torch.jit.ScriptModule + - In-memory EdgeIR program of type ExportedProgram inputs: Input values and optional names. See kwarg in load.py for full description. outputs: List of outputs as ct.InputType. See kwarg in load.py for full description. cut_at_symbols: A list of internal symbol name strings. Graph conversion will @@ -307,46 +325,54 @@ def __init__( and the compute precision set to fp16, this flag is True. When True, fp32 i/o defaults to fp16. """ - assert isinstance(torchscript, torch.jit.ScriptModule) - - self.inputs = inputs - for idx, inp in enumerate(self.inputs): - if isinstance(inp, ImageType) and self.inputs[idx].channel_first is None: - self.inputs[idx].channel_first = True - - self.torchscript = torchscript - self.outputs = outputs self.use_default_fp16_io = use_default_fp16_io - if self.use_default_fp16_io: - # If the input type is not specified by the user and use_default_fp16_io - # is True. Make the default input type to fp16 - self._adjust_default_input_to_fp16() + if inputs is not None: + inputs = _convert_to_torch_inputtype(inputs) + self.inputs = inputs + for idx, inp in enumerate(self.inputs): + if isinstance(inp, ImageType) and self.inputs[idx].channel_first is None: + self.inputs[idx].channel_first = True + + if self.use_default_fp16_io: + # If the input type is not specified by the user and use_default_fp16_io + # is True. 
Make the default input type to fp16 + self._adjust_default_input_to_fp16() + self.outputs = outputs self.output_names = get_output_names(self.outputs) self.opset_version = _target(opset_version) if opset_version is not None else None self.context = TranscriptionContext() - raw_graph, params_dict = self._expand_and_optimize_ir(self.torchscript) - self.params_dict = params_dict - self.graph = InternalTorchIRGraph( - raw_graph, params_dict, self.inputs, cut_at_symbols - ) - self.context.torch_graph = self.graph + self._prog = Program() + + if isinstance(loaded_model, torch.jit.ScriptModule): + self.graph, self.params_dict, self.buffer_dict = InternalTorchIRGraph.from_torchscript( + torchscript=loaded_model, input_values=self.inputs, cut_at_symbols=cut_at_symbols + ) + + # TODO (rdar://106161395): Register Torch IR passes and unify them into the pass pipeline. + # Apply Torch IR passes + passes = [ + transform_inplace_ops, + flatten_graph_input_values, + flatten_graph_output_values, + remove_getattr_nodes, + generate_tensor_assignment_ops, + ] + for p in passes: + p(self.graph) + + elif _HAS_TORCH_EXPORT_API and isinstance(loaded_model, ExportedProgram): + self.graph = InternalTorchIRGraph.from_edgeir(edgeir=loaded_model) + self.params_dict, self.buffer_dict = None, None + else: + raise ValueError( + "Model should be an instance of either torch.jit.ScriptModule or ExportedProgram" + ) - # TODO (rdar://106161395): Register Torch IR passes and unify them into the pass pipeline. - # Apply Torch IR passes - passes = [ - transform_inplace_ops, - flatten_graph_input_values, - flatten_graph_output_values, - remove_getattr_nodes, - generate_tensor_assignment_ops, - ] - for p in passes: - p(self.graph) + self.context.torch_graph = self.graph self.inputs = list(self.graph.inputs.values()) - self._prog = Program() def _adjust_default_input_to_fp16(self): """ @@ -389,7 +415,7 @@ def _check_ops(graph): implemented_ops = set() missing_ops = set() for node in graph.nodes: - _add_op = _TORCH_OPS_REGISTRY.get(node.kind, None) + _add_op = _TORCH_OPS_REGISTRY.get_func(node.kind) if _add_op is None: missing_ops.add(node.kind) else: @@ -401,7 +427,9 @@ def _check_ops(graph): return implemented_ops, missing_ops @staticmethod - def _create_placeholder(_input): + def _create_placeholder( + _input: TensorType, + ) -> Placeholder: """ Converts an InputType into a Placeholder. @@ -416,6 +444,14 @@ def _create_placeholder(_input): dtype = types.fp32 return mb.placeholder(shape, dtype=dtype) + @staticmethod + def _preprocess_input_vars(input_var): + if ( + types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type) + ) and input_var.dtype == types.fp16: + input_var = mb.cast(x=input_var, dtype="fp32") + return input_var + def check_ops(self): """ Returns the set of ops in @self.graph that are implemented, and @@ -423,7 +459,7 @@ def check_ops(self): """ return TorchConverter._check_ops(self.graph) - def convert_const(self): + def convert_const(self) -> None: for name, val in self.graph.params.items(): if isinstance(val, torch._C.ScriptObject): logger.info(f"Encountered constant {name} of type _torch._C.ScriptObject") @@ -444,40 +480,39 @@ def convert_const(self): const = mb.const(val=val, name=name) self.context.add(const) - def convert(self): + def convert(self) -> Program: logger.info("Converting graph.") - # This will hold the converted model. 
- prog = self._prog - - # Construct placeholder for input to SSA function - # This is where input renaming occurs - ssa_func_inputs = OrderedDict() + # Set SSA function input name to user defined name if provided. for index, (name, spec) in enumerate(self.graph.inputs.items()): - placeholder = self._create_placeholder(spec) - # Set SSA function input name to user defined name if provided. if spec.name is not None: name = spec.name self.inputs[index].name = name - ssa_func_inputs[name] = placeholder + + # This will hold the converted model. + prog = self._prog prog.set_main_input_types(tuple(self.inputs)) + # Construct placeholder for input to SSA function + ssa_func_inputs = OrderedDict() + for spec in self.inputs: + ssa_func_inputs[spec.name] = self._create_placeholder(spec) + # Initialize the SSA for conversion with Function(ssa_func_inputs, opset_version=self.opset_version) as ssa_func: # Map internal @self.graph.inputs to user specified @ssa_func_inputs # If @self.graph.inputs == @ssa_func_inputs this just adds the inputs # to the context. - for internal_name, users_name in zip( - self.graph.inputs.keys(), ssa_func_inputs.keys() - ): - input_var = ssa_func.inputs[users_name] - if ( - types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type) - ) and input_var.dtype == types.fp16: - input_var = mb.cast(x=input_var, dtype="fp32") - self.context.add(input_var, torch_name=internal_name) - + # Convert input placeholders + user_names = list(ssa_func_inputs.keys()) + internal_names = list(self.graph.inputs.keys()) + internal_names.extend(user_names[len(internal_names) :]) + for torch_name, ssa_name in zip(internal_names, user_names): + input_var = self._preprocess_input_vars(ssa_func.inputs[ssa_name]) + self.context.add(input_var, torch_name=torch_name) + + # Convert constants self.convert_const() # Add the rest of the operations @@ -513,184 +548,3 @@ def convert(self): if self.outputs is not None: prog.set_main_output_types(self.outputs) return prog - - def _jit_pass_lower_graph(graph, torchscript): - """ - This graph pass does a similar thing as torch._C._jit_pass_lower_graph does. - It does two things: - 1. Rename getattr nodes which produce a torch tensor to match the keys in torch model's state_dict - 2. Construct the params_dict, with the keys similar to state_dict - - To be more specific, this graph pass traces down series of GetAttr ops, and rename the final node to match the torch model state_dict. - It also replaces the node inputs by the first created tensor node with the same name. - - Example: - Input graph: - graph(%self.1 : __torch__.torch.nn.modules.Sequential, %input.1 : Tensor): - %2 : prim::GetAttr[name="linear"](%self.1) - %3 : prim::GetAttr[name="weight"](%2) - %4 : prim::GetAttr[name="bias"](%2) - %5 : prim::GetAttr[name="bias"](%2) # duplicated node - %6 : conv(%input.1, %3, %4) - %7 : add(%input.1, %5) - return (%6, %7) - - Output graph: - graph(%self.1 : __torch__.torch.nn.modules.Sequential, %input.1 : Tensor): - %2 : prim::GetAttr[name="linear"](%self.1) - %linear.weight : prim::GetAttr[name="weight"](%2) - %linear.bias : prim::GetAttr[name="bias"](%2) - %5 : prim::GetAttr[name="bias"](%2) # duplicated node, it is not used now - %6 : conv(%input.1, %linear.weight, %linear.bias) - %7 : add(%input.1, %linear.bias) # the second input is replaced - return (%6, %7) - - And a dictionary {"linear.weight": ..., "linear.bias": ...} is returned, to record the parameters values. 
- Note that, those GetAttr nodes are still in the torch ir graph, but they would be removed in a latter - graph pass in the coremltools torch internal graph - - """ - - """ - Each getattr node corresponds to a torch object in the torch IR, - it could be either: - 1. torch.nn.modules: submodule in a torch model. For instance, a linear layer in a MLP network. - 2. torch.Tensor: torch model parameters. For instance, weight for a conv layer. - 3. torch._C.ScriptObject: quantized torch model parameters. - For example, in the graph above, %2 is pointing to the __torch__.torch.nn.modules.Sequential.linear torch submodule. - node_to_module_map tracks these mapping. - - node_to_prefic_map track the name for each module, - for example, %2 has the prefix name linear and %3 is linear.weight. - These names are also keys in the state_dict - """ - node_to_module_map = {} - node_to_prefix_map = {} - first_node_with_prefix = {} - replace_input = {} - - base_module_node = list(graph.inputs())[0] - node_to_module_map[base_module_node] = torchscript - node_to_prefix_map[base_module_node] = "" - - """ - params_dict will be contructed in this graph pass. It contains all const tensors needed for the graph computation. - And the value is validated against the state_dict if the key is presented in both dictionaries. - In some rare cases, state_dict lacks parameters / buffers, so we still need to go through the while graph ourselves. - """ - params_dict = {} - state_dict = torchscript.state_dict(keep_vars=True) - - def _check_is_tensor(node, module): - if not isinstance(module, torch.Tensor): - return False - if str(node.output().type()) not in ("Tensor", "Optional[Tensor]"): - raise TypeError(f'Type "{node.output().type()}" not supported') - return True - - def _check_is_quantized_tensor(node, module): - if not isinstance(module, torch._C.ScriptObject): - return False - # We only support ScriptObjects that correspond to quantized packed params. - assert "PackedParams" in node.output().type().name() - return True - - def _lower_graph_block(graph): - for node in list(graph.nodes()): - - for block in node.blocks(): - _lower_graph_block(block) - - for idx, _input in enumerate(list(node.inputs())): - if _input in replace_input: - node.replaceInput(idx, replace_input[_input]) - - kind = node.kind().split("::")[1].lower() - if kind != "getattr": - continue - - _input = node.input() - _output = node.output() - attr_name = getattr(node, node.kindOf("name"))("name") - - module = getattr(node_to_module_map[_input], attr_name) - node_to_module_map[_output] = module - - input_prefix = node_to_prefix_map[_input] - prefix = input_prefix + '.' 
+ attr_name if input_prefix != "" else attr_name - node_to_prefix_map[_output] = prefix - - is_tensor = _check_is_tensor(node, module) - is_quantized_tensor = _check_is_quantized_tensor(node, module) - - if is_tensor or is_quantized_tensor: - if is_tensor and prefix in state_dict: - assert torch.equal( - module.cpu(), state_dict[prefix].cpu() - ), "tensor value not consistent between torch ir and state_dict" - if prefix in params_dict: - assert torch.equal(module.cpu(), params_dict[prefix].cpu()) - replace_input[_output] = first_node_with_prefix[prefix] - else: - params_dict[prefix] = module - first_node_with_prefix[prefix] = _output - _output.setDebugName(prefix) - - _lower_graph_block(graph) - - return graph, params_dict - - @staticmethod - def _expand_and_optimize_ir(torchscript): - """ - Given a torch.jit.ScriptModule, convert it to a optimized - torch._C.Graph and dict of model parameter's names to tensors. - """ - graph = torchscript.forward.graph - - # From PyTorch code: Inline function and method calls. - torch._C._jit_pass_inline(graph) - # From PyTorch code: This inlines the forked section in the fork() - # callsite and replaces uses of the result of wait() calls with the - # values produced from the (now-inlined) forked section. - torch._C._jit_pass_inline_fork_wait(graph) - # Starting from the return node, marks all nodes that feed into the - # output, as well as nodes with side effects. Any nodes not marked are - # eliminated. - torch._C._jit_pass_dce(graph) - # From PyTorch code: checks well-formedness and invariants of graph. - torch._C._jit_pass_lint(graph) - # Replaces a couple specific ops patterns (add, sub, mul, div, chunk). - if version_lt(torch, "1.6.0"): - torch._C._jit_pass_canonicalize_ops(graph) - torch._C._jit_pass_lint(graph) - - # From PyTorch code: This pass catches all of the small, easy to catch - # peephole optimizations you might be interested in doing. - # Eliminate no-op 'expand' nodes - # Simplify x.t().t() to x - # pass disabled for v1.6.0 and onwards, wrongly captures the shape of dummy inputs during tracing. - torch._C._jit_pass_peephole(graph, addmm_fusion_enabled=False) - else: - # v1.6.0 pass renamed - torch._C._jit_pass_canonicalize_graph_fuser_ops(graph) - torch._C._jit_pass_lint(graph) - - # From PyTorch docs: Renumber the graph so that all structurally - # equivalent graphs have same numbers. - graph = torch._C._jit_pass_canonicalize(graph) - torch._C._jit_pass_lint(graph) - if version_lt(torch, "1.6.0"): - # v1.6.0 JIT changes disallows pulling list values out of - # prim::Constant. We can only pull scalar values. constant - # propagation removes `listConstruct` and results in list values. - # We disallow constant prop pass to keep them as scalars, and rely - # on our own constant prop to interpret `listConstruct`. - torch._C._jit_pass_constant_propagation(graph) - # NOTE: Don't need another DCE, it's included in constant propagation. 
- torch._C._jit_pass_lint(graph) - - # Get the params_dict and rename the getattr nodes in the graph - graph, params_dict = TorchConverter._jit_pass_lower_graph(graph, torchscript) - - return graph, params_dict diff --git a/coremltools/converters/mil/frontend/torch/dialect_ops.py b/coremltools/converters/mil/frontend/torch/dialect_ops.py index 101144c6c..6796328ed 100644 --- a/coremltools/converters/mil/frontend/torch/dialect_ops.py +++ b/coremltools/converters/mil/frontend/torch/dialect_ops.py @@ -4,14 +4,10 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from coremltools.converters.mil.mil import Operation, get_new_symbol, types -from coremltools.converters.mil.mil.input_type import (DefaultInputs, - InputSpec, - TensorInputType) -from coremltools.converters.mil.mil.ops.defs._utils import \ - solve_slice_by_index_shape +from coremltools.converters.mil.mil.input_type import DefaultInputs, InputSpec, TensorInputType +from coremltools.converters.mil.mil.ops.defs._utils import get_param_val, solve_slice_by_index_shape from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry -from coremltools.converters.mil.mil.types.symbolic import \ - is_compatible_symbolic_vector +from coremltools.converters.mil.mil.types.symbolic import is_compatible_symbolic_vector register_op = SSAOpRegistry.register_op @@ -60,7 +56,7 @@ class torch_upsample_nearest_neighbor(Operation): output_height=TensorInputType(type_domain=types.int32), output_width=TensorInputType(type_domain=types.int32), ) - + type_domains = { "T": (types.fp16, types.fp32), } @@ -144,11 +140,11 @@ class torch_tensor_assign(Operation): Parameters ---------- - data: tensor<*?, T> (Required) + x: tensor<*?, T> (Required) * Input tensor updates: tensor<\*K, T> (Required) * Value tensor to be inserted - * The shape of the updates tensor must match the slicing result of the input data. + * The shape of the updates tensor must match the slicing result of the input data ``x``. begin: tensor<[rank], i32> (Required) * Starting index for the dimension of slicing. end: tensor<[rank(x)], i32> (Required) @@ -164,7 +160,7 @@ class torch_tensor_assign(Operation): * If ``end_mask[i]==True``, neglect ``end[i]``, and set ``end[i]`` to ``x.shape[i]``. squeeze_mask: tensor<[rank(x)], bool> (Optional) * Default to all ``False``. - * If ``squeeze_mask[i]==true``, neglect ``end[i]``, and do the pure index at ``begin[i]``. + * If ``squeeze_mask[i]==True``, neglect ``end[i]``, and do the pure index at ``begin[i]``. 
Returns ------- @@ -177,7 +173,7 @@ class torch_tensor_assign(Operation): """ input_spec = InputSpec( - data=TensorInputType(type_domain="T"), + x=TensorInputType(type_domain="T"), updates=TensorInputType(type_domain="T"), begin=TensorInputType(type_domain=types.int32), end=TensorInputType(type_domain=types.int32), @@ -186,7 +182,7 @@ class torch_tensor_assign(Operation): end_mask=TensorInputType(const=True, optional=True, type_domain=types.bool), squeeze_mask=TensorInputType(const=True, optional=True, type_domain=types.bool), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } @@ -200,20 +196,21 @@ def default_inputs(self): ) def type_inference(self): - # Verify the updates and the data slicing have the same shape - begin = self.begin.val - end = self.end.val - data_rank = self.data.rank - stride = self.stride.val if self.stride is not None else [1] * data_rank - begin_mask = ( - self.begin_mask.val if self.begin_mask is not None else [False] * data_rank - ) - end_mask = self.end_mask.val if self.end_mask is not None else [False] * data_rank - squeeze_mask = ( - self.squeeze_mask.val if self.squeeze_mask is not None else [False] * data_rank + # solve shape + ret_shape = solve_slice_by_index_shape( + self.x.shape, + self.begin.val, + self.end.val, + get_param_val(self.stride), + get_param_val(self.begin_mask), + get_param_val(self.end_mask), + get_param_val(self.squeeze_mask), ) - data_shape = self.data.shape - expected_updates_shape = tuple(solve_slice_by_index_shape(data_shape, begin, end, stride, begin_mask, end_mask, squeeze_mask)) - if not is_compatible_symbolic_vector(expected_updates_shape, self.updates.shape): - raise ValueError("The updates tensor should have shape {}. Got {}".format(expected_updates_shape, self.updates.shape)) - return self.data.sym_type + if not is_compatible_symbolic_vector(ret_shape, self.updates.shape): + raise ValueError( + "The updates tensor should have shape {}. Got {}".format( + ret_shape, self.updates.shape + ) + ) + + return self.x.sym_type diff --git a/coremltools/converters/mil/frontend/torch/edgeir_utils.py b/coremltools/converters/mil/frontend/torch/edgeir_utils.py new file mode 100644 index 000000000..26d0ad821 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/edgeir_utils.py @@ -0,0 +1,34 @@ +# Copyright (c) 2023, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + + +from typing import List + +from torch import Tensor + +import coremltools as ct + +from .torchscript_utils import torch_to_mil_types + + + +def to_coreml_tensor_type(name: str, tensor: Tensor) -> "ct.TensorType": + # TODO: rdar://115845948 ([Executorch] Handle inputs of shapes with dynamic dimensions) + return ct.TensorType(name=name, dtype=torch_to_mil_types[tensor.dtype], shape=tensor.shape) + + +def extract_inputs_from_edge_program(exported_program) -> List["ct.TensorType"]: + module = exported_program.graph_module + inputs_to_parameters = exported_program.graph_signature.inputs_to_parameters + inputs_to_buffers = exported_program.graph_signature.inputs_to_buffers + inputs = [] + for node in module.graph.nodes: + if node.op == "placeholder" and node.meta is not None and "val" in node.meta: + if isinstance(node.meta["val"], Tensor): + if node.name not in inputs_to_parameters and node.name not in inputs_to_buffers: + inputs.append(to_coreml_tensor_type(node.name, node.meta["val"])) + else: + raise NotImplementedError("Only Tensor inputs handled yet") + return inputs diff --git a/coremltools/converters/mil/frontend/torch/internal_graph.py b/coremltools/converters/mil/frontend/torch/internal_graph.py index b6fd83507..cbf782d54 100644 --- a/coremltools/converters/mil/frontend/torch/internal_graph.py +++ b/coremltools/converters/mil/frontend/torch/internal_graph.py @@ -3,10 +3,15 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from collections import OrderedDict + import torch +from torch.fx.node import Node -from collections import OrderedDict -from itertools import islice +from coremltools import _logger as logger + +from .edgeir_utils import extract_inputs_from_edge_program +from .torchscript_utils import _expand_and_optimize_ir _DEFAULT_OP_NAMESPACES = set(["aten", "prim"]) @@ -57,42 +62,56 @@ class InternalTorchIRBlock: coremltools internal representation of a torch IR block. """ - def __init__(self, raw_block=None, parent=None, nodes=None, inputs=None, outputs=None): - """" + def __init__(self, parent=None, nodes=None, inputs=None, outputs=None): + """ Arguments: - raw_block: The torch._C.Block to convert, or None. parent: The InternalTorchIRNode this block belongs to. - nodes: If @raw_block is None, the list of InternalTorchIRNodes in the block - inputs: If @raw_block is None, the list of input symbols. - outputs: If @raw_block is None, the list of output symbols. + nodes: list of InternalTorchIRNodes in the block + inputs: list of input symbols. + outputs: list of output symbols. 
""" - self.nodes = [] - node_names = set() - self.inputs = [] - self.outputs = [] + self.nodes = nodes + self.inputs = inputs + self.outputs = outputs self.parent = parent - if raw_block: - # Add nodes - for raw_node in raw_block.nodes(): - new_node = InternalTorchIRNode(raw_node, parent=self) - if new_node.name == new_node.kind: - new_node.name = _find_new_name(new_node.name, node_names) - self.nodes.append(new_node) - node_names.add(new_node.name) - - # Add inputs - for inp in raw_block.inputs(): - self.inputs.append(inp.debugName()) - - # Add outputs - for outp in raw_block.outputs(): - self.outputs.append(outp.debugName()) - else: - self.nodes = nodes - self.inputs = inputs - self.outputs = outputs + @classmethod + def from_edgeir_block(cls, block, parent): + raise NotImplementedError( + "EdgeIR: Support for Ops containing blocks not implemented yet" + ) # TODO: rdar://115846569 ([Executorch] Handle control flow ops from edge ir) + + @classmethod + def from_torchscript_block(cls, block, parent): + + node_names = set() + nodes = [] + inputs = [] + outputs = [] + + # Add inputs + for inp in block.inputs(): + inputs.append(inp.debugName()) + + # Add outputs + for outp in block.outputs(): + outputs.append(outp.debugName()) + + internal_block = cls(parent=parent, inputs=inputs, outputs=outputs, nodes=nodes) + + # Add nodes + for raw_node in block.nodes(): + new_node = InternalTorchIRNode.from_torchscript_node( + node=raw_node, parent=internal_block + ) + if new_node.name == new_node.kind: + new_node.name = _find_new_name(new_node.name, node_names) + internal_block.nodes.append(new_node) + node_names.add(new_node.name) + + return internal_block + def __str__(self, indent=2): indent_str = " " * indent @@ -131,51 +150,124 @@ class InternalTorchIRNode: """ def __init__( - self, node=None, parent=None, attr=None, inputs=None, outputs=None, kind=None, blocks=None, + self, + kind, + inputs, + outputs, + name=None, + parent=None, + attr=None, + blocks=None, ): """ Arguments: - node: The torch._C.Node to convert, or None. + name: Name of the node. + kind: the kind (op) of the node. + inputs: list of input symbols. + outputs: list of output symbols. parent: The InternalTorchIRGraph/Block this node belongs to. - attr: If @node is not specified, the dict of named attributes. - inputs: If @node is not specified, the list of input symbols. - outputs: If @node is not specified, the list of output symbols. - kind: If @node is not specified, the kind (op) of the node. - blocks: If @node is not specified, the list of InternalTorchIRBlock. + attr: dict of named attributes. + blocks: list of InternalTorchIRBlock. """ + if not name and not outputs: + self.name = "" + else: + self.name = name if name else outputs[0] + self.kind = kind + self.inputs = inputs + self.outputs = outputs self.parent = parent - if node is not None: - self.inputs = [_input.debugName() for _input in node.inputs()] - self.outputs = [output.debugName() for output in node.outputs()] - namespace = node.kind().split("::")[0].lower() - if namespace in _DEFAULT_OP_NAMESPACES: - # We conventionally skip the aten/prim namespaces in our naming. 
- self.kind = node.kind().split("::")[-1].lower() - else: - self.kind = node.kind().lower() - self.blocks = [InternalTorchIRBlock(raw_block=b, parent=self) for b in node.blocks()] - self.attr = { - name: getattr(node, node.kindOf(name))(name) - for name in node.attributeNames() - } - if "value" not in self.attr: - self.attr["value"] = None - # If the output is boolean, explicitly cast it so type inference - # will work correctly. - if len(self.outputs) == 1 and next(node.outputs()).type().str() == "bool": - self.attr["value"] = bool(self.attr["value"]) + self.attr = attr if attr is not None else {"value": None} + self.blocks = blocks if blocks is not None else [] + + @classmethod + def from_torchscript_node(cls, node, parent): + inputs = [_input.debugName() for _input in node.inputs()] + outputs = [output.debugName() for output in node.outputs()] + namespace = node.kind().split("::")[0].lower() + if namespace in _DEFAULT_OP_NAMESPACES: + # We conventionally skip the aten/prim namespaces in our naming. + kind = node.kind().split("::")[-1].lower() else: - self.inputs = inputs - self.outputs = outputs - self.kind = kind - self.blocks = blocks if blocks is not None else [] - self.attr = attr if attr is not None else {"value": None} + kind = node.kind().lower() + + attr = {name: getattr(node, node.kindOf(name))(name) for name in node.attributeNames()} + if "value" not in attr: + attr["value"] = None + # If the output is boolean, explicitly cast it so type inference + # will work correctly. + if len(outputs) == 1 and next(node.outputs()).type().str() == "bool": + attr["value"] = bool(attr["value"]) + # On rare occassions, a node has no outputs. In that case, the node's # name will be its kind. However, this no longer guarantees the node's # name is unique. It will be up to the graph constructing the node to # make sure names are unique. - self.name = self.outputs[0] if len(self.outputs) > 0 else self.kind + name = outputs[0] if len(outputs) > 0 else kind + + internal_node = cls( + name=name, + kind=kind, + parent=parent, + inputs=inputs, + outputs=outputs, + attr=attr, + blocks=None, + ) + internal_node.blocks = [ + InternalTorchIRBlock.from_torchscript_block(block=b, parent=internal_node) + for b in node.blocks() + ] + return internal_node + + @classmethod + def from_edgeir_node(cls, node): + def get_arguments(alist): + args = [] + for i in alist: + if isinstance(i, Node): + args.append(i.name) + elif isinstance(i, torch.fx.immutable_collections.immutable_list): + args.append(get_arguments(i)) + elif isinstance(i, (int, float)): + args.append(i) + elif i is None: + args.append(None) + else: + raise AssertionError(f"Unhandled type of the node: {type(i)}") + return tuple(args) + + inputs = get_arguments(node.args) + outputs = [ + node.name + ] # TODO: rdar://115846125 ([Executorch] Handle Models/Layers with Multiple outputs) + + try: + kind = node.target.name() + except: + if callable(node.target): + kind = node.target.__name__ + else: + kind = str(node.target) + + namespace = kind.split("::")[0].lower() + if namespace in _DEFAULT_OP_NAMESPACES: + # We conventionally skip the aten/prim namespaces in our naming. 
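For EdgeIR nodes the same namespace-stripping convention is applied to the `node.target` name. A small self-contained sketch of that normalization (the helper name is made up for illustration; the example string matches the `add.tensor` alias registered later in ops.py):

```python
_DEFAULT_OP_NAMESPACES = set(["aten", "prim"])

def _normalize_kind(kind: str) -> str:
    # "aten::add.Tensor" is looked up in the op registry as "add.tensor".
    namespace = kind.split("::")[0].lower()
    if namespace in _DEFAULT_OP_NAMESPACES:
        return kind.split("::")[-1].lower()
    return kind.lower()

assert _normalize_kind("aten::add.Tensor") == "add.tensor"
assert _normalize_kind("my_ns::CustomOp") == "my_ns::customop"
```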
+ kind = kind.split("::")[-1].lower() + else: + kind = kind.lower() + + name = node.name + return cls( + name=name, + kind=kind, + inputs=inputs, + outputs=outputs, + parent=None, + attr=None, + blocks=None, + ) def __str__(self, indent=2): node_str = " " * indent + "{} = {}".format( @@ -229,73 +321,151 @@ class InternalTorchIRGraph: """ def __init__( - self, raw_graph=None, params_dict=None, input_values=None, cut_at_symbols=None, - nodes=None, params=None, inputs=None, outputs=None, + self, + params, + inputs, + outputs, + nodes=None, ): """ Arguments: - raw_graph: raw_graph: The torch._C.Graph to convert, or None. - params_dict: A dictionary mapping graph parameter names to tensors. - Must be given if @raw_graph is not None. + params: dict mapping parameter names to their numpy value. + inputs: OrderedDict mapping input names to their example values. + outputs: list[str], list of outputs from the graph. + nodes: list of InternalTorchIRNodes in the graph. + """ + self.nodes = nodes + self.params = params + self.inputs = inputs + self.outputs = outputs + + @classmethod + def from_torchscript(cls, torchscript, input_values=None, cut_at_symbols=None): + """ + Arguments: + torchscript: TorchScript object representing the model to convert. input_values: A list of inputs to the graph. Must be given is @raw_graph if not None. cut_at_symbols: The list of desired outputs from the graph. Symbols must be present in the graph. For debugging use only. Can only be given if @raw_graph is not None. - nodes: If @raw_graph is None, the list of InternalTorchIRNodes in - the graph. - params: If @raw_graph is None, the dict mapping parameter names to - their numpy value. - inputs: If @raw_graph is None, the OrderedDict mapping input names - to their example values. - outputs: list[str], If @raw_graph is None, the list of outputs from the graph. """ + if not isinstance(torchscript, torch.jit.ScriptModule): + raise AssertionError( + f"Input should be an object of type torch.jit.ScriptModule. Provide: {type(torchscript)}" + ) - self.nodes = [] - node_names = set() - self.params = {} - self.inputs = OrderedDict() - self.outputs = [] - - if raw_graph is not None: - # Add nodes - for raw_node in raw_graph.nodes(): - new_node = InternalTorchIRNode(raw_node, parent=self) - if new_node.name == new_node.kind: - new_node.name = _find_new_name(new_node.name, node_names) - self.nodes.append(new_node) - node_names.add(new_node.name) - - # Add params - for name, param in params_dict.items(): - if isinstance(param, torch.Tensor): - if param.is_quantized: - value = param - else: - value = param.detach().cpu().numpy() - else: + if hasattr(torchscript, "training") and torchscript.training: + logger.warning( + "Model is not in eval mode. " + "Consider calling '.eval()' on your model prior to conversion" + ) + if type(torchscript) == torch.jit._script.RecursiveScriptModule: + logger.warning( + "Support for converting Torch Script Models is experimental. " + "If possible you should use a traced model for conversion." + ) + + nodes = [] + params = {} + inputs = OrderedDict() + outputs = [] + + raw_graph, params_dict, buffer_dict = _expand_and_optimize_ir(torchscript) + + # Add params + for name, param in params_dict.items(): + if isinstance(param, torch.Tensor): + if param.is_quantized: value = param - self.params[name] = value - - # Add inputs - # The first element of the raw_graph.inputs() is the 'self' of the module, which is not used. 
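An illustrative sketch of the new classmethod entry point (the toy module is made up; `input_values` mirrors the converted `InputType` list that `TorchConverter` normally passes in, and the method also returns the raw parameter and buffer dictionaries):

```python
import torch
import coremltools as ct
from coremltools.converters.mil.frontend.torch.internal_graph import InternalTorchIRGraph

class Small(torch.nn.Module):
    def forward(self, x):
        return x + 1.0

traced = torch.jit.trace(Small().eval(), torch.rand(2, 3))
graph, params_dict, buffer_dict = InternalTorchIRGraph.from_torchscript(
    traced, input_values=[ct.TensorType(shape=(2, 3))]
)
print(graph)  # InternalTorchIRGraph implements __str__ for debugging
```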
- graph_inputs = list(raw_graph.inputs())[1:] - for index, _input in enumerate(islice(graph_inputs, len(input_values))): - name = _input.debugName() - value = input_values[index] - self.inputs[name] = value - - # Add outputs, cutting if @cut_at_symbols is set - output_names = cut_at_symbols - if output_names is None: - output_names = [x.debugName() for x in raw_graph.outputs()] - for output in output_names: - self.outputs.append(output) - else: - self.nodes = nodes - self.params = params - self.inputs = inputs - self.outputs = outputs + else: + value = param.detach().cpu().numpy() + else: + value = param + params[name] = value + + # Add inputs + # The first element of the raw_graph.inputs() is the 'self' of the module, which is not used. + graph_inputs = list(raw_graph.inputs())[1:] + if len(graph_inputs) != len(input_values): + raise ValueError( + f"Number of TorchScript inputs ({len(graph_inputs)}) must match the user provided inputs ({len(input_values)})." + ) + for index, _input in enumerate(graph_inputs): + name = _input.debugName() + value = input_values[index] + inputs[name] = value + + # Add outputs, cutting if @cut_at_symbols is set + output_names = cut_at_symbols + if output_names is None: + output_names = [x.debugName() for x in raw_graph.outputs()] + for output in output_names: + outputs.append(output) + + internal_graph = cls(nodes=nodes, params=params, inputs=inputs, outputs=outputs) + + node_names = set() + # Add nodes + for raw_node in raw_graph.nodes(): + new_node = InternalTorchIRNode.from_torchscript_node( + node=raw_node, parent=internal_graph + ) + if new_node.name == new_node.kind: + new_node.name = _find_new_name(new_node.name, node_names) + internal_graph.nodes.append(new_node) + node_names.add(new_node.name) + + return internal_graph, params_dict, buffer_dict + + @classmethod + def from_edgeir(cls, edgeir): + exported_program = edgeir + + nodes = [] + params = {} + outputs = [] + inputs = OrderedDict( + [ + (i.name, i) + for i in extract_inputs_from_edge_program(exported_program=exported_program) + ] + ) + + inputs_to_parameters = exported_program.graph_signature.inputs_to_parameters + inputs_to_buffers = exported_program.graph_signature.inputs_to_buffers + + inputs_to_consts = {**inputs_to_parameters, **inputs_to_buffers} + + parameters_to_inputs = { + v: k if not k.startswith("%") else k[1:] for k, v in inputs_to_consts.items() + } + + # Add params + for name, param in exported_program.state_dict.items(): + if isinstance(param, torch.Tensor): + value = param.detach().cpu().numpy() + else: + raise NotImplementedError("Only torch.Tensor handled yet") + + params[name if name not in parameters_to_inputs else parameters_to_inputs[name]] = value + + graph = exported_program.graph + + outputs = [] + for node in graph.nodes: + if node.op == "call_function": + nodes.append(InternalTorchIRNode.from_edgeir_node(node=node)) + elif node.op == "placeholder": + continue + elif node.op == "output": + outputs = [ + node.name for node in node.args[0] + ] # TODO: rdar://115846125 ([Executorch] Handle Models/Layers with Multiple outputs) + else: + raise NotImplementedError(f"Nodes of type {node.op} not yet implemented") + + return cls(nodes=nodes, params=params, inputs=inputs, outputs=outputs) def __str__(self): graph_str = "graph(\n" diff --git a/coremltools/converters/mil/frontend/torch/load.py b/coremltools/converters/mil/frontend/torch/load.py index ca822776d..38229634b 100644 --- a/coremltools/converters/mil/frontend/torch/load.py +++ 
b/coremltools/converters/mil/frontend/torch/load.py @@ -4,32 +4,40 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import os.path as _os_path +from typing import List, Optional, Union import torch as _torch +from torch.jit._script import RecursiveScriptModule -from coremltools import _logger as logger -from coremltools.converters.mil.input_types import InputType, TensorType +from coremltools._deps import _HAS_TORCH_EXPORT_API +from coremltools.converters.mil.frontend.torch.converter import TorchConverter +from coremltools.converters.mil.input_types import TensorType +from coremltools.converters.mil.mil.program import Program -from .converter import TorchConverter, torch_to_mil_types +from .converter import TorchConverter +if _HAS_TORCH_EXPORT_API: + from torch.export import ExportedProgram def load( - model_spec, - inputs, - specification_version, - debug=False, - outputs=None, - cut_at_symbols=None, - use_default_fp16_io=False, + spec: Union[RecursiveScriptModule, "ExportedProgram", str], + inputs: List[TensorType], + specification_version: int, + debug: bool = False, + outputs: Optional[List[TensorType]] = None, + cut_at_symbols: Optional[List[str]] = None, + use_default_fp16_io: bool = False, **kwargs -): +) -> Program: """ Convert PyTorch model to mil CoreML format. Parameters ---------- - model_spec: String path to .pt file, or a TorchScript object representing - the model to convert. + spec: It could be one of the following: + - String path to .pt file containing serialized torchscript model + - In memory TorchScript model of type torch.jit.ScriptModule + - In memory EdgeIR program of type ExportedProgram inputs: Can be a singular element or list of elements of the following form 1. Any subclass of InputType 2. torch.Tensor (only shape and dtype will be used) @@ -54,28 +62,25 @@ def load( and the compute precision set to fp16, this flag is True. When True, fp32 i/o defaults to fp16. """ - torchscript = _torchscript_from_model(model_spec) - if hasattr(torchscript, 'training') and torchscript.training: - logger.warning("Model is not in eval mode. " - "Consider calling '.eval()' on your model prior to conversion") - if type(torchscript) == _torch.jit._script.RecursiveScriptModule: - logger.warning("Support for converting Torch Script Models is experimental. 
" - "If possible you should use a traced model for conversion.") + if _HAS_TORCH_EXPORT_API and isinstance(spec, ExportedProgram): + model = spec + else: + model = _torchscript_from_spec(spec) - inputs = _convert_to_torch_inputtype(inputs) converter = TorchConverter( - torchscript, + model, inputs, outputs, cut_at_symbols, specification_version, use_default_fp16_io, ) + return _perform_torch_convert(converter, debug) -def _torchscript_from_model(model_spec): +def _torchscript_from_spec(model_spec: RecursiveScriptModule) -> RecursiveScriptModule: if isinstance(model_spec, str) and (model_spec.endswith(".pt") or model_spec.endswith(".pth")): filename = _os_path.abspath(model_spec) return _torch.jit.load(filename) @@ -88,28 +93,8 @@ def _torchscript_from_model(model_spec): ) ) -def _convert_to_torch_inputtype(inputs): - input_type = [] - for _input in inputs: - if isinstance(_input, (list, tuple)): - input_type.append(_convert_to_torch_inputtype(_input)) - elif isinstance(_input, InputType): - if _input.shape is None: - raise ValueError("'shape' must be provided in the 'inputs' argument for pytorch conversion") - input_type.append(_input) - elif isinstance(_input, _torch.Tensor): - input_type.append( - TensorType( - shape=_input.shape, dtype=torch_to_mil_types[_input.dtype] - ) - ) - else: - raise ValueError( - "Unknown type {} for conversion to InputType.".format(type(_input)) - ) - return input_type -def _perform_torch_convert(converter, debug): +def _perform_torch_convert(converter: TorchConverter, debug: bool) -> Program: try: prog = converter.convert() except RuntimeError as e: diff --git a/coremltools/converters/mil/frontend/torch/ops.py b/coremltools/converters/mil/frontend/torch/ops.py index e15adc3be..51b968d2a 100644 --- a/coremltools/converters/mil/frontend/torch/ops.py +++ b/coremltools/converters/mil/frontend/torch/ops.py @@ -8,7 +8,7 @@ import numbers import re from collections.abc import Iterable -from typing import List, Optional +from typing import Any, List, Optional import numpy as _np import numpy as np @@ -67,31 +67,29 @@ def convert_nodes(context, graph): """ for node in _tqdm(graph.nodes, desc="Converting PyTorch Frontend ==> MIL Ops", unit=" ops"): op_lookup = node.kind - if op_lookup.startswith("__") and op_lookup.endswith("__"): - # Some ops may have double underscore, such as `__and__`. - op_lookup = op_lookup[2:-2] - elif op_lookup.endswith("_"): - # This is an "in place" op. - # Look up the standard op instead by removing underscore. - op_lookup = op_lookup[:-1] - add_op = _TORCH_OPS_REGISTRY.get(op_lookup, None) - - logger.info("Converting op {} : {}".format(node.name, node.kind)) + add_op = _TORCH_OPS_REGISTRY.get_func(op_lookup) if add_op is None: - if re.match(r".*_dynamic", node.kind): + if re.match(r".*_dynamic", op_lookup): raise RuntimeError( - f"PyTorch convert function for op '{node.kind}' not implemented.\n" + f"PyTorch convert function for op '{op_lookup}' not implemented.\n" "Dynamic quantized models are not supported by Core ML.\n" "Please use static quantization or the APIs in coremltools.optimize to quantize/compress models." ) else: raise RuntimeError( - f"PyTorch convert function for op '{node.kind}' not implemented." + f"PyTorch convert function for op '{op_lookup}' not implemented." 
) + logger.info("Converting op {} : {}".format(node.name, op_lookup)) + + context.quant_context.maybe_handle_quantized_inputs(node) context.prepare_for_conversion(node) + add_op(context, node) + if _TORCH_OPS_REGISTRY.is_inplace_op(op_lookup): + context.process_inplace_op(node) + # We've generated all the outputs the graph needs, terminate conversion. if _all_outputs_present(context, graph): break @@ -196,7 +194,34 @@ def _get_inputs(context, node, expected=None, min_expected=None) -> List[Var]: @expected is not None, also verifies the number of inputs matches the value of @expected. """ - inputs = [context[name] for name in node.inputs] + + def get_bindings(alist) -> List[Any]: + """ + This utility is needed in order to handle following cases: + With EdgeIR, + - Some of the inputs can be literals (like axis, perms) and thus can be of types: list, int etc. + - An Input Parameter of an op could be a list/tuple similar to our concat layer + """ + results = [] + + for i in alist: + if isinstance(i, str): + results.append(context[i]) + elif isinstance(i, (list, tuple)) and all(isinstance(j, int) for j in i): + results.append(mb.const(val=i)) + elif isinstance(i, (list, tuple)): + results.append(get_bindings(i)) + elif isinstance(i, (int, float)): + results.append(mb.const(val=i)) + elif i is None: + results.append(None) + else: + raise NotImplementedError(f"Binding of inputs of type {type(i)} not handled yet") + + return results + + inputs = get_bindings(node.inputs) + if expected is not None: expected = [expected] if not isinstance(expected, (list, tuple)) else expected @@ -694,9 +719,9 @@ def eq(context, node): x = inputs[0] y = inputs[1] if is_bool(x.dtype): - x = mb.cast(x=x, dtype='int32') + x = mb.cast(x=x, dtype="int32") if is_bool(y.dtype): - y = mb.cast(x=y, dtype='int32') + y = mb.cast(x=y, dtype="int32") x, y = promote_input_dtypes([x, y]) equal_to = mb.equal(x=x, y=y, name=node.name) context.add(equal_to) @@ -708,9 +733,9 @@ def ne(context, node): x = inputs[0] y = inputs[1] if is_bool(x.dtype): - x = mb.cast(x=x, dtype='int32') + x = mb.cast(x=x, dtype="int32") if is_bool(y.dtype): - y = mb.cast(x=y, dtype='int32') + y = mb.cast(x=y, dtype="int32") x, y = promote_input_dtypes([x, y]) equal_to = mb.not_equal(x=x, y=y, name=node.name) context.add(equal_to) @@ -774,8 +799,8 @@ def transpose(context, node): context.add(res) -@register_torch_op -def permute(context, node): +@register_torch_op(torch_alias=["permute"]) +def permute_copy(context, node): inputs = _get_inputs(context, node, expected=2) perm = mb.transpose(x=inputs[0], perm=inputs[1], name=node.name) context.add(perm) @@ -807,18 +832,19 @@ def pixel_unshuffle(context, node): context.add(perm) -@register_torch_op(torch_alias=["bmm"]) +@register_torch_op(torch_alias=["bmm", "mm"]) def matmul(context, node): inputs = _get_inputs(context, node, expected=2) if inputs[1].val is not None and \ len(inputs[1].shape) == 2 and len(inputs[0].shape) <= 3: res = mb.linear(x=inputs[0], weight=_np.transpose(inputs[1].val), name=node.name) else: - res = mb.matmul(x=inputs[0], y=inputs[1], name=node.name) + x, y = promote_input_dtypes([inputs[0], inputs[1]]) + res = mb.matmul(x=x, y=y, name=node.name) context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["add.tensor"]) def add(context, node): add_inputs = _get_inputs(context, node) assert len(node.outputs) == 1 @@ -852,12 +878,12 @@ def addmm(context, node): # output = beta * input + alpha * mat1 * mat2 assert len(node.outputs) == 1 - inputs = _get_inputs(context, node, 
expected=5) + inputs = _get_inputs(context, node, expected=[3, 4, 5]) bias = inputs[0] mat1 = inputs[1] mat2 = inputs[2] - beta = inputs[3] - alpha = inputs[4] + beta = inputs[3] if len(inputs) > 3 else mb.const(val=1.0) + alpha = inputs[4] if len(inputs) > 4 else mb.const(val=1.0) if beta.val != 1.0: # Apply scaling factor beta to the bias. @@ -889,7 +915,7 @@ def linear(context, node): context.add(res, torch_name=node.name) -@register_torch_op(torch_alias=["conv2d"]) +@register_torch_op(torch_alias=["conv2d", "convolution"]) def _convolution(context, node): inputs = _get_inputs(context, node) @@ -923,7 +949,7 @@ def _convolution(context, node): dilations = inputs[5] out_pad = None - if len(inputs) >= 12: + if len(inputs) >= 9: transposed = inputs[6].val out_pad = inputs[7].val group = inputs[8] @@ -1042,7 +1068,7 @@ def _convolution_mode(context, node): ) -@register_torch_op +@register_torch_op(torch_alias=["_softmax"]) def softmax(context, node): inputs = _get_inputs(context, node) @@ -1333,14 +1359,15 @@ def _max_pool(context, node, inputs): strides = mb.const(val=kernel_sizes.val, name=strides.name) pad_type = "custom" - # Need to explicitly state L-R, T-B pad - pad = inputs[3] - pad = _np.repeat(pad.val, 2) - dilation = inputs[4].val - ceil_mode = inputs[5].val + + pad = np.array([0] * (kernel_sizes.shape[0] * 2)) if len(inputs) < 4 else _np.repeat(inputs[3].val, 2) + dilation = np.array([1] * kernel_sizes.shape[0]) if len(inputs) < 5 else inputs[4].val + ceil_mode = False if len(inputs) < 6 else inputs[5].val + if _np.any(dilation > 1): # See: rdar://60633736 (Implement dilation for mil op max_pool) raise ValueError("@max_pool does not support dilation > 1") + spatial_rank = len(pad) // 2 if spatial_rank > 2 and ceil_mode is True and list(strides.val) != [1] * len(strides.val): # since MIL does not support ceil_mode for 3D pool, @@ -1358,7 +1385,12 @@ def _max_pool(context, node, inputs): name=node.name, ceil_mode=ceil_mode if spatial_rank <= 2 else False, ) - context.add(pool) + + if node.kind == "max_pool2d_with_indices": + # TODO(rdar://117038432) ([Executorch] Handle/Bind other outputs of `max_pool2d_with_indices` op during lowering) + context.add((pool, None), torch_name=node.name) + else: + context.add(pool) @register_torch_op @@ -1367,9 +1399,9 @@ def max_pool1d(context, node): _max_pool(context, node, inputs) -@register_torch_op +@register_torch_op(torch_alias=["max_pool2d_with_indices"]) def max_pool2d(context, node): - inputs = _get_inputs(context, node, expected=6) + inputs = _get_inputs(context, node, min_expected=3) _max_pool(context, node, inputs) @@ -1406,7 +1438,7 @@ def maximum(context, node): context.add(out) -@register_torch_op +@register_torch_op(torch_alias = ["div.tensor"]) def div(context, node): inputs = _get_inputs(context, node, expected=[2, 3]) x = mb.cast(x=inputs[0], dtype="fp32") @@ -1457,7 +1489,7 @@ def true_divide(context, node): context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["mul.tensor", "mul.scalar"]) def mul(context, node): inputs = _get_inputs(context, node, expected=2) x, y = promote_input_dtypes(inputs) @@ -1505,9 +1537,11 @@ def sub(context, node): @register_torch_op( torch_alias=[ + "mean.dim", "sum", "logsumexp", - ]) + ] +) def mean(context, node): inputs = _get_inputs(context, node) @@ -1546,18 +1580,22 @@ def mean(context, node): context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["squeeze_copy.dim", "squeeze_copy.dims"]) def squeeze(context, node): inputs = _get_inputs(context, node) if 
len(inputs) == 1: res = mb.squeeze(x=inputs[0], name=node.name) elif len(inputs) == 2: - squeeze_dim = inputs[1].val - res = mb.squeeze(x=inputs[0], axes=(squeeze_dim,), name=node.name) + dims = inputs[1].val + try: + dims = (int(dims),) + except: + pass + res = mb.squeeze(x=inputs[0], axes=dims, name=node.name) context.add(res) -@register_torch_op +@register_torch_op(torch_alias=["unsqueeze_copy"]) def unsqueeze(context, node): inputs = _get_inputs(context, node, expected=2) unsqueeze = mb.expand_dims(x=inputs[0], axes=[inputs[1].val], name=node.name) @@ -1591,7 +1629,7 @@ def _shape_as_tensor(context, node): context.add(shape_node, node.name) -@register_torch_op(torch_alias=["reshape"]) +@register_torch_op(torch_alias=["view_copy", "reshape"]) def view(context, node): inputs = _get_inputs(context, node, expected=2) x = inputs[0] @@ -1602,9 +1640,8 @@ def view(context, node): indices = mb.range_1d(start=0, end=length, step=1) shape = mb.list_gather(ls=shape, indices=indices) - if ( - isinstance(shape, list) - and all([isinstance(dim, Var) and len(dim.shape) == 0 for dim in shape]) + if isinstance(shape, list) and all( + [isinstance(dim, Var) and len(dim.shape) == 0 for dim in shape] ): shape = mb.concat(values=shape, axis=0) @@ -1739,25 +1776,39 @@ def _adaptive_pool2d(context, node, pool_op, reduce_op): context.add(result) -@register_torch_op +@register_torch_op(torch_alias=["_native_batch_norm_legit_no_training"]) def batch_norm(context, node): - inputs = _get_inputs(context, node, expected=9) - # inputs skipped: - # float momentum (6) - # bool cudnn_enabled (8) - input_rank = inputs[0].rank - if input_rank < 2 or input_rank > 5: - raise ValueError( - "BatchNorm: Encountered invalid input rank during translation in torch frontend." - ) + inputs = _get_inputs(context, node, expected=[7, 9]) _input = inputs[0] weight = inputs[1] bias = inputs[2] running_mean = inputs[3] running_var = inputs[4] - training = inputs[5].val - eps = inputs[7] + + if len(inputs) == 9: + # inputs skipped: + # float momentum (6) + # bool cudnn_enabled (8) + + training = inputs[5].val + eps = inputs[7] + # no: training, cudnn_enabled + elif len(inputs) == 7: + # inputs skipped: + # float momentum (5) + eps = inputs[6] + + training = False + else: + raise ValueError( + f"BatchNorm: got {len(inputs)} inputs, expected 7 or 9" + ) + input_rank = _input.rank + if input_rank < 2 or input_rank > 5: + raise ValueError( + "BatchNorm: Encountered invalid input rank during translation in torch frontend." + ) # If training = True, the mean and variance of the current batch of data are used to normalize the input data. # If training = False, data statistics running_mean and running_var are used instead. 
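The two input layouts handled above come from different frontends: TorchScript's `aten::batch_norm` carries 9 arguments, while the EdgeIR variant `_native_batch_norm_legit_no_training` carries 7 and is implicitly in inference mode. A small sketch of an eval-mode module that yields the 7-argument form when exported (shapes are arbitrary):

```python
import torch

bn = torch.nn.BatchNorm2d(num_features=4).eval()
x = torch.rand(1, 4, 8, 8)

# TorchScript tracing keeps aten::batch_norm with
#   (input, weight, bias, running_mean, running_var,
#    training, momentum, eps, cudnn_enabled)            -> 9 inputs
# torch.export / ExecuTorch lowers the eval-mode module to
# _native_batch_norm_legit_no_training with
#   (input, weight, bias, running_mean, running_var,
#    momentum, eps)                                      -> 7 inputs
y = bn(x)
```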
@@ -1797,7 +1848,7 @@ def _add_batch_norm_dynamic(): bias_reshape = mb.reshape(x=bias, shape=shape) x = mb.add(x=x, y=bias_reshape, name=node.name) - context.add(x) + return x def _add_batch_norm_1d(): # first expand the 3d tensor to 4d, and call the standard mb.batch_norm @@ -1812,7 +1863,7 @@ def _add_batch_norm_1d(): name=node.name + "_batch_norm_1d", ) bn = mb.squeeze(x=bn, name=node.name, axes=[-1]) - context.add(bn) + return bn def _add_batch_norm(): bn = mb.batch_norm( @@ -1824,16 +1875,22 @@ def _add_batch_norm(): epsilon=eps, name=node.name, ) - context.add(bn) + return bn is_batch_norm_1d_rank_2 = input_rank == 2 if training or running_mean.val is None or running_var.val is None or weight is None or bias is None: - _add_batch_norm_dynamic() + bn = _add_batch_norm_dynamic() elif is_batch_norm_1d_rank_2: - _add_batch_norm_1d() + bn = _add_batch_norm_1d() else: - _add_batch_norm() + bn = _add_batch_norm() + + if node.kind == "_native_batch_norm_legit_no_training": + # TODO(rdar://117038279) ([Executorch] Handle/Bind other outputs of `_native_batch_norm_legit_no_training` op during lowering) + bn = (bn, None, None) + + context.add(bn, torch_name=node.name) @register_torch_op @@ -1935,7 +1992,7 @@ def hardtanh(context, node): context.add(res) -@register_torch_op(torch_alias=['concat']) +@register_torch_op(torch_alias=["concat"]) def cat(context, node): inputs = _get_inputs(context, node) axis = 0 if len(inputs) == 1 else inputs[1] @@ -2013,14 +2070,10 @@ def _cast(context, node, dtype, dtype_name): res = mb.const(val=dtype(x.val), name=node.name) else: res = x - elif x.shape == (1,): + elif len(x.shape) > 0: x = mb.squeeze(x=x, name=node.name + "_item") res = mb.cast(x=x, dtype=dtype_name, name=node.name) else: - if len(x.shape) > 0: - # TODO: There's no MIL op to extract a value from a symbolic tensor, - # so as a workaround we use reduce_max to convert it to a scalar. 
- x = mb.reduce_max(x=x, name=node.name + "_item") res = mb.cast(x=x, dtype=dtype_name, name=node.name) context.add(res, node.name) @@ -2035,9 +2088,9 @@ def _int(context, node): _cast(context, node, int, "int32") -@register_torch_op +@register_torch_op(torch_alias=["native_layer_norm"]) def layer_norm(context, node): - inputs = _get_inputs(context, node, expected=6) + inputs = _get_inputs(context, node, min_expected=5) _input = inputs[0] normalized_shape = inputs[1] weight = inputs[2] @@ -2053,7 +2106,12 @@ def layer_norm(context, node): epsilon=eps, name=node.name, ) - context.add(layer_norm) + + if node.kind == "native_layer_norm": + # TODO(rdar://117038370) ([Executorch] Handle/Bind other outputs of `native_layer_norm` op during lowering) + context.add((layer_norm, None, None), torch_name=node.name) + else: + context.add(layer_norm) @register_torch_op @@ -3281,7 +3339,7 @@ def _false_path(): context.add(output_var, torch_name=output_name) -@register_torch_op +@register_torch_op(torch_alias=["select_copy.int"]) def select(context, node): inputs = _get_inputs(context, node, expected=3) _input = inputs[0] @@ -3311,7 +3369,7 @@ def select(context, node): squeeze_mask[dim] = True if index.val != -1: - if index.val is None: + if index.val is None: # index value not know till runtime temp = mb.add(x=index, y=1) end_array[dim] = temp @@ -3331,6 +3389,33 @@ def select(context, node): context.add(slice_by_index) +@register_torch_op +def getitem(context, node): + inputs = _get_inputs(context, node, expected=2) + + if not isinstance(inputs[0], (list, tuple)): + raise AssertionError("Item selection is supported only on python list/tuple objects") + + if inputs[1].val is None: + raise AssertionError("Only static item selection supported") + + try: + index = int(inputs[1].val) + except: + raise AssertionError( + f"Index into python list/tuple needs to be integer. Provided value: {inputs[1].val}" + ) + + out = inputs[0][index] + + if out is None: + raise AssertionError( + f"coremltools lowering didn't handle/bind value at index {index}. Please inspect the lowering of parent op for its return value" + ) + + context.add(out, torch_name=node.name) + + @register_torch_op def type_as(context, node): inputs = _get_inputs(context, node, expected=2) @@ -3357,6 +3442,20 @@ def nonzero(context, node): def _get_slice_params(context, data, inputs): + def _expand_list_to_rank_1(arr): + """ + We make the elements in begin and end rank 1, + so the pattern of ``squeeze -> expand_dims`` can be removed + by the ``fuse_squeeze_expand_dims`` graph pass. 
+ """ + for i, val in enumerate(arr): + if isinstance(val, Var): + if val.rank == 0: + arr[i] = mb.expand_dims(x=val, axes=[0]) + else: + arr[i] = np.array([val]) + return arr + rank = data.rank begin = [0] * rank end = [0] * rank @@ -3400,12 +3499,38 @@ def _get_slice_params(context, data, inputs): begin_mask[i] = True end_mask[i] = True + begin = _expand_list_to_rank_1(begin) + eng = _expand_list_to_rank_1(end) begin = mb.concat(values=begin, axis=0) end = mb.concat(values=end, axis=0) return begin, end, stride, begin_mask, end_mask, squeeze_mask +def _translate_torch_tensor_assign( + x, + updates, + begin, + end, + stride, + begin_mask, + end_mask, + squeeze_mask, + name, +): + return mb.torch_tensor_assign( + x=x, + updates=updates, + begin=begin, + end=end, + stride=stride, + begin_mask=begin_mask, + end_mask=end_mask, + squeeze_mask=squeeze_mask, + name=name, + ) + + @register_torch_op def _internal_op_tensor_inplace_copy(context, node): data = context[node.inputs[0]] @@ -3415,8 +3540,8 @@ def _internal_op_tensor_inplace_copy(context, node): ) data, updates = promote_input_dtypes([data, updates]) - updated_x = mb.torch_tensor_assign( - data=data, + updated_x = _translate_torch_tensor_assign( + x=data, updates=updates, begin=begin, end=end, @@ -3456,8 +3581,9 @@ def _internal_op_tensor_inplace_fill(context, node): update_values = _np.full(fill_shape, fill_scalar.val) data, update_values = promote_input_dtypes([data, update_values]) - updated_x = mb.torch_tensor_assign( - data=data, + + updated_x = _translate_torch_tensor_assign( + x=data, updates=update_values, begin=begin, end=end, @@ -3916,8 +4042,8 @@ def avg_pool1d(context, node): @register_torch_op def avg_pool2d(context, node): - inputs = _get_inputs(context, node, expected=7) - divisor_override = inputs[6] + inputs = _get_inputs(context, node, min_expected=6) + divisor_override = None if len(inputs) < 7 else inputs[6] if divisor_override is not None: raise ValueError("divisor_override is not supported for avg_pool2d") _avg_pool(context, node, inputs) @@ -3932,14 +4058,17 @@ def avg_pool3d(context, node): _avg_pool(context, node, inputs) -@register_torch_op +@register_torch_op(torch_alias=["_log_softmax"]) def log_softmax(context, node): inputs = _get_inputs(context, node) x = inputs[0] axis = inputs[1] - out = inputs[2] # Ignored. - assert out is None + + # input 2 is either out or half_to_float, so we ignore + ignored = inputs[2] + assert ignored is None or ignored.dtype == types.bool + res = mb.softmax(x=x, axis=axis, name=node.name + "_softmax") res = mb.log(x=res, name=node.name) context.add(res) @@ -4126,7 +4255,7 @@ def unbind(context, node): context.add(res, torch_name=node.name) -@register_torch_op +@register_torch_op(torch_alias = ["_to_copy"]) def to(context, node): inputs = _get_inputs(context, node) @@ -4138,13 +4267,14 @@ def to(context, node): # - When len(inputs) == 3, the parameter is (input, non_blocking, copy) # We only use `input` and `dtype`, and `non_blocking` and `copy` are unused. 
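As a rough guide to the argument-count handling that follows, a sketch of the torch-level calls behind the different arities (the exact overload shapes depend on the tracing path, so treat the counts as illustrative):

```python
import torch

x = torch.rand(2, 2)

# Typical sources of the aten::to / _to_copy node seen by the converter:
y1 = x.to(torch.float16)                       # dtype only, small arg count
y2 = x.to(torch.float16, non_blocking=False)   # dtype plus flags
y3 = x.to(device="cpu", dtype=torch.float16)   # device variant, dtype at a later index
# Only `input` and `dtype` are consumed; non_blocking/copy/device are ignored.
```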
_input = inputs[0] + target_dtype: Optional[Var] inputs_len = len(inputs) if inputs_len in (4, 5, 7, 8): target_dtype = inputs[1] elif inputs_len == 6: target_dtype = inputs[2] - elif inputs_len == 3: + elif inputs_len <= 3: target_dtype = None else: raise ValueError( @@ -4242,7 +4372,7 @@ def _broadcast(name, tensor, shape): return res -@register_torch_op +@register_torch_op(torch_alias=["expand_copy"]) def expand(context, node): def _broadcast_dynamic(name, tensor, shape): # Add any extra dimensions @@ -4406,7 +4536,6 @@ def meshgrid(context, node): "detach", "device", "dropout", - "dropout_", "feature_dropout", "lift_fresh", ] @@ -4435,9 +4564,12 @@ def argmax(context, node): def zeros_like(context, node): inputs = _get_inputs(context, node, expected=6) x = inputs[0] - dtype = inputs[1].val shape = mb.shape(x=x) - np_type = NUM_TO_NUMPY_DTYPE[dtype] + if inputs[1] and inputs[1].val: + dtype = inputs[1].val + np_type = NUM_TO_NUMPY_DTYPE[dtype] + else: + np_type = nptype_from_builtin(x.dtype) if shape.can_be_folded_to_const(): shape = shape.val @@ -4836,10 +4968,10 @@ def ceil(context, node): @register_torch_op def clamp(context, node): - inputs = _get_inputs(context, node, expected=3) + inputs = _get_inputs(context, node, expected=[1,2,3]) x = inputs[0] - min_val = inputs[1] if inputs[1] else _np.finfo(_np.float32).min - max_val = inputs[2] if inputs[2] else _np.finfo(_np.float32).max + min_val = inputs[1] if (len(inputs) > 1 and inputs[1]) else mb.const(val=_np.finfo(_np.float32).min) + max_val = inputs[2] if (len(inputs) > 2 and inputs[2]) else mb.const(val=_np.finfo(_np.float32).max) if isinstance(min_val, Var) and isinstance(max_val, Var) and min_val.val >= max_val.val: # When min >= max, PyTorch sets all values to max. @@ -6153,51 +6285,77 @@ def tupleindex(context, node): context.add(tuple_input[index_input.val], node.name) -def _get_attn_mask(is_causal: Var, attn_mask: Var, query_var: Var, key_var: Var) -> Var: - if is_causal.val: - # create mask of shape (target_seq, source_seq) - # s.t the diagonal and lower triangular of the matrix is all 1s - # and upper triangular is a large negative number (e.g. 
-30k) - target_seq = query_var.shape[-2] - source_seq = key_var.shape[-2] - if is_symbolic(target_seq) or is_symbolic(source_seq): - raise NotImplementedError( - "scaled_dot_product_attention op: " - "is_causal flag not handled when sequence length is symbolic" - ) +def _get_causal_attn_mask(is_causal: bool, query_var: Var, key_var: Var) -> Var: + assert is_causal - all_ones = mb.fill(value=1.0, shape=(target_seq, source_seq)) - all_negative_inf = mb.fill(value=-3e4, shape=(target_seq, source_seq)) - all_ones_lower = mb.band_part( - x=all_ones, lower=-1, upper=0 - ) # will 0 out upper triangle, excluding diag - all_negative_inf_upper = mb.band_part( - x=all_negative_inf, lower=0, upper=-1 - ) # will 0 out lower triangle, excluding diag - all_negative_inf_diag_only = mb.band_part(x=all_negative_inf_upper, lower=0, upper=0) - all_negative_inf_upper_no_diag = mb.sub( - x=all_negative_inf_upper, y=all_negative_inf_diag_only - ) - return mb.add(x=all_ones_lower, y=all_negative_inf_upper_no_diag) - elif is_bool(attn_mask.dtype): - """ - compute float mask as: - mask = cast(bool_mask) + (1-cast(bool_mask)) * -30k*ones(shape(bool_mask)) - """ - shape = mb.shape(x=attn_mask) - negative_inf = mb.fill( - shape=shape, value=_np.array([-3e4]).astype(types.nptype_from_builtin(query_var.dtype)) + # create mask of shape (target_seq, source_seq) + # s.t the diagonal and lower triangular of the matrix is all 1s + # and upper triangular is a large negative number (e.g. -30k) + target_seq = query_var.shape[-2] + source_seq = key_var.shape[-2] + if is_symbolic(target_seq) or is_symbolic(source_seq): + raise NotImplementedError( + "scaled_dot_product_attention op: " + "is_causal flag not handled when sequence length is symbolic" ) - mask = mb.cast(x=attn_mask, dtype=types.builtin_to_string(query_var.dtype)) - compliment_of_mask = mb.sub( - x=_np.array([1.0]).astype(types.nptype_from_builtin(mask.dtype)), y=mask + + all_ones = mb.fill(value=1.0, shape=(target_seq, source_seq)) + all_negative_inf = mb.fill(value=-3e4, shape=(target_seq, source_seq)) + all_ones_lower = mb.band_part( + x=all_ones, lower=-1, upper=0 + ) # will 0 out upper triangle, excluding diag + all_negative_inf_upper = mb.band_part( + x=all_negative_inf, lower=0, upper=-1 + ) # will 0 out lower triangle, excluding diag + all_negative_inf_diag_only = mb.band_part(x=all_negative_inf_upper, lower=0, upper=0) + all_negative_inf_upper_no_diag = mb.sub(x=all_negative_inf_upper, y=all_negative_inf_diag_only) + return mb.add(x=all_ones_lower, y=all_negative_inf_upper_no_diag) + + +def _cast_bool_attn_mask(attn_mask: Var, query_var: Var) -> Var: + """ + compute float mask as: + mask = cast(bool_mask) + (1-cast(bool_mask)) * -30k*ones(shape(bool_mask)) + """ + assert is_bool(attn_mask.dtype) + + shape = mb.shape(x=attn_mask) + negative_inf = mb.fill( + shape=shape, value=_np.array([-3e4]).astype(types.nptype_from_builtin(query_var.dtype)) + ) + mask = mb.cast(x=attn_mask, dtype=types.builtin_to_string(query_var.dtype)) + compliment_of_mask = mb.sub( + x=_np.array([1.0]).astype(types.nptype_from_builtin(mask.dtype)), y=mask + ) + compliment_of_mask = mb.mul(x=negative_inf, y=compliment_of_mask) + return mb.add(x=mask, y=compliment_of_mask) + + +def _lower_scaled_dot_product_attention(q: Var, k: Var, v: Var, mask: Var, name: str) -> Var: + # scale the query input + embed_size = q.shape[-1] + if is_symbolic(embed_size): + raise ValueError( + "The embedding size, i.e. 
last dimension of the shape of query tensor" + " cannot be symbolic, in scaled_dot_product_attention op" ) - compliment_of_mask = mb.mul(x=negative_inf, y=compliment_of_mask) - return mb.add(x=mask, y=compliment_of_mask) - else: - return attn_mask + multiplicative_scale_factor = 1 / _math.sqrt(embed_size) + q = mb.mul(x=q, y=multiplicative_scale_factor) + # multiply query and key input tensors + # shape of output: (target_seq, source_seq) or (B,...,target_seq, source_seq) + attn_weights = mb.matmul(x=q, y=k, transpose_y=True) + + # add mask if applicable + if mask is not None: + attn_weights = mb.add(x=attn_weights, y=mask) + + # do softmax + attn_weights_normalized = mb.softmax(x=attn_weights, axis=-1) + # multiply attn_weights and value tensor + res = mb.matmul(x=attn_weights_normalized, y=v, name=name) + return res @register_torch_op def scaled_dot_product_attention(context, node): @@ -6214,14 +6372,22 @@ def scaled_dot_product_attention(context, node): output = softmax(scale*Q*K^transpose + mask) * V + Currently, Core ML does not support dropout, so it has to be either None or 0 + See details at: https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html """ - q, k, v, attn_mask, dropout, is_causal = _get_inputs(context, node, expected=6) - if attn_mask is not None and is_causal.val: + inputs = _get_inputs(context, node, min_expected=3) + q, k, v = inputs[: 3] + attn_mask = None if len(inputs) < 4 else inputs[3] + dropout = 0.0 if len(inputs) < 5 else inputs[4] + is_causal = False if len(inputs) < 6 else inputs[5].val + if attn_mask is not None and is_causal: raise ValueError( "scaled_dot_product_attention op: attn_mask cannot be provided when is_causal is set to True." ) + if dropout is not None and (dropout.val is None or dropout.val != 0.0): + raise ValueError("scaled_dot_product_attention op: dropout is not supported yet") # check that ranks of q, k, v and attn_mask match if k.rank != q.rank: @@ -6233,34 +6399,16 @@ def scaled_dot_product_attention(context, node): "Rank of query and value do not match in scaled_dot_product_attention torch op" ) - is_mask_present = False - if is_causal.val or attn_mask is not None: - is_mask_present = True - mask = _get_attn_mask(is_causal, attn_mask, q, k) - - # scale the query input - embed_size = q.shape[-1] - if is_symbolic(embed_size): - raise ValueError( - "The embedding size, i.e. 
last dimension of the shape of query tensor" - " cannot be symbolic, in scaled_dot_product_attention op" - ) - multiplicative_scale_factor = 1 / _math.sqrt(embed_size) - q = mb.mul(x=q, y=multiplicative_scale_factor) - - # multiply query and key input tensors - # shape of output: (target_seq, source_seq) or (B,...,target_seq, source_seq) - attn_weights = mb.matmul(x=q, y=k, transpose_y=True) - - # add mask if applicable - if is_mask_present: - attn_weights = mb.add(x=attn_weights, y=mask) - - # do softmax - attn_weights_normalized = mb.softmax(x=attn_weights, axis=-1) + mask = None + if is_causal: + mask = _get_causal_attn_mask(is_causal, q, k) + elif attn_mask is not None: + if is_bool(attn_mask.dtype): + mask = _cast_bool_attn_mask(attn_mask, q) + else: + mask = attn_mask - # multiply attn_weights and value tensor - res = mb.matmul(x=attn_weights_normalized, y=v, name=node.name) + res = _lower_scaled_dot_product_attention(q, k, v, mask, node.name) context.add(res) @@ -6276,3 +6424,16 @@ def fliplr(context, node): x = _get_inputs(context, node, expected=1)[0] res = mb.reverse(x=x, axes=[1], name=node.name) context.add(res) + + +@register_torch_op +def multinomial(context, node): + x = context[node.inputs[0]] + num_samples = context[node.inputs[1]].val + replacement = context[node.inputs[2]].val + if num_samples is None: + raise ValueError("In torch.multinomial op, num_samples must be const") + if num_samples > 1 and not replacement: + raise ValueError("When num_samples is larger than 1, only replacement=True is supported.") + x = mb.random_categorical(x=x, size=num_samples, name=node.name) + context.add(x) diff --git a/coremltools/converters/mil/frontend/torch/quantization_ops.py b/coremltools/converters/mil/frontend/torch/quantization_ops.py index dd320c75e..2b1389ce4 100644 --- a/coremltools/converters/mil/frontend/torch/quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/quantization_ops.py @@ -95,7 +95,23 @@ def dequantize(context, node): context.quant_context.get_dequantized_var(node.inputs[0], node.name) -def _dequantized_weight(qweight, name:str = None): +def _construct_constexpr_affine_op(quantized_weights, zero_point, scale, axis=None, name=None): + """Constructs the constexpr op to represent the dequantized weight from PyTorch's data.""" + if axis is None: + # It's per-tensor quantization, just use a dummy value for axis. + axis = _np.int32(0) + kwargs = { + "quantized_data": quantized_weights, + "zero_point": zero_point, + "scale": scale, + "axis": axis, + } + if name is not None: + kwargs["name"] = name + return mb.constexpr_affine_dequantize(**kwargs) + + +def _dequantized_weight(qweight, name: str = None): """ Given the first output (qweight) of torch.ops.quantized.conv2d/linear_unpack, this returns a dequantized version of the tensor to be added to the context. @@ -105,24 +121,15 @@ def _dequantized_weight(qweight, name:str = None): scale = _np.float32(qweight.q_scale()) zero_point = quant_dtype_np(qweight.q_zero_point()) quantized_weights = _torch.int_repr(qweight).numpy() - # Axis doesn't matter for per-tensor quantization. 
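The new `_construct_constexpr_affine_op` helper above centralizes how both the per-tensor and per-channel branches emit `constexpr_affine_dequantize`. Numerically that op represents the usual affine dequantization, sketched here with made-up values:

```python
import numpy as np

quantized = np.array([0, 128, 255], dtype=np.uint8)
zero_point = np.uint8(128)
scale = np.float32(0.1)

# constexpr_affine_dequantize materializes scale * (quantized_data - zero_point)
dequantized = scale * (quantized.astype(np.float32) - np.float32(zero_point))
# -> array([-12.8,  0. ,  12.7], dtype=float32)
```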
- axis = _np.int32(0) - kwargs = { - "quantized_data": quantized_weights, - "zero_point": zero_point, - "scale": scale, - "axis": axis, - } - if name is not None: - kwargs["name"] = name - dequant_weights = mb.constexpr_affine_dequantize(**kwargs) + dequant_weights = _construct_constexpr_affine_op( + quantized_weights, zero_point, scale, axis=None, name=name + ) # per_channel_affine_float_qparams is same as per_channel_affine except that it # expects both scale and zero point to be floating point values. elif qweight.qscheme() in {_torch.per_channel_affine, _torch.per_channel_affine_float_qparams}: quant_dtype_np = TORCH_QTYPE_TO_NP_TYPE[qweight.dtype] # TODO: How do we set the appropriate dtype here (fp16/fp32)? scale = qweight.q_per_channel_scales().numpy() - zero_point = quant_dtype_np(qweight.q_per_channel_zero_points().numpy()) if qweight.qscheme() == _torch.per_channel_affine: zero_point = quant_dtype_np(qweight.q_per_channel_zero_points().numpy()) else: @@ -139,17 +146,10 @@ def _dequantized_weight(qweight, name:str = None): ) zero_point = quant_dtype_np(val) quantized_weights = _torch.int_repr(qweight).numpy() - # Axis doesn't matter for per-tensor quantization. - axis = _np.int32(0) - kwargs = { - "quantized_data": quantized_weights, - "zero_point": zero_point, - "scale": scale, - "axis": axis, - } - if name is not None: - kwargs["name"] = name - dequant_weights = mb.constexpr_affine_dequantize(**kwargs) + axis = _np.int32(qweight.q_per_channel_axis()) + dequant_weights = _construct_constexpr_affine_op( + quantized_weights, zero_point, scale, axis=axis, name=name + ) else: raise ValueError(f'Unsupported quant scheme "{qweight.qscheme()}"') return dequant_weights diff --git a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py index a24a31c36..297a44135 100644 --- a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py +++ b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py @@ -31,7 +31,7 @@ def _torch_tensor_assign_to_core_block(block): def _transform_tensor_assign(op, block): - shape = mb.shape(x=op.data, before_op=op) + shape = mb.shape(x=op.x, before_op=op) dim_prod = mb.reduce_prod(x=shape, before_op=op) ref_indices = mb.range_1d(end=dim_prod, start=0, step=1, before_op=op) ref_indices = mb.reshape(x=ref_indices, shape=shape, before_op=op) @@ -47,18 +47,18 @@ def _transform_tensor_assign(op, block): ) flatten_indices = mb.reshape(x=ref_sliced_indices, shape=[-1], before_op=op) flatten_updates = mb.reshape(x=op.updates, shape=[-1], before_op=op) - flatten_data = mb.reshape(x=op.data, shape=[-1], before_op=op) + flatten_data = mb.reshape(x=op.x, shape=[-1], before_op=op) new_data = mb.scatter( - data=flatten_data, - indices=flatten_indices, - updates=flatten_updates, - mode="update", - before_op=op - ) + data=flatten_data, + indices=flatten_indices, + updates=flatten_updates, + mode="update", + before_op=op, + ) new_data = mb.reshape(x=new_data, shape=shape, before_op=op) op.enclosing_block.replace_uses_of_var_after_op( anchor_op=op, old_var=op.outputs[0], new_var=new_data ) # Remove all the ops at once - block.remove_ops([op]) \ No newline at end of file + block.remove_ops([op]) diff --git a/coremltools/converters/mil/frontend/torch/test/test_api.py b/coremltools/converters/mil/frontend/torch/test/test_api.py deleted file mode 100644 index 4a3e0cf51..000000000 --- 
a/coremltools/converters/mil/frontend/torch/test/test_api.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2021, Apple Inc. All rights reserved. -# -# Use of this source code is governed by a BSD-3-clause license that can be -# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause - -import os - -import pytest -import torch -import torchvision - -import coremltools as ct -from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND -from coremltools.converters.mil.testing_reqs import backends - -if _HAS_TORCH: - import torch - import torchvision - - -@pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) -class TestPyTorchConverter: - @staticmethod - @pytest.mark.parametrize( - "backend", - backends, - ) - def test_no_inputs(backend): - model = torchvision.models.mobilenet_v2() - model.eval() - - example_input = torch.rand(1, 3, 256, 256) - - traced_model = torch.jit.trace(model, example_input) - - with pytest.raises(ValueError) as e: - ct.convert(traced_model, convert_to=backend[0]) - e.match(r'Expected argument for pytorch "inputs" not provided') - - - @staticmethod - @pytest.mark.parametrize( - "backend", - backends, - ) - def test_pth_extension(tmpdir, backend): - # test for issue: https://github.com/apple/coremltools/issues/917 - class TestModule(torch.nn.Module): - def __init__(self): - super(TestModule, self).__init__() - self.linear = torch.nn.Linear(10, 20) - - def forward(self, x): - return self.linear(x) - - model = TestModule() - model.eval() - example_input = torch.rand(1, 10) - traced_model = torch.jit.trace(model, example_input) - model_path = os.path.join(str(tmpdir), "torch_model.pth") - traced_model.save(model_path) - - ct.convert( - model_path, - source='pytorch', - inputs=[ - ct.TensorType( - shape=example_input.shape, - ) - ], - convert_to=backend[0], - ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py index d8e266c98..799d753ff 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py @@ -28,7 +28,7 @@ # Log Converter supported Cosine Similarity conversion function -default_cosine_similarity = _TORCH_OPS_REG.get("cosine_similarity", None) +default_cosine_similarity = _TORCH_OPS_REG.get_func("cosine_similarity") @register_torch_op(override=True) @@ -37,11 +37,11 @@ def cosine_similarity(context, node): # Log custom Cosine Similarity conversion function -custom_cosine_similarity = _TORCH_OPS_REG["cosine_similarity"] +custom_cosine_similarity = _TORCH_OPS_REG.get_func("cosine_similarity") def _set_torch_reg_op(op_type, op_func): - _TORCH_OPS_REG[op_type] = op_func + _TORCH_OPS_REG.set_func_by_name(op_func, op_type) class TestCompositeOp(TorchBaseTest): @@ -69,7 +69,7 @@ class custom_torch_sparse_matmul(Operation): x_is_sparse=TensorInputType(const=True, optional=True, type_domain=types.bool), y_is_sparse=TensorInputType(const=True, optional=True, type_domain=types.bool), ) - + type_domains = { "T": (types.fp16, types.fp32), } diff --git a/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py b/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py new file mode 100644 index 000000000..14627cf1a --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py @@ -0,0 +1,158 @@ +# Copyright (c) 2023, Apple Inc. All rights reserved. 
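The new end-to-end ExecuTorch tests below all follow the same capture-then-convert pattern; a condensed sketch of that flow (mirroring the exir configs used in test_torch_conversion_api.py; the model and shape are placeholders):

```python
import torch
import coremltools as ct
from executorch import exir

model = torch.nn.Linear(3, 3).eval()
exir_program = (
    exir.capture(model, (torch.rand(1, 3),), exir.CaptureConfig(enable_aot=True))
    .to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
    .exported_program
)
# ExportedProgram inputs/outputs are inferred, so no `inputs=` argument is passed.
mlmodel = ct.convert(exir_program, convert_to="mlprogram")
```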
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import pytest + +from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH_VISION + +if not (_HAS_EXECUTORCH and _HAS_TORCH_VISION): + pytest.skip(allow_module_level=True, reason="executorch and torchvision are required") + +import torch +import torchvision +import torchaudio + +import timm + +from .testing_utils import TorchBaseTest, TorchFrontend + + +class TestExecutorch(TorchBaseTest): + def test_mul(self): + class MulModule(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input, other): + return input * other + + model = MulModule() + model.eval() + + self.run_compare_torch( + [(3, 2), (3, 2)], + model, + frontend=TorchFrontend.EDGEIR, + ) + + def test_linear(self): + class LinearModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(3, 3) + + def forward(self, arg): + return self.linear(arg) + + model = LinearModule() + model.eval() + + self.run_compare_torch( + [(3, 3)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_add(self): + class AddModule(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + z = x + y + z = z + x + z = z + x + z = z + z + return z + + model = AddModule() + model.eval() + + self.run_compare_torch( + [(1,), (1,)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_add_mul(self): + class AddMulModule(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, a, x, b): + y = torch.mm(a, x) + z = torch.add(y, b) + return z + + model = AddMulModule() + model.eval() + + self.run_compare_torch( + [(2, 2), (2, 2), (2, 2)], + model, + frontend=TorchFrontend.EDGEIR, + backend=("mlprogram", "fp16"), + ) + + def test_mobilenet_v2(self): + model = torchvision.models.mobilenet_v2(weights=torchvision.models.mobilenetv2.MobileNet_V2_Weights.DEFAULT) + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_mobilenet_v3(self): + model = torchvision.models.mobilenet_v3_small(pretrained=True) + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_vit(self): + model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1") + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_wav2letter(self): + model = torchaudio.models.Wav2Letter(num_classes=4096) + model.eval() + + self.run_compare_torch( + [(10, 1, 700)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + @pytest.mark.xfail(reason="Nodes of type get_attr not yet implemented") + def test_inception_v3(self): + model = torchvision.models.inception_v3(weights="IMAGENET1K_V1") + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_inception_v4(self): + model = timm.models.inception_v4(pretrained=True) + model.eval() + + self.run_compare_torch( + [(1, 3, 299, 299)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_resnet18(self): + model = 
torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1) + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) + + def test_resnet50(self): + model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1) + model.eval() + + self.run_compare_torch( + [(1, 3, 224, 224)], model, frontend=TorchFrontend.EDGEIR, backend=("mlprogram", "fp16") + ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py index d2c3fdb8b..82d226adf 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py +++ b/coremltools/converters/mil/frontend/torch/test/test_internal_graph.py @@ -523,7 +523,7 @@ def test_permute(self, context, input_shape): kind="Permute", inputs=input_list, outputs=[output_name], ) ssa = self._construct_test_graph( - context, ops.permute, permute_node, output_name, constants=constants + context, ops.permute_copy, permute_node, output_name, constants=constants ) expected_result = test_data.permute(*permutation) assert expected_result.shape == ssa.shape @@ -1483,7 +1483,7 @@ def test_erf(self, context): context, ops.erf, node, output_name, constants=constants ) expected_result = test_input.erf() - assert np.allclose(expected_result, ssa.val) + assert np.allclose(expected_result, ssa.val, atol=1e-05) def test_implicittensortonum(self, context): input_shape = (1,) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py index 487db8256..be62a74bb 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py @@ -11,8 +11,17 @@ from PIL import Image import coremltools as ct -from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND +from coremltools._deps import ( + _HAS_EXECUTORCH, + _HAS_TORCH, + MSG_EXECUTORCH_NOT_FOUND, + MSG_TORCH_NOT_FOUND, +) from coremltools.converters.mil.frontend.torch.test.testing_utils import _copy_input_data +from coremltools.converters.mil.frontend.torch.torch_op_registry import ( + _TORCH_OPS_REGISTRY, + register_torch_op, +) from coremltools.converters.mil.testing_reqs import backends from coremltools.converters.mil.testing_utils import ( assert_cast_ops_count, @@ -26,6 +35,7 @@ get_op_types_in_program, verify_prediction, ) +from coremltools.models import _METADATA_SOURCE_DIALECT from coremltools.proto import FeatureTypes_pb2 as ft from coremltools.test.api.test_api_examples import TestInputs as _TestInputs @@ -35,9 +45,228 @@ torch.manual_seed(1818) +if _HAS_EXECUTORCH: + from executorch import exir + + _CAPTURE_CONFIG = exir.CaptureConfig(enable_aot=True) + _EDGE_COMPILE_CONFIG = exir.EdgeCompileConfig( + _check_ir_validity=False, + ) + + +@pytest.fixture +def torch_model(): + class TestModule(torch.nn.Module): + def __init__(self): + super(TestModule, self).__init__() + self.linear = torch.nn.Linear(10, 20) + + def forward(self, x): + return self.linear(x) + + model = TestModule() + model.eval() + return model + + +@pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) +class TestTorchScriptValidation: + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_no_inputs(torch_model, backend): + + traced_torch_model = torch.jit.trace(torch_model, 
torch.rand(1, 10)) + with pytest.raises( + ValueError, match=r'Expected argument "inputs" for TorchScript models not provided' + ): + ct.convert(traced_torch_model, convert_to=backend[0]) + + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_pth_extension(torch_model, tmpdir, backend): + # test for issue: https://github.com/apple/coremltools/issues/917 + + shape = (1, 10) + traced_torch_model = torch.jit.trace(torch_model, torch.rand(*shape)) + + model_path = os.path.join(str(tmpdir), "torch_model.pth") + traced_torch_model.save(model_path) + + ct.convert( + model_path, + source="pytorch", + inputs=[ + ct.TensorType( + shape=shape, + ) + ], + convert_to=backend[0], + ) + + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_source_dialect_metadata(torch_model, backend): + shape = (1, 10) + traced_torch_model = torch.jit.trace(torch_model, torch.rand(*shape)) + + mlmodel = ct.convert( + traced_torch_model, + source="pytorch", + inputs=[ + ct.TensorType( + shape=shape, + ) + ], + convert_to=backend[0], + ) + + assert _METADATA_SOURCE_DIALECT in mlmodel.user_defined_metadata + + assert mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] == "TorchScript" + + + +@pytest.mark.skipif(not _HAS_EXECUTORCH, reason=MSG_EXECUTORCH_NOT_FOUND) +class TestEdgeIRValidation: + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_inputs( + torch_model, backend + ): # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API) + + shape = (1, 10) + exir_program = ( + exir.capture(torch_model, (torch.rand(*shape),), _CAPTURE_CONFIG) + .to_edge(_EDGE_COMPILE_CONFIG) + .exported_program + ) + + with pytest.raises( + AssertionError, match=r"'inputs' argument should be None for ExportedProgram" + ): + ct.convert( + exir_program, + convert_to=backend[0], + inputs=[ct.TensorType(shape=shape)], + ) + + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_outputs( + torch_model, backend + ): # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API) + + shape = (1, 10) + exir_program = ( + exir.capture(torch_model, (torch.rand(*shape),), _CAPTURE_CONFIG) + .to_edge(_EDGE_COMPILE_CONFIG) + .exported_program + ) + + with pytest.raises( + AssertionError, match=r"'outputs' argument should be None for ExportedProgram" + ): + ct.convert(exir_program, convert_to=backend[0], outputs=[ct.TensorType(name="result")]) + + @staticmethod + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_source_dialect_metadata(torch_model, backend): + shape = (1, 10) + exir_program = ( + exir.capture(torch_model, (torch.rand(*shape),), _CAPTURE_CONFIG) + .to_edge(_EDGE_COMPILE_CONFIG) + .exported_program + ) + + mlmodel = ct.convert( + exir_program, + source="pytorch", + convert_to=backend[0], + ) + + assert _METADATA_SOURCE_DIALECT in mlmodel.user_defined_metadata + + assert mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] == "TorchExport::EDGE" + +@pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) +class TestTorchOpsRegistry: + @staticmethod + def test_api_example(): + # Example code in https://apple.github.io/coremltools/docs-guides/source/composite-operators.html#using-composite-ops-with-pytorch-conversion + # Whenever this test fails, we should update API documentations + # This test needs to be modified after rdar://117502178 ([Infra][Pytorch] We should deprecate the direct use of _TORCH_OPS_REGISTRY in 7.2) + from 
coremltools.converters.mil import Builder as mb + from coremltools.converters.mil.frontend.torch.ops import _get_inputs + from coremltools.converters.mil.frontend.torch.torch_op_registry import ( + _TORCH_OPS_REGISTRY, + register_torch_op, + ) + + default_func = _TORCH_OPS_REGISTRY.get_func("selu") + + # Test ``__contains__`` and ``__delitem__`` + assert "selu" in _TORCH_OPS_REGISTRY + if "selu" in _TORCH_OPS_REGISTRY: + del _TORCH_OPS_REGISTRY["selu"] + assert not "selu" in _TORCH_OPS_REGISTRY + + # Test ``@register_torch_op`` decorator + @register_torch_op + def selu(context, node): + x = _get_inputs(context, node, expected=1)[0] + x = mb.elu(x=x, alpha=1.6732632423543772) + x = mb.mul(x=x, y=1.0507009873554805, name=node.name) + context.add(x) + + # Test ``__getitem__`` + assert _TORCH_OPS_REGISTRY["selu"] is not None + + # Test ``__setitem__`` + _TORCH_OPS_REGISTRY["selu"] = default_func + + @staticmethod + def test_register_torch_op(): + # Test ``register_torch_op`` works + def test_func_dummy(context, inputs): + return + register_torch_op(test_func_dummy) + assert _TORCH_OPS_REGISTRY.name_to_func_mapping["test_func_dummy"] is test_func_dummy + + # Test error out for duplicate registration + with pytest.raises(ValueError, match="Torch op test_func_dummy already registered."): + register_torch_op(test_func_dummy) + + # Test we can override the function + def test_func_dummy(context, inputs): + dummy = 1 + return + register_torch_op(test_func_dummy, override=True) + assert _TORCH_OPS_REGISTRY.name_to_func_mapping["test_func_dummy"] is test_func_dummy + + # Cleanup the test + del _TORCH_OPS_REGISTRY.name_to_func_mapping["test_func_dummy"] + ################################################################################# -# Note: all tests are also used as examples in https://coremltools.readme.io/docs -# as a reference. +# Note: Starting from here, all of the following tests are also used as examples +# in https://coremltools.readme.io/docs as a reference. 
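
For reference, the Edge IR path exercised by TestEdgeIRValidation above boils down to the following minimal sketch. It assumes torch >= 2.1 and executorch are installed; the toy module, shapes, and the "mlprogram" target are illustrative placeholders, not part of this test suite.

import torch
import coremltools as ct
from executorch import exir

class ToyModule(torch.nn.Module):
    def forward(self, x, y):
        return x * y

model = ToyModule().eval()
example_inputs = (torch.rand(3, 2), torch.rand(3, 2))

# Capture to the ATen dialect, then lower to the EDGE dialect.
exir_program = (
    exir.capture(model, example_inputs, exir.CaptureConfig(enable_aot=True))
    .to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
    .exported_program
)

# For ExportedProgram sources, inputs/outputs are read from the program itself,
# so no "inputs"/"outputs" arguments are passed to ct.convert.
mlmodel = ct.convert(exir_program, convert_to="mlprogram")
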
# Whenever any of the following test fails, we should update API documentations ################################################################################# @@ -1137,11 +1366,15 @@ def test_input_name_specified_by_user(self, float32_input_model_relu_ops, def test_two_input_model(self, float32_two_input_model): # test that error is raised if only 1 input is provided - with pytest.raises(ValueError): - ct.convert(float32_two_input_model, - inputs=[ct.TensorType(shape=(10, 20), dtype=np.int32)], - minimum_deployment_target=ct.target.macOS12) - + with pytest.raises( + ValueError, + match="Number of TorchScript inputs \(2\) must match the user provided inputs \(1\).", + ): + ct.convert( + float32_two_input_model, + inputs=[ct.TensorType(shape=(10, 20), dtype=np.int32)], + minimum_deployment_target=ct.target.macOS12, + ) # test forcing 1st input to type int32 mlmodel = ct.convert(float32_two_input_model, diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py index 1deb09a1b..b8b02966f 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py @@ -11,17 +11,19 @@ import numpy as np import pytest import torch.nn as nn -import torchaudio -import torchvision import coremltools as ct from coremltools import RangeDim, Shape, TensorType -from coremltools._deps import version_lt +from coremltools._deps import ( + _HAS_EXECUTORCH, + _HAS_TORCH_AUDIO, + _HAS_TORCH_VISION, + version_lt, +) from coremltools.converters.mil import testing_reqs from coremltools.converters.mil.frontend.torch.ops import ( NUM_TO_TORCH_DTYPE, NUMPY_DTYPE_TO_TORCH_NUM, - TORCH_DTYPE_TO_NUM, ) from coremltools.converters.mil.mil import Operation, Program, types from coremltools.converters.mil.mil.var import Var @@ -33,7 +35,25 @@ ) from coremltools.models.utils import _macos_version, _python_version -from .testing_utils import ModuleWrapper, TorchBaseTest, contains_op, generate_input_data +from .testing_utils import ( + ModuleWrapper, + TorchBaseTest, + TorchFrontend, + contains_op, + generate_input_data, +) + +if _HAS_TORCH_AUDIO: + import torchaudio + +if _HAS_TORCH_VISION: + import torchvision + + +frontends = [TorchFrontend.TORCHSCRIPT] + +if _HAS_EXECUTORCH: + frontends.append(TorchFrontend.EDGEIR) backends = testing_reqs.backends compute_units = testing_reqs.compute_units @@ -182,8 +202,10 @@ def forward(self, x): use_scripting=True ) - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_linear(self, compute_unit, backend): + @pytest.mark.parametrize( + "compute_unit, backend, frontend", itertools.product(compute_units, backends, frontends) + ) + def test_linear(self, compute_unit, backend, frontend): class Model(torch.nn.Module): def __init__(self): super(Model, self).__init__() @@ -199,6 +221,7 @@ def forward(self, x): model, input_as_shape=False, backend=backend, + frontend=frontend, compute_unit=compute_unit, use_scripting=True, ) @@ -4092,6 +4115,11 @@ class TestTypeAs(TorchBaseTest): itertools.product(compute_units, backends, ["int32", "float32", "bool"]), ) def test_type_as(self, compute_unit, backend, type): + if backend == ("mlprogram", "fp16") and type == "bool": + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + class TestNet(nn.Module): def forward(self, x, y): return x.type_as(y) @@ -4417,7 +4445,8 @@ def forward(self, x, y): 
converter_input_type=[ TensorType( shape=[ct.RangeDim(upper_bound=20 if backend[0] == "mlprogram" else -1), 1] - ) + ), + TensorType(shape=(2,)), ], backend=backend, compute_unit=compute_unit, @@ -4707,6 +4736,11 @@ def forward(self, x, y): ), ) def test_unary_einsum(self, compute_unit, backend, equation, dynamic): + if backend == ("mlprogram", "fp16") and equation == "iijk->ji" and dynamic: + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + class TestUnaryEinsum(nn.Module): def forward(self, x): return torch.einsum(equation, x) @@ -5571,6 +5605,39 @@ def test_bmm(self, compute_unit, backend): [shape_x, shape_y], model, backend=backend, compute_unit=compute_unit ) + @pytest.mark.parametrize( + "compute_unit, backend", + itertools.product( + compute_units, + backends, + ), + ) + def test_bmm_with_fp16_inputs(self, compute_unit, backend): + if backend == ("mlprogram", "fp16"): + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + + class TestModel(torch.nn.Module): + def forward(self, x, y): + x = x.to(torch.float16) + y = y + 1 + return torch.bmm(x, y) + + inputs = [ + TensorType(name="x", shape=(1, 2, 3), dtype=np.int32), + TensorType(name="y", shape=(1, 3, 2), dtype=np.float16), + ] + + self.run_compare_torch( + inputs, + TestModel(), + backend=backend, + compute_unit=compute_unit, + minimum_deployment_target=ct.target.iOS16, + torch_device=torch.device("mps"), + ) + class TestNumel(TorchBaseTest): @pytest.mark.parametrize( @@ -5761,6 +5828,29 @@ def forward(self, input_data): inputs, TestModel(), backend=backend, compute_unit=compute_unit ) + @pytest.mark.parametrize( + "compute_unit, backend", + itertools.product( + compute_units, + backends, + ), + ) + def test_to_float16(self, compute_unit, backend): + class TestModel(torch.nn.Module): + def forward(self, input_data): + input_data = input_data.to(torch.float16) + return input_data + 8 + + inputs = [TensorType(name="input_data", shape=(1, 2, 3), dtype=np.float32)] + self.run_compare_torch( + inputs, + TestModel(), + backend=backend, + compute_unit=compute_unit, + atol=0.01, + rtol=0.001, + ) + @pytest.mark.parametrize( "compute_unit, backend, input_type", itertools.product( @@ -7243,14 +7333,10 @@ def forward(self, x): ) @pytest.mark.parametrize( - "compute_unit, backend, dynamic", - itertools.product( - compute_units, - backends, - [True, False], - ), + "compute_unit, backend, dynamic, mixed_rank", + itertools.product(compute_units, backends, [True, False], [True, False]), ) - def test_tensor_assign_case_8(self, compute_unit, backend, dynamic): + def test_tensor_assign_case_8(self, compute_unit, backend, dynamic, mixed_rank): # general case with dynamic begin and end class TensorAssignModel(torch.nn.Module): def forward(self, x, begin_0, begin_1, end_1): @@ -7260,6 +7346,22 @@ def forward(self, x, begin_0, begin_1, end_1): shape = (2, 10, 3) model = TensorAssignModel() + + if mixed_rank: + inputs = [ + torch.rand(*shape), + torch.as_tensor([[[1]]], dtype=torch.int32), + torch.as_tensor([1], dtype=torch.int32), + torch.as_tensor([[2]], dtype=torch.int32), + ] + else: + inputs = [ + torch.rand(*shape), + torch.as_tensor([1], dtype=torch.int32), + torch.as_tensor([1], dtype=torch.int32), + torch.as_tensor([2], dtype=torch.int32), + ] + if dynamic: upper_bound = 10 if backend[0] == "mlprogram" else -1 converter_input_type = [ @@ -7270,24 +7372,17 @@ def forward(self, x, begin_0, begin_1, end_1): ct.RangeDim(upper_bound=upper_bound), ) 
), - ct.TensorType(shape=(1,), dtype=np.int32), - ct.TensorType(shape=(1,), dtype=np.int32), - ct.TensorType(shape=(1,), dtype=np.int32), + ct.TensorType(shape=inputs[1].shape, dtype=np.int32), + ct.TensorType(shape=inputs[2].shape, dtype=np.int32), + ct.TensorType(shape=inputs[3].shape, dtype=np.int32), ] else: converter_input_type = None - inputs = [ - torch.rand(*shape), - torch.as_tensor([1], dtype=torch.int32), - torch.as_tensor([1], dtype=torch.int32), - torch.as_tensor([2], dtype=torch.int32), - ] - torch_inputs = [torch.clone(x) for x in inputs] expected_results = model(*torch_inputs) - self.run_compare_torch( + res = self.run_compare_torch( inputs, model, expected_results=expected_results, @@ -7297,6 +7392,13 @@ def forward(self, x, begin_0, begin_1, end_1): compute_unit=compute_unit ) + if not mixed_rank: + # the fuse_squeeze_expand_dims graph pass is going to + # fuse the pattern of ``squeeze -> expand_dims`` + prog = res[1]._mil_program + assert "squeeze" not in get_op_types_in_program(prog) + assert "expand_dims" not in get_op_types_in_program(prog) + @pytest.mark.parametrize( "compute_unit, backend", itertools.product( @@ -9323,47 +9425,52 @@ def forward(self, x): compute_unit=compute_unit ) -class TestSpectrogram(TorchBaseTest): - @pytest.mark.parametrize( - "compute_unit, backend, input_shape, spec, power", - itertools.product( - compute_units, - backends, - [(1, 1000), (1000,), (3, 1000)], # input shape - [torchaudio.transforms.Spectrogram, torchaudio.transforms.MelSpectrogram], - [None, 1, 2] # magnitude or power + +if _HAS_TORCH_AUDIO: + + class TestSpectrogram(TorchBaseTest): + @pytest.mark.parametrize( + "compute_unit, backend, input_shape, spec, power", + itertools.product( + compute_units, + backends, + [(1, 1000), (1000,), (3, 1000)], # input shape + [torchaudio.transforms.Spectrogram, torchaudio.transforms.MelSpectrogram], + [None, 1, 2], # magnitude or power + ), ) - ) - def test_spectrogram(self, compute_unit, backend, input_shape, spec, power): - if platform.machine() != "arm64": - pytest.xfail("rdar://108001659 ([PyTorch] Torchaudio Spectrogram Failed on Intel Machine)") + def test_spectrogram(self, compute_unit, backend, input_shape, spec, power): + if platform.machine() != "arm64": + pytest.xfail( + "rdar://108001659 ([PyTorch] Torchaudio Spectrogram Failed on Intel Machine)" + ) - if spec is torchaudio.transforms.MelSpectrogram and power is None: - pytest.skip("power or magnitude required for melspec") + if spec is torchaudio.transforms.MelSpectrogram and power is None: + pytest.skip("power or magnitude required for melspec") - class SpectrogramModel(torch.nn.Module): - def __init__(self) -> None: - super().__init__() - # the other spectrogram options are passed through to stft - # and are tested in TestSTFT - self.spec = spec(power=power, n_fft=128) + class SpectrogramModel(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + # the other spectrogram options are passed through to stft + # and are tested in TestSTFT + self.spec = spec(power=power, n_fft=128) - def forward(self, x): - x = self.spec(x) - if power is None: - # complex: stack them - x = torch.stack([torch.real(x), torch.imag(x)], dim=0) - return x + def forward(self, x): + x = self.spec(x) + if power is None: + # complex: stack them + x = torch.stack([torch.real(x), torch.imag(x)], dim=0) + return x - np.random.seed(1024) - TorchBaseTest.run_compare_torch( - input_shape, - SpectrogramModel(), - backend=backend, - compute_unit=compute_unit, - rtol=1e-4, - atol=1e-4, - ) + 
np.random.seed(1024) + TorchBaseTest.run_compare_torch( + input_shape, + SpectrogramModel(), + backend=backend, + compute_unit=compute_unit, + rtol=1e-4, + atol=1e-4, + ) class TestNms(TorchBaseTest): @pytest.mark.parametrize( @@ -9859,7 +9966,9 @@ class TestScaledDotProductAttention(TorchBaseTest): [2, 3, 4, 5], ), ) - def test_different_input_ranks_no_mask(self, compute_unit, backend, rank): + def test_different_input_ranks_no_mask( + self, compute_unit, backend, rank, minimum_deployment_target=None + ): """ The query/key/value inputs can be any rank 2 or greater. """ @@ -9884,12 +9993,14 @@ def test_different_input_ranks_no_mask(self, compute_unit, backend, rank): }, ) - self.run_compare_torch( + res = self.run_compare_torch( [input_shape] * 3, model, backend=backend, compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, ) + return res[1] @pytest.mark.parametrize( "compute_unit, backend, seq_lengths, include_heads", @@ -9900,7 +10011,9 @@ def test_different_input_ranks_no_mask(self, compute_unit, backend, rank): [False, True], ), ) - def test_is_causal_flag(self, compute_unit, backend, seq_lengths, include_heads): + def test_is_causal_flag( + self, compute_unit, backend, seq_lengths, include_heads, minimum_deployment_target=None + ): source_seq_len, target_seq_len = seq_lengths query_shape = (2, 2, target_seq_len, 7) if include_heads else (2, target_seq_len, 7) key_shape = (2, 2, source_seq_len, 7) if include_heads else (2, source_seq_len, 7) @@ -9918,6 +10031,7 @@ def test_is_causal_flag(self, compute_unit, backend, seq_lengths, include_heads) model, backend=backend, compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, ) # check that "fill" and "band_part" ops, which are needed to compute mask, have been constant folded mil_prog = res[1]._get_mil_internal() @@ -9934,7 +10048,9 @@ def test_is_causal_flag(self, compute_unit, backend, seq_lengths, include_heads) [False, True], ), ) - def test_attn_mask(self, compute_unit, backend, seq_lengths, bool_mask): + def test_attn_mask( + self, compute_unit, backend, seq_lengths, bool_mask, minimum_deployment_target=None + ): if bool_mask: pytest.xfail( "rdar://110499660 ([CI][Bug] test_attn_mask is occasionally failing when bool_mask = True)" @@ -9960,6 +10076,7 @@ def test_attn_mask(self, compute_unit, backend, seq_lengths, bool_mask): model, backend=backend, compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, input_as_shape=False, ) @@ -9971,7 +10088,9 @@ def test_attn_mask(self, compute_unit, backend, seq_lengths, bool_mask): [True, False], ), ) - def test_toy_xformer_with_sdpa(self, compute_unit, backend, mask_as_input): + def test_toy_xformer_with_sdpa( + self, compute_unit, backend, mask_as_input, minimum_deployment_target=None + ): embedding_size = 32 seq_length = 16 n_heads = 4 @@ -10061,8 +10180,44 @@ def forward(self, x, mask=None): model, backend=backend, compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, + ) + + def test_dropout_early_error_out(self): + B, S, L, E, EV = 3, 5, 7, 16, 32 + + query_shape = (B, L, E) + key_shape = (B, S, E) + value_shape = (B, S, EV) + + query = generate_input_data(query_shape) + key = generate_input_data(key_shape) + value = generate_input_data(value_shape) + + model = ModuleWrapper( + function=nn.functional.scaled_dot_product_attention, + kwargs={"dropout_p": 0.0} + ) + self.run_compare_torch( + (query, key, value), + model, + input_as_shape=False, ) + with pytest.raises( + ValueError, + 
match=r"scaled_dot_product_attention op: dropout is not supported yet", + ): + model = ModuleWrapper( + function=nn.functional.scaled_dot_product_attention, + kwargs={"dropout_p": 0.1} + ) + self.run_compare_torch( + (query, key, value), + model, + input_as_shape=False, + ) + class TestTransformer(TorchBaseTest): @pytest.mark.parametrize( @@ -10102,3 +10257,62 @@ def forward(self, x): return torch.fliplr(x) self.run_compare_torch(input_shape, TestModel(), backend=backend, compute_unit=compute_unit) + + +class TestMultinomial(TorchBaseTest): + @pytest.mark.parametrize( + "compute_unit, backend, num_samples", + itertools.product(compute_units, backends, [1, 3]), + ) + def test_multinomial(self, compute_unit, backend, num_samples): + class TestModel(nn.Module): + def forward(self, x): + return torch.multinomial(x, num_samples, replacement=True) + + # As sampling is random, we make one element significantly larger than others to make + # outputs consistent. + input_data = torch.tensor([0, 1e5, 0, 0, 1, 1, 1], dtype=torch.float) + self.run_compare_torch( + input_data, + TestModel(), + backend=backend, + compute_unit=compute_unit, + input_as_shape=False, + ) + + @pytest.mark.parametrize( + "compute_unit, backend", + itertools.product(compute_units, backends), + ) + def test_multinomial_not_supported(self, compute_unit, backend): + class TestModel(nn.Module): + def forward(self, x): + return torch.multinomial(x, 4) + + class TestModelDynamicNumSamples(nn.Module): + def forward(self, x): + return torch.multinomial(x, x.shape[0], replacement=True) + + input_data = torch.tensor([0, 10, 0, 0, 1, 1, 1], dtype=torch.float) + with pytest.raises( + ValueError, + match="When num_samples is larger than 1, only replacement=True is supported.", + ): + self.run_compare_torch( + input_data, + TestModel(), + backend=backend, + compute_unit=compute_unit, + input_as_shape=False, + ) + + with pytest.raises(ValueError, match="In torch.multinomial op, num_samples must be const"): + converter_input_type = [TensorType(shape=(RangeDim(1, 10),), dtype=np.float32)] + self.run_compare_torch( + input_data, + TestModelDynamicNumSamples(), + backend=backend, + compute_unit=compute_unit, + input_as_shape=False, + converter_input_type=converter_input_type, + ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py index 5a09606ff..7e96a4f38 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py @@ -4,6 +4,7 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import itertools +from typing import Optional import numpy as np import pytest @@ -29,24 +30,57 @@ torch.backends.quantized.engine = "qnnpack" -def _force_quantize_model(model, q_dtype): +def _force_quantize_model( + model: torch.nn.Module, + q_dtype: torch.dtype, + low: Optional[int] = None, + high: Optional[int] = None, + scale: Optional[float] = None, + zero_point: Optional[int] = None, + channel_axis: Optional[int] = None, +): """ In torch, the quantized model can only be obtained from PTQ. This utility allows us to produce an int8 quantized model. + + If channel_axis is set, it will do per-channel quantization instead of per-tensor, for the param + that channel_axis is valid for. 
""" - # modify the parameter to int8 + if scale is None: + scale = 1.0 + if zero_point is None: + zero_point = 0 + + # modify the parameter to force the quantization within a specific range. with torch.no_grad(): for name, param in model.named_parameters(): shape = param.shape - new_value = torch.quantize_per_tensor( - torch.rand(*shape), scale=1.0, zero_point=0, dtype=q_dtype + input_data = ( + torch.rand(*shape) if low is None else torch.randint(low, high, shape).float() ) + input_data = (input_data - zero_point) * scale + + if channel_axis is not None and -len(shape) <= channel_axis < len(shape): + scale = torch.Tensor([scale] * shape[channel_axis]) + zero_point = torch.Tensor([zero_point] * shape[channel_axis]) + new_value = torch.quantize_per_channel( + input_data, + scales=scale, + zero_points=zero_point, + axis=channel_axis, + dtype=q_dtype, + ) + else: + new_value = torch.quantize_per_tensor( + input_data, scale=scale, zero_point=zero_point, dtype=q_dtype + ) + param_cls = type(param) - kwargs = param.__dict__ new_value = param_cls(new_value, requires_grad=False).to(torch.device("cpu")) model._parameters[name] = new_value return model + class TorchQuantizationBaseTest(TorchBaseTest): @staticmethod def run_compare_torch( @@ -55,6 +89,7 @@ def run_compare_torch( atol=1e-04, rtol=1e-05, input_as_shape=True, + minimum_deployment_target=ct.target.iOS17, ): # TODO(rdar://108472419): properly design a random input if input_as_shape: @@ -69,7 +104,7 @@ def run_compare_torch( backend=("mlprogram", "fp32"), use_scripting=False, compute_unit=ct.ComputeUnit.CPU_ONLY, - minimum_deployment_target=ct.target.iOS17, + minimum_deployment_target=minimum_deployment_target, ) @@ -346,10 +381,10 @@ def forward(self, x): self.run_compare_torch(input_shape, model) @pytest.mark.parametrize( - "quant_dtype", - [torch.quint8, torch.qint8], + "quant_dtype, channel_axis", + itertools.product([torch.quint8, torch.qint8], [0, 1, None]), ) - def test_quantized_params(self, quant_dtype): + def test_quantized_params(self, quant_dtype, channel_axis): class Model(torch.nn.Module): def __init__(self): super().__init__() @@ -360,7 +395,7 @@ def forward(self, x): return torch.matmul(x, dequanitized_weight) model = Model() - model = _force_quantize_model(model, q_dtype=quant_dtype) + model = _force_quantize_model(model, q_dtype=quant_dtype, channel_axis=channel_axis) input_shape = [(3, 5)] res = self.run_compare_torch(input_shape, model) prog = res[1]._mil_program diff --git a/coremltools/converters/mil/frontend/torch/test/testing_utils.py b/coremltools/converters/mil/frontend/torch/test/testing_utils.py index 19cc9ffab..e7ca8d86b 100644 --- a/coremltools/converters/mil/frontend/torch/test/testing_utils.py +++ b/coremltools/converters/mil/frontend/torch/test/testing_utils.py @@ -3,6 +3,8 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from enum import Enum + import numpy as np import pytest import torch @@ -11,11 +13,27 @@ import coremltools as ct import coremltools.models.utils as coremltoolsutils from coremltools import RangeDim, TensorType -from coremltools._deps import _IS_MACOS +from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH_EXPORT_API, _IS_MACOS from coremltools.converters.mil.mil.types.type_mapping import nptype_from_builtin from coremltools.converters.mil.testing_utils import ct_convert, validate_minimum_deployment_target -from ..converter import torch_to_mil_types +from 
..torchscript_utils import torch_to_mil_types + +if _HAS_TORCH_EXPORT_API: + from torch.export import ExportedProgram + +if _HAS_EXECUTORCH: + from executorch import exir + + _CAPTURE_CONFIG = exir.CaptureConfig(enable_aot=True) + _EDGE_COMPILE_CONFIG = exir.EdgeCompileConfig( + _check_ir_validity=False, + ) + + +class TorchFrontend(Enum): + TORCHSCRIPT = 1 + EDGEIR = 2 class ModuleWrapper(nn.Module): @@ -62,7 +80,8 @@ def convert_to_coreml_inputs(input_description, inputs): """ flattened_inputs = _flatten(inputs) coreml_inputs = { - str(x): inp.numpy().astype(np.float32) for x, inp in zip(input_description, flattened_inputs) + str(x): inp.cpu().numpy().astype(np.float32) + for x, inp in zip(input_description, flattened_inputs) } for k, v in coreml_inputs.items(): @@ -94,12 +113,16 @@ def _convert_to_inputtype(inputs): else: inputs = converter_input_type + if _HAS_EXECUTORCH and isinstance(model_spec, ExportedProgram): + inputs = None + outputs = None + return ct_convert(model_spec, inputs=inputs, convert_to=backend, source="pytorch", compute_units=compute_unit, minimum_deployment_target=minimum_deployment_target) -def generate_input_data(input_size, rand_range=(0, 1)): +def generate_input_data(input_size, rand_range=(0, 1), torch_device=torch.device("cpu")): r1, r2 = rand_range def random_data(spec): @@ -115,7 +138,7 @@ def random_data(spec): data = np.random.rand(*static_shape) if static_shape != () else np.random.rand() data = (r1 - r2) * data + r2 - return torch.from_numpy(np.array(data).astype(dtype)) + return torch.from_numpy(np.array(data).astype(dtype)).to(torch_device) if isinstance(input_size, list): return [random_data(size) for size in input_size] @@ -135,7 +158,7 @@ def flatten_and_detach_torch_results(torch_results): if isinstance(torch_results, (list, tuple)): return [x.detach().numpy() for x in _flatten(torch_results) if x is not None] # Do not need to flatten - return [torch_results.detach().numpy()] + return [torch_results.detach().cpu().numpy()] def convert_and_compare( @@ -220,6 +243,8 @@ def run_compare_torch( converter_input_type=None, compute_unit=ct.ComputeUnit.CPU_ONLY, minimum_deployment_target=None, + torch_device=torch.device("cpu"), + frontend=TorchFrontend.TORCHSCRIPT, ): """ Traces a model and runs a numerical test. @@ -228,18 +253,35 @@ def run_compare_torch( expected_results : Expected result from running pytorch model. converter_input_type: If not None, then pass it to the "inputs" argument to the ct.convert() call. 
+ frontend: Either TorchFrontend.TORCHSCRIPT or TorchFrontend.EDGEIR """ if minimum_deployment_target is not None: validate_minimum_deployment_target(minimum_deployment_target, backend) model.eval() if input_as_shape: - input_data = generate_input_data(input_data, rand_range) - - if use_scripting: - model_spec = torch.jit.script(model) + input_data = generate_input_data(input_data, rand_range, torch_device) + + if frontend == TorchFrontend.TORCHSCRIPT: + if use_scripting: + model_spec = torch.jit.script(model) + else: + model_spec = trace_model(model, _copy_input_data(input_data)) + elif frontend == TorchFrontend.EDGEIR: + input_data_clone = _copy_input_data(input_data) + if isinstance(input_data_clone, list): + input_data_clone = tuple(input_data_clone) + elif isinstance(input_data_clone, torch.Tensor): + input_data_clone = (input_data_clone,) + model_spec = ( + exir.capture(model, input_data_clone, _CAPTURE_CONFIG) + .to_edge(_EDGE_COMPILE_CONFIG) + .exported_program + ) else: - model_spec = trace_model(model, _copy_input_data(input_data)) + raise ValueError( + f"Unknown value of frontend. Needs to be either TorchFrontend.TORCHSCRIPT or TorchFrontend.EDGEIR. Provided: {frontend}" + ) model_spec, mlmodel, coreml_inputs, coreml_results = convert_and_compare( input_data, diff --git a/coremltools/converters/mil/frontend/torch/torch_op_registry.py b/coremltools/converters/mil/frontend/torch/torch_op_registry.py index 128fdd5ae..3efb284f5 100644 --- a/coremltools/converters/mil/frontend/torch/torch_op_registry.py +++ b/coremltools/converters/mil/frontend/torch/torch_op_registry.py @@ -3,7 +3,98 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -_TORCH_OPS_REGISTRY = {} +from typing import Callable + +from coremltools.models._deprecation import deprecated as _deprecated + + +class TorchOpsRegistry: + def __init__(self): + self.name_to_func_mapping = {} + + def get_func(self, op_lookup: str) -> Callable: + """ + Given a op type key, return the according translation function. + Note that the key is sanitized by removing suffix and prefix ``_`` before query. + For instance, ``__add__`` -> ``add``, ``sub_`` -> ``sub``. + """ + if op_lookup.startswith("__") and op_lookup.endswith("__"): + # Some ops may have double underscore, such as `__and__`. + op_lookup = op_lookup[2:-2] + elif op_lookup.endswith("_"): + # This is an "in place" op. + # Look up the standard op instead by removing underscore. + op_lookup = op_lookup[:-1] + + return self.name_to_func_mapping.get(op_lookup, None) + + def register_func(self, func=None, torch_alias=None, override=False): + """ + Given an op name and its alias, put the translation function (callable) + into the registry. + """ + f_name = func.__name__ + all_f_names = [f_name] + if torch_alias is not None: + all_f_names.extend(torch_alias) + + for name in all_f_names: + if name.endswith("_"): + raise Exception( + f'Attempting to register "{name}" op. Do not register inplace ops. (inplace torch ops' + f' end in a "_"). Instead register the normal op version: "{name[:-1]}". The inplace' + f" version will be supported automatically." 
+ ) + if not override and name in self.name_to_func_mapping: + raise ValueError(f"Torch op {name} already registered.") + self.set_func_by_name(func, name) + + def set_func_by_name(self, func, name): + self.name_to_func_mapping[name] = func + + def is_inplace_op(self, op_lookup: str): + """ + A torch op is considered inplace if the op name endswith ``_``. + """ + return not (op_lookup.startswith("__") and op_lookup.endswith("__")) and op_lookup.endswith( + "_" + ) + + # The following functions will be deprecated after 7.2 + # rdar://117502178 ([Infra][Pytorch] We should deprecate the direct use of _TORCH_OPS_REGISTRY in 7.2) + @_deprecated( + suffix="Please use coremltools.converters.mil.frontend.torch.register_torch_op", + version="7.2", + obj_prefix="_TORCH_OPS_REGISTRY.", + ) + def __contains__(self, key: str) -> bool: + return key in self.name_to_func_mapping + + @_deprecated( + suffix="Please use coremltools.converters.mil.frontend.torch.register_torch_op", + version="7.2", + obj_prefix="_TORCH_OPS_REGISTRY.", + ) + def __setitem__(self, key: str, value: Callable) -> None: + self.name_to_func_mapping[key] = value + + @_deprecated( + suffix="Please use coremltools.converters.mil.frontend.torch.register_torch_op", + version="7.2", + obj_prefix="_TORCH_OPS_REGISTRY.", + ) + def __delitem__(self, key: str) -> None: + del self.name_to_func_mapping[key] + + @_deprecated( + suffix="Please use coremltools.converters.mil.frontend.torch.register_torch_op", + version="7.2", + obj_prefix="_TORCH_OPS_REGISTRY.", + ) + def __getitem__(self, key: str) -> Callable: + return self.name_to_func_mapping[key] + +_TORCH_OPS_REGISTRY = TorchOpsRegistry() def register_torch_op(_func=None, torch_alias=None, override=False): @@ -28,28 +119,8 @@ def register_torch_op(_func=None, torch_alias=None, override=False): function. Otherwise, duplicate registration will error out. """ - def func_wrapper(func): - f_name = func.__name__ - - if f_name.endswith("_"): - raise Exception( - "Attempting to register \"{}\" op. Do not register inplace ops. (inplace torch ops" - " end in a \"_\"). Instead register the normal op version: \"{}\". The inplace" - " version will be supported automatically.".format(f_name, f_name[:-1]) - ) - if not override and f_name in _TORCH_OPS_REGISTRY: - raise ValueError("Torch op {} already registered.".format(f_name)) - - _TORCH_OPS_REGISTRY[f_name] = func - - if torch_alias is not None: - for name in torch_alias: - if not override and name in _TORCH_OPS_REGISTRY: - msg = "Torch op alias {} already registered." - raise ValueError(msg.format(name)) - _TORCH_OPS_REGISTRY[name] = func - + _TORCH_OPS_REGISTRY.register_func(func, torch_alias, override) return func if _func is None: diff --git a/coremltools/converters/mil/frontend/torch/torchir_passes.py b/coremltools/converters/mil/frontend/torch/torchir_passes.py index d066d9a9d..b234c5328 100644 --- a/coremltools/converters/mil/frontend/torch/torchir_passes.py +++ b/coremltools/converters/mil/frontend/torch/torchir_passes.py @@ -29,11 +29,11 @@ def forward(self, x): # x a tensor with shape [4,10] %3 = copy_(%2, value=[[1], [3]]) output -> %x - This graph pass fuses the sequences into a single InternalTorchIRNode of a new kind, which is defined as `_internal_op_tensor_inplace_copy`. + This graph pass fuses the sequences into a single InternalTorchIRNode of a new kind, which is defined as `_internal_op_tensor_inplace_copy_`. 
input -> %x %nodes_to_fuse = [slice(%x, begin=0, end=2, stride=1), select(%1, dim=1, index=4)] - %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy(%x, value=[[1],[3]], nodes_to_fuse=nodes_to_fuse) + %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy_(%x, value=[[1],[3]], nodes_to_fuse=nodes_to_fuse) output -> x_internal_tensor_assign_1 The _internal_tensor_value_assign op takes an additional internal data member nodes_to_fuse, @@ -58,12 +58,12 @@ def forward(self, x): # x a tensor with shape [4,10] Output graph: input -> %x %nodes_to_fuse_1 = [select(%x, dim=0, index=0), select(%1, dim=0, index=0)] - %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy(%x, value=1, nodes_to_fuse=nodes_to_fuse_1) + %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy_(%x, value=1, nodes_to_fuse=nodes_to_fuse_1) %nodes_to_fuse_2 = [slice(%x, dim=0, begin=1, end=2, stride=1), slice(%4, dim=1, begin=1, end=2, stride=1)] - %x_internal_tensor_assign_2 = _internal_op_tensor_inplace_copy(%x_internal_tensor_assign_1, value=[[0]], nodes_to_fuse=nodes_to_fuse_2) + %x_internal_tensor_assign_2 = _internal_op_tensor_inplace_copy_(%x_internal_tensor_assign_1, value=[[0]], nodes_to_fuse=nodes_to_fuse_2) output -> x_internal_tensor_assign_2 - torch.Tensor.fill_ works in a similar way, except the InternalTorchIRNodes is defined by `_internal_op_tensor_inplace_fill`. + torch.Tensor.fill_ works in a similar way, except the InternalTorchIRNodes is defined by `_internal_op_tensor_inplace_fill_`. A fill_ operator is generated from the following forward pass: @@ -90,10 +90,10 @@ def forward(self, x): # x a tensor with shape [4,10] input -> %x %y = [empty[](x.shape)] - %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy(%y, %x) + %x_internal_tensor_assign_1 = _internal_op_tensor_inplace_copy_(%y, %x) output -> %x_internal_tensor_assign_1 - As a result of side effects of fusing, output of `_internal_op_tensor_inplace_copy` will be renamed to `x_internal_tensor_assign_1`. + As a result of side effects of fusing, output of `_internal_op_tensor_inplace_copy_` will be renamed to `x_internal_tensor_assign_1`. If `%1` should be renamed to `x_internal_tensor_assign_1` too, the graph will be invalid. In this purpose out_alias was introduced. """ @@ -151,9 +151,9 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse): raise ValueError("No matching select or slice.") if node.kind == "copy_": - kind = "_internal_op_tensor_inplace_copy" + kind = "_internal_op_tensor_inplace_copy_" else: - kind = "_internal_op_tensor_inplace_fill" + kind = "_internal_op_tensor_inplace_fill_" nodes_to_fuse = tensor_to_node_sequence_mapping[node_input] if nodes_to_fuse[0].kind in ["select", "slice"]: @@ -169,7 +169,7 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse): update_value = node.inputs[1] nodes_to_fuse_inputs = _construct_nodes_to_fuse_inputs(nodes_to_fuse) tensor_assign_node = InternalTorchIRNode( - node=None, + name=outputs[0], inputs=[source_tensor, update_value] + nodes_to_fuse_inputs, outputs=outputs, kind=kind, @@ -273,8 +273,8 @@ def transform_inplace_ops(graph, name_remap_dict=None): def flatten_graph_input_values(graph): - """ CoreML can't handle nested iterables of tensors, so we flatten the - inputs of any graph that expects them. + """CoreML can't handle nested iterables of tensors, so we flatten the + inputs of any graph that expects them. 
""" new_graph_inputs = graph.inputs all_new_nodes = [] @@ -306,6 +306,7 @@ def flatten_graph_input_values(graph): inputs=node_inputs, outputs=[_input_name], kind="tupleconstruct", + name=_input_name, ) ) else: diff --git a/coremltools/converters/mil/frontend/torch/torchscript_utils.py b/coremltools/converters/mil/frontend/torch/torchscript_utils.py new file mode 100644 index 000000000..5f79e9355 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/torchscript_utils.py @@ -0,0 +1,201 @@ +# Copyright (c) 2023, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import torch + +from coremltools._deps import version_lt +from coremltools.converters.mil.mil import types + +torch_to_mil_types = { + torch.bool: types.bool, + torch.float16: types.fp16, + torch.float32: types.fp32, + torch.float64: types.fp32, + torch.int32: types.int32, + torch.int64: types.int32, +} + +mil_to_torch_types = {v: k for k, v in torch_to_mil_types.items()} + +def _jit_pass_lower_graph(graph, torchscript): + """ + This graph pass does a similar thing as torch._C._jit_pass_lower_graph does. + It does three things: + 1. Rename getattr nodes which produce a torch tensor to match the keys in torch model's state_dict + 2. Construct the params_dict, with the keys similar to state_dict + 3. Get the named_buffer dict in torch model + + To be more specific, this graph pass traces down series of GetAttr ops, and rename the final node to match the torch model state_dict. + It also replaces the node inputs by the first created tensor node with the same name. + + Example: + Input graph: + graph(%self.1 : __torch__.torch.nn.modules.Sequential, %input.1 : Tensor): + %2 : prim::GetAttr[name="linear"](%self.1) + %3 : prim::GetAttr[name="weight"](%2) + %4 : prim::GetAttr[name="bias"](%2) + %5 : prim::GetAttr[name="bias"](%2) # duplicated node + %6 : conv(%input.1, %3, %4) + %7 : add(%input.1, %5) + return (%6, %7) + + Output graph: + graph(%self.1 : __torch__.torch.nn.modules.Sequential, %input.1 : Tensor): + %2 : prim::GetAttr[name="linear"](%self.1) + %linear.weight : prim::GetAttr[name="weight"](%2) + %linear.bias : prim::GetAttr[name="bias"](%2) + %5 : prim::GetAttr[name="bias"](%2) # duplicated node, it is not used now + %6 : conv(%input.1, %linear.weight, %linear.bias) + %7 : add(%input.1, %linear.bias) # the second input is replaced + return (%6, %7) + + And a dictionary {"linear.weight": ..., "linear.bias": ...} is returned, to record the parameters values. + Note that, those GetAttr nodes are still in the torch ir graph, but they would be removed in a latter + graph pass in the coremltools torch internal graph + + """ + + """ + Each getattr node corresponds to a torch object in the torch IR, + it could be either: + 1. torch.nn.modules: submodule in a torch model. For instance, a linear layer in a MLP network. + 2. torch.Tensor: torch model parameters. For instance, weight for a conv layer. + 3. torch._C.ScriptObject: quantized torch model parameters. + For example, in the graph above, %2 is pointing to the __torch__.torch.nn.modules.Sequential.linear torch submodule. + node_to_module_map tracks these mapping. + + node_to_prefic_map track the name for each module, + for example, %2 has the prefix name linear and %3 is linear.weight. 
+
+    These names are also keys in the state_dict
+    """
+    node_to_module_map = {}
+    node_to_prefix_map = {}
+    first_node_with_prefix = {}
+    replace_input = {}
+
+    base_module_node = list(graph.inputs())[0]
+    node_to_module_map[base_module_node] = torchscript
+    node_to_prefix_map[base_module_node] = ""
+
+    """
+    params_dict will be constructed in this graph pass. It contains all const tensors needed for the graph computation.
+    And the value is validated against the state_dict if the key is present in both dictionaries.
+    In some rare cases, state_dict lacks parameters / buffers, so we still need to go through the whole graph ourselves.
+    """
+    params_dict = {}
+    state_dict = torchscript.state_dict(keep_vars=True)
+    buffer_dict = {k: v for k, v in torchscript.named_buffers()}
+
+    def _check_is_tensor(node, module):
+        if not isinstance(module, torch.Tensor):
+            return False
+        if str(node.output().type()) not in ("Tensor", "Optional[Tensor]"):
+            raise TypeError(f'Type "{node.output().type()}" not supported')
+        return True
+
+    def _check_is_quantized_tensor(node, module):
+        if not isinstance(module, torch._C.ScriptObject):
+            return False
+        # We only support ScriptObjects that correspond to quantized packed params.
+        assert "PackedParams" in node.output().type().name()
+        return True
+
+    def _lower_graph_block(graph):
+        for node in list(graph.nodes()):
+
+            for block in node.blocks():
+                _lower_graph_block(block)
+
+            for idx, _input in enumerate(list(node.inputs())):
+                if _input in replace_input:
+                    node.replaceInput(idx, replace_input[_input])
+
+            kind = node.kind().split("::")[1].lower()
+            if kind != "getattr":
+                continue
+
+            _input = node.input()
+            _output = node.output()
+            attr_name = getattr(node, node.kindOf("name"))("name")
+
+            module = getattr(node_to_module_map[_input], attr_name)
+            node_to_module_map[_output] = module
+
+            input_prefix = node_to_prefix_map[_input]
+            prefix = input_prefix + '.' + attr_name if input_prefix != "" else attr_name
+            node_to_prefix_map[_output] = prefix
+
+            is_tensor = _check_is_tensor(node, module)
+            is_quantized_tensor = _check_is_quantized_tensor(node, module)
+
+            if is_tensor or is_quantized_tensor:
+                if is_tensor and prefix in state_dict:
+                    assert torch.equal(
+                        module.cpu(), state_dict[prefix].cpu()
+                    ), "tensor value not consistent between torch ir and state_dict"
+                if prefix in params_dict:
+                    assert torch.equal(module.cpu(), params_dict[prefix].cpu())
+                    replace_input[_output] = first_node_with_prefix[prefix]
+                else:
+                    params_dict[prefix] = module
+                    first_node_with_prefix[prefix] = _output
+                    _output.setDebugName(prefix)
+
+    _lower_graph_block(graph)
+    return graph, params_dict, buffer_dict
+
+def _expand_and_optimize_ir(torchscript):
+    """
+    Given a torch.jit.ScriptModule, convert it to an optimized
+    torch._C.Graph and a dict of model parameter names to tensors.
+    """
+    graph = torchscript.forward.graph
+
+    # From PyTorch code: Inline function and method calls.
+    torch._C._jit_pass_inline(graph)
+    # From PyTorch code: This inlines the forked section in the fork()
+    # callsite and replaces uses of the result of wait() calls with the
+    # values produced from the (now-inlined) forked section.
+    torch._C._jit_pass_inline_fork_wait(graph)
+    # Starting from the return node, marks all nodes that feed into the
+    # output, as well as nodes with side effects. Any nodes not marked are
+    # eliminated.
+    torch._C._jit_pass_dce(graph)
+    # From PyTorch code: checks well-formedness and invariants of graph.
+ torch._C._jit_pass_lint(graph) + # Replaces a couple specific ops patterns (add, sub, mul, div, chunk). + if version_lt(torch, "1.6.0"): + torch._C._jit_pass_canonicalize_ops(graph) + torch._C._jit_pass_lint(graph) + + # From PyTorch code: This pass catches all of the small, easy to catch + # peephole optimizations you might be interested in doing. + # Eliminate no-op 'expand' nodes + # Simplify x.t().t() to x + # pass disabled for v1.6.0 and onwards, wrongly captures the shape of dummy inputs during tracing. + torch._C._jit_pass_peephole(graph, addmm_fusion_enabled=False) + else: + # v1.6.0 pass renamed + torch._C._jit_pass_canonicalize_graph_fuser_ops(graph) + torch._C._jit_pass_lint(graph) + + # From PyTorch docs: Renumber the graph so that all structurally + # equivalent graphs have same numbers. + graph = torch._C._jit_pass_canonicalize(graph) + torch._C._jit_pass_lint(graph) + if version_lt(torch, "1.6.0"): + # v1.6.0 JIT changes disallows pulling list values out of + # prim::Constant. We can only pull scalar values. constant + # propagation removes `listConstruct` and results in list values. + # We disallow constant prop pass to keep them as scalars, and rely + # on our own constant prop to interpret `listConstruct`. + torch._C._jit_pass_constant_propagation(graph) + # NOTE: Don't need another DCE, it's included in constant propagation. + torch._C._jit_pass_lint(graph) + + # Get the params_dict and rename the getattr nodes in the graph + graph, params_dict, buffer_dict = _jit_pass_lower_graph(graph, torchscript) + + return graph, params_dict, buffer_dict diff --git a/coremltools/converters/mil/input_types.py b/coremltools/converters/mil/input_types.py index 04fa9030a..8c6cf8637 100644 --- a/coremltools/converters/mil/input_types.py +++ b/coremltools/converters/mil/input_types.py @@ -310,6 +310,7 @@ def __init__( self.symbol = get_new_symbol() else: from coremltools.converters.mil.mil import Symbol + self.symbol = Symbol(symbol) self.lower_bound = lower_bound self.upper_bound = upper_bound diff --git a/coremltools/converters/mil/mil/__init__.py b/coremltools/converters/mil/mil/__init__.py index 15f4c03b2..2ec248b9e 100644 --- a/coremltools/converters/mil/mil/__init__.py +++ b/coremltools/converters/mil/mil/__init__.py @@ -7,13 +7,30 @@ from .block import Block, Function, curr_block from .builder import Builder -from .input_type import (SUPPORT_FLOAT_TYPES, SUPPORT_INT_TYPES, DefaultInputs, - InputSpec, InternalVar, ListInputType, - PyFunctionInputType, TensorInputType, TupleInputType) +from .input_type import ( + SUPPORT_FLOAT_TYPES, + SUPPORT_INT_TYPES, + DefaultInputs, + InputSpec, + InternalVar, + ListInputType, + PyFunctionInputType, + TensorInputType, + TupleInputType, +) from .operation import Operation, mil_list, precondition -from .program import (InputType, Placeholder, Program, Symbol, - get_existing_symbol, get_new_symbol, - get_new_variadic_symbol) +from .program import ( + InputType, + Placeholder, + Program, + Symbol, + get_existing_symbol, + get_new_symbol, + get_new_variadic_symbol, +) from .var import ListVar, Var -from .ops.defs._op_reqs import register_op +""" +DO NOT REMOVE THIS COMMENT, since we need to keep the import order. 
+""" +from .ops.defs._op_reqs import register_op diff --git a/coremltools/converters/mil/mil/block.py b/coremltools/converters/mil/mil/block.py index 399e65628..4871e1b1f 100644 --- a/coremltools/converters/mil/mil/block.py +++ b/coremltools/converters/mil/mil/block.py @@ -5,12 +5,14 @@ import copy from collections import Counter, OrderedDict +from typing import Tuple -from coremltools import _OPSET, _logger as logger -from coremltools.converters.mil._deployment_compatibility import \ - AvailableTarget as _target +from coremltools import _OPSET +from coremltools import _logger as logger +from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target from . import SPACES, types +from .operation import Operation from .types.symbolic import is_symbolic, k_used_symbols from .var import ComplexVar, InternalVar, Var from .visitors.dot_visitor import DotVisitor @@ -895,3 +897,26 @@ def to_str(self, func_name="function"): s += self.indented_str(SPACES) s += "}\n" return s + + def get_max_opset_version_and_op(self) -> Tuple[_target, Operation]: + """ + Find the max opset version among all operations in the function. + Returns the opset version Enum and the corresponding op. + """ + max_opset_version = _target.iOS13 + op_with_max_opset_version = None + + def update_max_opset_version_block(block): + nonlocal max_opset_version + nonlocal op_with_max_opset_version + for op in list(block.operations): + for b in op.blocks: + update_max_opset_version_block(b) + if not hasattr(op, "_op_variants") or not isinstance(op._op_variants, dict): + continue + if op.opset_version > max_opset_version: + max_opset_version = op.opset_version + op_with_max_opset_version = op + + update_max_opset_version_block(self) + return max_opset_version, op_with_max_opset_version diff --git a/coremltools/converters/mil/mil/builder.py b/coremltools/converters/mil/mil/builder.py index 2f782c27f..68f1b2a27 100644 --- a/coremltools/converters/mil/mil/builder.py +++ b/coremltools/converters/mil/mil/builder.py @@ -5,15 +5,16 @@ import numbers from collections import defaultdict +from typing import Callable, List, Optional import numpy as np from coremltools import _logger as logger +from coremltools.converters.mil._deployment_compatibility import AvailableTarget from coremltools.converters.mil.mil.types.symbolic import any_symbolic from .block import Function, curr_block -from .input_type import (InternalInputType, ListOrTensorInputType, - TensorInputType, TupleInputType) +from .input_type import InternalInputType, ListOrTensorInputType, TensorInputType, TupleInputType from .program import Placeholder, Program from .var import InternalVar, Var @@ -163,6 +164,7 @@ def _add_op(cls, op_cls, **kwargs): input_spec=op_cls.input_spec, op_name=kwargs["name"], before_op=before_op, candidate_kv=kwargs)) + kwargs["enclosing_block"] = curr_block() new_op = op_cls(**kwargs) # Initialize optional input Vars if it wasn't in kwargs @@ -193,21 +195,92 @@ def TensorSpec(shape, dtype=None): return Placeholder(shape, dtype) @staticmethod - def program(input_specs=None, opset_version=None): + def _create_function( + main_block: Callable, + input_specs: Optional[List[Placeholder]] = None, + opset_version: Optional[AvailableTarget] = None, + ): """ + Utility to construct a pymil function. + """ + if input_specs is None: + input_specs = [] - The ``mb.program`` decorator creates a MIL program with a single - function (``main``). The input to ``main`` is a tensor. 
+ # validate number of function inputs + num_args = main_block.__code__.co_argcount + arg_names = list(main_block.__code__.co_varnames)[:num_args] + if len(input_specs) != num_args: + raise ValueError( + f"{main_block.__name__} expects {num_args} inputs: {arg_names}. Got {len(input_specs)} input_specs." + ) + + # create the function + input_spec_dict = {k: v for k, v in zip(arg_names, input_specs)} + with Function(input_spec_dict, opset_version) as func: + input_vars = [func.inputs[a] for a in arg_names] + outputs = main_block(*input_vars) + if isinstance(outputs, tuple): + outputs = list(outputs) + elif not isinstance(outputs, list): + outputs = [outputs] + func.set_outputs(outputs) + + # infer the opset version if not provided + max_opset_version, _ = func.get_max_opset_version_and_op() + if opset_version is None: + func.opset_version = max_opset_version + + return func + + @staticmethod + def function( + input_specs: Optional[List[Placeholder]] = None, + opset_version: Optional[AvailableTarget] = None, + ): + """ + The ``mb.function`` decorator creates a MIL function. Parameters ---------- + input_specs: List[TensorSpec] + Describes the function inputs + + opset_version: AvailableTarget enum + Describes the opset version of the function + + Examples + -------- + >>> import coremltools as ct + >>> @mb.function(input_specs=[mb.TensorSpec(shape=(1,2))], opset_version=ct.target.iOS16) + >>> def func(a): + >>> return mb.add(x=a, y=2) - input_specs: TensorSpec - Describes a tensor. + """ + def wrapper(main_block): + return Builder._create_function(main_block, input_specs, opset_version) + + return wrapper + + @staticmethod + def program( + input_specs: Optional[List[Placeholder]] = None, + opset_version: Optional[AvailableTarget] = None, + function_name: Optional[str] = "main", + ): + """ + The ``mb.program`` decorator creates a MIL program with a single + function with name ``function_name``. + + Parameters + ---------- + input_specs: List[TensorSpec] + Describes the function inputs opset_version: AvailableTarget enum Describes the opset version of the program + function_name: str + Name of the function Examples -------- @@ -217,30 +290,9 @@ def program(input_specs=None, opset_version=None): >>> return mb.add(x=a, y=2) """ - if input_specs is None: - input_specs = [] - def wrapper(main_block): + function = Builder._create_function(main_block, input_specs, opset_version) program = Program() - num_args = main_block.__code__.co_argcount - arg_names = list(main_block.__code__.co_varnames)[:num_args] - if len(input_specs) != num_args: - msg = "{} expects {} inputs: {}. Got {} input_specs." 
- raise ValueError( - msg.format( - main_block.__name__, num_args, arg_names, len(input_specs) - ) - ) - input_spec_dict = {k: v for k, v in zip(arg_names, input_specs)} - with Function(input_spec_dict, opset_version) as func: - input_vars = [func.inputs[a] for a in arg_names] - outputs = main_block(*input_vars) - if isinstance(outputs, tuple): - outputs = list(outputs) - elif not isinstance(outputs, list): - outputs = [outputs] - func.set_outputs(outputs) - program.add_function("main", func) + program.add_function(function_name, function) return program - return wrapper diff --git a/coremltools/converters/mil/mil/input_type.py b/coremltools/converters/mil/mil/input_type.py index f3b57e491..29cd32bd7 100644 --- a/coremltools/converters/mil/mil/input_type.py +++ b/coremltools/converters/mil/mil/input_type.py @@ -276,7 +276,7 @@ def __init__(self, type_domain, **kwargs): super().__init__(**kwargs) def _is_compatible(self, v): - result = types.is_scalar(v.dtype) or types.is_tensor(v.dtype) + result = types.is_scalar(v.sym_type) or types.is_tensor(v.sym_type) result = result and (v.dtype in self.type_domain) return result @@ -309,9 +309,6 @@ class ListInputType(_InputType): """ ListInputType allows inputs of type types.list """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _is_compatible(self, v): return types.is_list(v.sym_type) @@ -326,14 +323,9 @@ class ListOrTensorInputType(_InputType): (1) MIL tensor (2) python list/tuple of MIL tensors """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _is_compatible(self, v): return ( - types.is_list(v.sym_type) - or types.is_scalar(v.dtype) - or types.is_tensor(v.dtype) + types.is_list(v.sym_type) or types.is_scalar(v.sym_type) or types.is_tensor(v.sym_type) ) @property @@ -345,9 +337,6 @@ class TupleInputType(_InputType): """ TupleInputType specifies input types of python list/tuple of MIL tensors. """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _is_compatible(self, v): # We don't check the detail types within the tuple. return isinstance(v, (tuple, list)) @@ -363,10 +352,6 @@ class InternalInputType(_InputType): It allows ops to take, for example, python primitive types, instead of only the builtin types. """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _is_compatible(self, v): return True # skip type check by default for InternalInputType. @@ -375,9 +360,5 @@ class PyFunctionInputType(InternalInputType): """ Native python function. """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _is_compatible(self, v): return callable(v.val) diff --git a/coremltools/converters/mil/mil/operation.py b/coremltools/converters/mil/mil/operation.py index 5fdb6add8..71263fd70 100644 --- a/coremltools/converters/mil/mil/operation.py +++ b/coremltools/converters/mil/mil/operation.py @@ -9,11 +9,9 @@ from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.types import is_compatible_type -from coremltools.converters.mil.mil.types.symbolic import (any_symbolic, - is_symbolic) +from coremltools.converters.mil.mil.types.symbolic import any_symbolic, is_symbolic from . import SPACES -from .block import curr_block from .input_type import DefaultInputs, TensorInputType, TupleInputType from .var import ComplexVar, InternalVar, ListVar, Var @@ -143,6 +141,11 @@ class Operation: input_types (InputSpec, class attr): Read-only named input types from all subclasses. Input types are used to validate `inputs`. 
+ If an input arg name start with prefix `_`, that indicates the input has the following properties: + 1. Most of the time, the input is type of ``InternalInputType`` and + used only in pymil scope. It doesn't have the corresponding arg / attr + in the MIL framework definition. + 2. It won't be printed in pymil. inputs [_input_vars] (dict of str --> Var): An Operation (subclass of Operation) only has access to input Var, @@ -163,7 +166,7 @@ def __init__(self, **kwargs): self._output_vars = None self._input_vars = {} self.blocks = [] - self.enclosing_block = curr_block() + self.enclosing_block = kwargs["enclosing_block"] # Initialize inputs as object attributes (all None) for k in self._input_types.keys(): @@ -205,6 +208,7 @@ def _check_expected_inputs(self, kwargs): "no_check_var_visibility", # no_check_var_visibility==True to deviate from SSA "no_check_var_types", # no_check_var_types==True to force set inputs, even if type does not match with earlier ones + "enclosing_block", ] for k in kwargs.keys(): if k not in non_attributes and k not in self._input_types: diff --git a/coremltools/converters/mil/mil/ops/defs/_utils.py b/coremltools/converters/mil/mil/ops/defs/_utils.py index fe2c1e074..3f084dc84 100644 --- a/coremltools/converters/mil/mil/ops/defs/_utils.py +++ b/coremltools/converters/mil/mil/ops/defs/_utils.py @@ -20,6 +20,37 @@ MAX_SIZE_CONSTANT_FOLDING = 1024 * 1024 / 4 # When a fp32 const takes over 1MB, we won't create a const op for that +class ConvPoolingTypeInferenceCache(dict): + """ + An utility class to cache the shape inference of ``conv`` and ``pool`` op. + The cache mechanism makes sure ops with the same input shape (symbolic also), + and params (``pad, stride, kernel``) would produce the same output shape. + """ + @staticmethod + def get_cache_key( + input_shape: Tuple[int], + pad_type: str, + pad: Tuple[int], + strides: Tuple[int], + kernel: Tuple[int], + ceil_mode: bool, + ) -> Tuple[Tuple]: + return ( + ("input_shape", input_shape), + ("pad_type", pad_type), + ("pad", pad), + ("strides", strides), + ("kernel", kernel), + ("ceil_mode", ceil_mode), + ) + + def __setitem__(self, key, value): + if key in self: + raise ValueError(f"cache key {key} already exisit.") + return dict.__setitem__(self, key, value) + +CONV_POOLING_TYPE_INFERENCE_CACHE = ConvPoolingTypeInferenceCache() + def broadcast_shapes(shape_x, shape_y): """ Check and broadcast given input shapes. @@ -129,7 +160,7 @@ def effective_kernel(kernel_shape, dilations): f"kernel_shape ({len(kernel_shape)}) and dilations ({len(dilations)}) " f"must be the same length" ) - return [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)] + return tuple([(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)]) def aggregated_pad( @@ -161,7 +192,7 @@ def aggregated_pad( Returns: - A list of total (before + after) padding for each spatial dimension in kernel_shape. + A tuple of total (before + after) padding for each spatial dimension in kernel_shape. 
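# A worked example (values assumed) of the padding helpers above. With the
# signatures shown in this hunk, ``effective_kernel`` applies dilation as
# (k - 1) * d + 1 per spatial dimension, and for ``pad_type="same"`` the total
# padding per dimension is max(0, s * ceil(i / s) - i + k_eff - s).
from coremltools.converters.mil.mil.ops.defs._utils import (
    aggregated_pad,
    effective_kernel,
)

# kernel 3x3 dilated by 2 -> effective kernel 5x5
assert effective_kernel(kernel_shape=[3, 3], dilations=[2, 2]) == (5, 5)

# input spatial dims 10x10, stride 2: max(0, 2 * ceil(10 / 2) - 10 + 5 - 2) = 3
pad = aggregated_pad(
    pad_type="same",
    kernel_shape=[3, 3],
    input_shape=[10, 10],
    strides=[2, 2],
    dilations=[2, 2],
)
assert pad == (3, 3)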
""" num_spatial_dims = len(kernel_shape) if dilations is None: @@ -188,19 +219,20 @@ def aggregated_pad( ) ) effective_ks = effective_kernel(kernel_shape, dilations) - return [ - int(max(0, s * math.ceil(float(i) / float(s)) - i + k - s)) - if not is_symbolic(i) else get_new_symbol() - for i, k, s in zip(input_shape, effective_ks, strides) - ] + return tuple( + [ + int(max(0, s * math.ceil(float(i) / float(s)) - i + k - s)) + if not is_symbolic(i) + else get_new_symbol() + for i, k, s in zip(input_shape, effective_ks, strides) + ] + ) if pad_type == "valid": - return [0] * num_spatial_dims + return tuple([0] * num_spatial_dims) if pad_type == "custom": if custom_pad is None or len(custom_pad) != 2 * num_spatial_dims: raise ValueError("Invalid custom_pad.") - return [ - custom_pad[2 * d] + custom_pad[2 * d + 1] for d in range(num_spatial_dims) - ] + return tuple([custom_pad[2 * d] + custom_pad[2 * d + 1] for d in range(num_spatial_dims)]) raise ValueError('Invalid padding pad_type "{}"'.format(pad_type)) @@ -242,7 +274,7 @@ def spatial_dimensions_out_shape( if dilations is None: dilations = [1] * num_spatial_dims if custom_pad is None: - custom_pad = [0] * num_spatial_dims * 2 + custom_pad = np.array([0] * num_spatial_dims * 2) if not ( len(input_shape) == len(kernel_shape) @@ -259,6 +291,22 @@ def spatial_dimensions_out_shape( "must all be the same length" ) + effective_ks = effective_kernel(kernel_shape, dilations) + if isinstance(strides, np.ndarray): + strides = tuple(strides.tolist()) + if isinstance(custom_pad, np.ndarray): + custom_pad = tuple(custom_pad.tolist()) + cache_key = CONV_POOLING_TYPE_INFERENCE_CACHE.get_cache_key( + input_shape, + pad_type, + custom_pad, + strides, + effective_ks, + ceil_mode, + ) + if cache_key in CONV_POOLING_TYPE_INFERENCE_CACHE: + return CONV_POOLING_TYPE_INFERENCE_CACHE[cache_key] + pad = aggregated_pad( pad_type=pad_type, kernel_shape=kernel_shape, @@ -267,7 +315,7 @@ def spatial_dimensions_out_shape( dilations=dilations, custom_pad=custom_pad, ) - effective_ks = effective_kernel(kernel_shape, dilations) + out_shape = [] for r in range(num_spatial_dims): # only check if `input_shape` (spatial part of the input image) is symbolic, because: @@ -288,6 +336,7 @@ def spatial_dimensions_out_shape( if out_dim <= 0: raise ValueError(f"spatial dimension {r} has invalid output size {out_dim}") out_shape.append(out_dim) + CONV_POOLING_TYPE_INFERENCE_CACHE[cache_key] = out_shape return out_shape @@ -409,17 +458,85 @@ def _promoted_var(var, promoted_dtype): return input_vars +def get_squeeze_axes(squeeze_mask, rank): + """ + Utility function to get the squeeze_axes from squeeze_mask. + i.e., returns a list of indices ``i`` where ``squeeze_mask[i] == True``. + For instance, given ``squeeze_mask = [True, False, True]``, + this utility returns ``[0, 2]`` + """ + if squeeze_mask is None: + squeeze_mask = [False] * rank + squeeze_axes = [] + for idx, mask in enumerate(squeeze_mask): + if mask: + squeeze_axes.append(idx) + return squeeze_axes + +def get_param_val(param): + """ + Given a param, if it is not None, returns param.val, else returns None. 
+ """ + if param is None: + return None + return param.val + +def solve_slice_by_index_slice(x_shape, begin, end, stride, begin_mask, end_mask, squeeze_mask): + """ + Utility function to solve the slices of tensor slicing + """ + # set default values for parameters + rank = len(x_shape) + begin = [int(i) for i in list(begin[:])] + end = [int(i) for i in list(end[:])] + if stride is None: + stride = [1] * rank + if begin_mask is None: + begin_mask = [False] * rank + if end_mask is None: + end_mask = [False] * rank + if squeeze_mask is None: + squeeze_mask = [False] * rank + + # compute slices + slices = [] + for idx, mask in enumerate(begin_mask): + if mask: + begin[idx] = None + for idx, mask in enumerate(end_mask): + if mask: + end[idx] = None + for idx, mask in enumerate(squeeze_mask): + if mask: + end[idx] = None + stride[idx] = np.iinfo( + np.int32 + ).max # We slice out only 1 element by setting stride to INF + for idx in range(rank): + slices.append(slice(begin[idx], end[idx], stride[idx])) + + return tuple(slices) + def solve_slice_by_index_shape(x_shape, begin, end, stride, begin_mask, end_mask, squeeze_mask): """ Helper function to solve the shape of tensor slicing. """ - ret_shape = [] - + # set default values + rank = len(x_shape) if begin is None or len(begin) == 0: - begin = [None] * len(x_shape) + begin = [None] * rank if end is None or len(end) == 0: - end = [None] * len(x_shape) - + end = [None] * rank + if stride is None: + stride = [1] * rank + if begin_mask is None: + begin_mask = [False] * rank + if end_mask is None: + end_mask = [False] * rank + if squeeze_mask is None: + squeeze_mask = [False] * rank + + # basic validation for tensor shape if len(begin) != len(x_shape): raise TypeError( "slice_by_index op: size of 'begin', {}, is not equal to the rank of input, which is {}".format( @@ -434,6 +551,7 @@ def solve_slice_by_index_shape(x_shape, begin, end, stride, begin_mask, end_mask ) # solve for shape inference + ret_shape = [] for idx in range(len(x_shape)): # skip if we want to squeeze the dimension if squeeze_mask[idx]: diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py b/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py index ec7fa8cc2..c5ebc40d0 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/elementwise_unary.py @@ -848,7 +848,7 @@ class cast(Operation): @classmethod def supported_dtypes(cls): - return (builtin_to_string(v) for v in cls.type_domains["T"]) + return [builtin_to_string(v) for v in cls.type_domains["T"]] def type_inference(self): if self.dtype.val not in self.supported_dtypes(): diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py b/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py index ffa806739..698c680fe 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py @@ -57,8 +57,10 @@ class linear(Operation): def default_inputs(self): Dout = self.weight.shape[0] + # If the bias is not provided, we initialize it a zero vector + # with dtype of weight. 
return DefaultInputs( - bias=np.array([0.0] * Dout, dtype=nptype_from_builtin(self.x.dtype)), + bias=np.array([0.0] * Dout, dtype=nptype_from_builtin(self.weight.dtype)), ) def type_inference(self): diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/pool.py b/coremltools/converters/mil/mil/ops/defs/iOS15/pool.py index b1d25fb2b..ce245f19d 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/pool.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/pool.py @@ -101,16 +101,26 @@ class avg_pool(Pooling): * ``S == len(D_in)``. pad_type: const str (Required) - Must be one of ``valid``, ``same``, ``custom`` or ``same_lower``. - - * ``valid``: No padding. This is equivalent to custom pad with ``pad[i] = 0, for - all i``. - * ``same`` : This is equivalent to custom pad with ``pad[2*i] + pad[2*i+1] = kernel_size[i]``. - * ``custom``: Specify custom padding in the parameter pad. note that ``same`` - padding is equivalent to custom padding with - ``pad[2*i] + pad[2*i+1] = kernel_size[i]``. - * ``same_lower``: Similar to ``same`` but the padding - will place extra rows/cols on the top/left if the padding amount is odd. + + Must be one of the following: + + * ``valid``: No padding. This is equivalent to custom pad with + ``pad[2*i] == pad[2*i+1] == 0, for i=0,...,len(d_in)-1``. + * ``custom``: Specify custom padding in the parameter ``pad``. + * ``same``: Input is padded such that out spatial shapes are + ``d_out[i] = ceil(d_in[i] / strides[i])``. + * ``same_lower``: Similar to ``same`` but the padding + will place extra rows/cols on the top/left if the padding amount is odd. + + Specifically, for ``i = 0,..,,len(d_in)-1``, the equivalent paddings are + calculated as follows: + + * ``dilated_kernel = (K[i] - 1) * dilate[i] + 1`` + * If ``dilated_kernel`` is odd, + ``padding[2*i] = padding[2*i+1] = floor(dilated_kernel / 2)`` + * Otherwise: + ``padding[2*i] = ceil((dilated_kernel - 1) / 2)``, + ``padding[2*i+1] = floor((dilated_kernel - 1) / 2)`` pad: const<[P],i32> (Optional. Default to all 0s) * ``pad`` represents the number of elements to pad before and after each diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/random.py b/coremltools/converters/mil/mil/ops/defs/iOS15/random.py index f6663cf48..c89f9fe1a 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/random.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/random.py @@ -45,13 +45,13 @@ class random_bernoulli(RandomDistribution): r""" Returns a tensor with the specified shape, with random values from a Bernoulli distribution. - + .. math:: f(k) = \begin{cases}1-p &\text{if } k = 0\\ p &\text{if } k = 1\end{cases} for :math:`k` in :math:`\{0, 1\}`. - + Parameters ---------- shape: (Required) @@ -62,7 +62,7 @@ class random_bernoulli(RandomDistribution): * The probability of sampling ``1``. Defaults to ``0.5``. seed: const (Optional) * Seed to create a reproducible sequence of values across multiple invokes. - + Returns ------- <\*, T> @@ -76,7 +76,7 @@ class random_bernoulli(RandomDistribution): -------- random_categorical, random_normal, random_uniform """ - + input_spec = ( InputSpec( shape=TensorInputType(type_domain=types.int32), @@ -85,7 +85,7 @@ class random_bernoulli(RandomDistribution): ) + RandomDistribution.input_spec ) - + type_domains = { "T": (types.fp16, types.fp32), } @@ -106,23 +106,29 @@ def type_inference(self): class random_categorical(Operation): """ Returns random values from a categorical distribution. 
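# Worked numbers (illustrative only) for the equivalent-padding formulas in the
# avg_pool ``pad_type`` documentation in the pool.py hunk above.
import math

def equivalent_padding(K, dilate):
    dilated_kernel = (K - 1) * dilate + 1
    if dilated_kernel % 2 == 1:
        return dilated_kernel // 2, dilated_kernel // 2
    return math.ceil((dilated_kernel - 1) / 2), (dilated_kernel - 1) // 2

assert equivalent_padding(K=3, dilate=2) == (2, 2)  # dilated kernel 5 (odd)
assert equivalent_padding(K=2, dilate=1) == (1, 0)  # dilated kernel 2 (even)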
- + Parameters ---------- - shape: <\*D_in, T> - * N-dimensional tensor, one of ``logits`` (event log-probabilities) or ``probs`` - (event probabilities). The first ``N - 1`` dimensions specifies distributions, - and the last dimension represents a vector of probabilities. + x: <\*D_in, T> + * N-dimensional tensor which represents ``logits`` (event log-probabilities) or ``probs`` + (event probabilities) depending on ``mode``. The first ``N - 1`` dimensions specifies + distributions, and the last dimension represents a vector of probabilities. mode: const (Optional) One of ``['logits', 'probs']``. Defaults to ``logits``. + When set to ``probs``, an element-wise log layer will be added to calculate logits. size: const (Optional) Number of samples to draw. Defaults to ``1``. + When set as ``1``, it's categorical distribution. + When set larger than ``1``, it's actually multinomial distribution by drawing with + replacement. It means that when a sample index is drawn, it can be drawn again. + The categorical distribution is a special case of the multinomial distribution, giving + the probabilities of potential outcomes of a single drawing rather than multiple drawings. seed: const (Optional) Seed to create a reproducible sequence of values across multiple invokes. - + Returns ------- <\*D_in[:-1] + [size], T> @@ -136,14 +142,14 @@ class random_categorical(Operation): -------- random_bernoulli, random_normal, random_uniform """ - + input_spec = InputSpec( x=TensorInputType(type_domain="T"), mode=TensorInputType(const=True, optional=True, type_domain=types.str), size=TensorInputType(const=True, optional=True, type_domain=types.int32), seed=TensorInputType(const=True, optional=True, type_domain=types.int32), ) - + type_domains = { "T": (types.fp16, types.fp32), } @@ -166,7 +172,7 @@ class random_normal(RandomDistribution): r""" Returns a tensor with the specified shape, with random values from a normal distribution. - + Parameters ---------- shape: (Required) @@ -179,7 +185,7 @@ class random_normal(RandomDistribution): The standard deviation (width) of the normal distribution. Defaults to ``1.0``. seed: const (Optional) Seed to create a reproducible sequence of values across multiple invokes. - + Returns ------- <\*, T> @@ -193,7 +199,7 @@ class random_normal(RandomDistribution): -------- random_categorical, random_bernoulli, random_uniform """ - + input_spec = ( InputSpec( shape=TensorInputType(type_domain=types.int32), @@ -203,7 +209,7 @@ class random_normal(RandomDistribution): ) + RandomDistribution.input_spec ) - + type_domains = { "T": (types.fp16, types.fp32), } @@ -229,15 +235,15 @@ class random_uniform(RandomDistribution): Returns a tensor with the specified shape with random values from a uniform distribution. Samples are uniformly distributed over the half-open interval ``[low, high)`` (includes low, but excludes high). - + .. math:: p(x) = \frac{1}{high - low} - + For a real number :math:`x`. - + When ``high == low``, values of ``low`` will be returned. If ``high < low``, the results are officially undefined and may eventually raise an error. - + Parameters ---------- shape: (Required) @@ -250,7 +256,7 @@ class random_uniform(RandomDistribution): * Upper boundary of the output interval (exclusive). Defaults to ``1.0``. seed: const (Optional) * Seed to create a reproducible sequence of values across multiple invokes. 
- + Returns ------- <\*, T> @@ -264,7 +270,7 @@ class random_uniform(RandomDistribution): -------- random_categorical, random_bernoulli, random_normal """ - + input_spec = ( InputSpec( shape=TensorInputType(type_domain=types.int32), @@ -274,7 +280,7 @@ class random_uniform(RandomDistribution): ) + RandomDistribution.input_spec ) - + type_domains = { "T": (types.fp16, types.fp32), } diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py index 3202c480b..9a5340764 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py @@ -524,6 +524,10 @@ def type_inference(self): if len(pad) % 2 != 0: raise ValueError("Number of elements in the argument Pad must be divisible by 2.") + for i in range(len(pad)): + if not is_symbolic(pad[i]) and pad[i] < 0: + raise ValueError(f"pad must be non-negative integer, got {pad[i]} at index {i}") + pad = pad.reshape(-1, 2) if pad.shape[0] > len(ret_shape): diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py index a409b36af..9f218ac15 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py @@ -19,7 +19,12 @@ from coremltools.converters.mil.mil.input_type import DefaultInputs, InputSpec, TensorInputType from coremltools.converters.mil.mil.operation import SYMBOL, VALUE from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op -from coremltools.converters.mil.mil.ops.defs._utils import solve_slice_by_index_shape +from coremltools.converters.mil.mil.ops.defs._utils import ( + get_param_val, + get_squeeze_axes, + solve_slice_by_index_shape, + solve_slice_by_index_slice, +) from coremltools.converters.mil.mil.types.symbolic import ( any_symbolic, any_variadic, @@ -514,23 +519,16 @@ def default_inputs(self): ) def type_inference(self): - - # get tensor and set default value - begin = self.begin.val - end = self.end.val - x_rank = self.x.rank - stride = self.stride.val if self.stride is not None else [1] * x_rank - begin_mask = ( - self.begin_mask.val if self.begin_mask is not None else [False] * x_rank - ) - end_mask = self.end_mask.val if self.end_mask is not None else [False] * x_rank - squeeze_mask = ( - self.squeeze_mask.val if self.squeeze_mask is not None else [False] * x_rank - ) - # solve shape - x_shape = self.x.shape - ret_shape = solve_slice_by_index_shape(x_shape, begin, end, stride, begin_mask, end_mask, squeeze_mask) + ret_shape = solve_slice_by_index_shape( + self.x.shape, + self.begin.val, + self.end.val, + get_param_val(self.stride), + get_param_val(self.begin_mask), + get_param_val(self.end_mask), + get_param_val(self.squeeze_mask), + ) if len(ret_shape) == 0: # Scalar case. 
@@ -541,41 +539,21 @@ def type_inference(self): def value_inference(self): if self.x.sym_val is None or self.begin.val is None or self.end.val is None: return None - begin = [int(i) for i in list(self.begin.val[:])] - end = [int(i) for i in list(self.end.val[:])] - stride = [1] * self.x.rank if self.stride is None else self.stride.val - begin_mask = ( - [False] * self.x.rank if self.begin_mask is None else self.begin_mask.val - ) - end_mask = [False] * self.x.rank if self.end_mask is None else self.end_mask.val - squeeze_mask = ( - [False] * self.x.rank - if self.squeeze_mask is None - else self.squeeze_mask.val - ) - slices = [] - for idx, mask in enumerate(begin_mask): - if mask: - begin[idx] = None - for idx, mask in enumerate(end_mask): - if mask: - end[idx] = None - squeeze_axes = [] - for idx, mask in enumerate(squeeze_mask): - if mask: - end[idx] = None - stride[ - idx - ] = 2147483647 # We slice out only 1 element by setting stride to INF - squeeze_axes.append(idx) - for idx in range(self.x.rank): - slices.append(slice(begin[idx], end[idx], stride[idx])) - - slices = tuple(slices) + # solve the data slices and slice tensor + slices = solve_slice_by_index_slice( + self.x.shape, + self.begin.val, + self.end.val, + get_param_val(self.stride), + get_param_val(self.begin_mask), + get_param_val(self.end_mask), + get_param_val(self.squeeze_mask), + ) res = self.x.sym_val[slices] - # remove squeezed axes + # remove squeeze_axes + squeeze_axes = get_squeeze_axes(get_param_val(self.squeeze_mask), self.x.rank) if len(squeeze_axes) > 0: if len(squeeze_axes) == len(res.shape): if len(res) == 0: diff --git a/coremltools/converters/mil/mil/ops/defs/iOS16/scatter_gather.py b/coremltools/converters/mil/mil/ops/defs/iOS16/scatter_gather.py index 1e3e88c61..d7f56371f 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS16/scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS16/scatter_gather.py @@ -4,13 +4,13 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from coremltools.converters.mil.mil import Operation, types -from coremltools.converters.mil.mil.input_type import (DefaultInputs, - InputSpec, - TensorInputType) -from coremltools.converters.mil.mil.operation import (SYMBOL, VALUE, - precondition) +from coremltools.converters.mil.mil.input_type import DefaultInputs, InputSpec, TensorInputType +from coremltools.converters.mil.mil.operation import SYMBOL, VALUE, precondition from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op from coremltools.converters.mil.mil.ops.defs._utils import compute_gather +from coremltools.converters.mil.mil.ops.defs.iOS15.scatter_gather import ( + gather_along_axis as _gather_along_axis_iOS15, +) from coremltools.converters.mil.mil.ops.defs.iOS16 import _IOS16_TARGET @@ -20,12 +20,13 @@ class gather(Operation): The iOS16 version. This section documents only the differences between this version and the iOS 15 :py:class:`~.iOS15.scatter_gather.gather`. - + This version supports ``batch_dims``, similar to `tf.gather `_. + Input parameter ``indices`` now supports ``int16`` and ``uint16``. Parameters ---------- - x: tensor<\*D, U> (Required) + x: tensor<\*D, T> (Required) indices: tensor<\*N, I> (Required) * Indices values may be negative. More precisely, ``-D[axis]<= v < D[axis]`` for ``v`` in ``indices``. axis: const i32 (Optional. 
Default=``0``) @@ -50,14 +51,14 @@ class gather(Operation): """ input_spec = InputSpec( - x=TensorInputType(type_domain="U"), + x=TensorInputType(type_domain="T"), indices=TensorInputType(type_domain="I"), axis=TensorInputType(const=True, optional=True, type_domain=types.int32), batch_dims=TensorInputType(const=True, optional=True, type_domain=types.int32) ) - + type_domains = { - "U": (types.fp16, types.fp32, types.int32), + "T": (types.fp16, types.fp32, types.int32), "I": (types.int32, types.uint16, types.int16), } @@ -75,11 +76,11 @@ def value_inference(self): # only allow x to be symbolic. indices cannot. return None return compute_gather( - params=self.x.sym_val, - indices=self.indices.val, - axis=self.axis.val, - batch_dims=self.batch_dims.val - ) + params=self.x.sym_val, + indices=self.indices.val, + axis=self.axis.val, + batch_dims=self.batch_dims.val, + ) def type_inference(self): # validate parameters @@ -100,7 +101,7 @@ def type_inference(self): "batch_dims {} must be less or equal to than indices.rank {} for node {}".format( self.batch_dims.val, self.indices.rank, self.name ) - ) + ) output_rank = self.x.rank - 1 + self.indices.rank - self.batch_dims.val if output_rank == 0: @@ -115,6 +116,44 @@ def type_inference(self): return types.tensor(self.x.dtype, out_shape) + +@register_op(opset_version=_IOS16_TARGET) +class gather_along_axis(_gather_along_axis_iOS15): + """ + The iOS16 version. + The only difference between this version and the iOS 15 :py:class:`~.iOS15.scatter_gather.gather_along_axis`. + is that input parameter ``indices`` now supports ``int16`` and ``uint16``. + + Parameters + ---------- + x: tensor<\*D, T> (Required) + indices: tensor<\*K, I> (Required) + axis: const i32 (Optional): + * Default to ``0``. + + Returns + ------- + tensor<\*D, T>: + * Output tensor has the same shape as ``indices``. + + Attributes + ---------- + T: fp16, fp32, i32 + I: uint16, int16, int32 + """ + + input_spec = InputSpec( + x=TensorInputType(type_domain="T"), + indices=TensorInputType(type_domain="I"), + axis=TensorInputType(const=True, optional=True, type_domain=types.int32), + ) + + type_domains = { + "T": (types.fp16, types.fp32, types.int32), + "I": (types.int32, types.uint16, types.int16), + } + + @register_op(opset_version=_IOS16_TARGET) class gather_nd(Operation): """ @@ -123,11 +162,12 @@ class gather_nd(Operation): iOS 15 :py:class:`~.iOS15.scatter_gather.gather_nd`. This version supports ``batch_dims``. + Input parameter ``indices`` now supports ``int16`` and ``uint16``. Parameters ---------- x: tensor<\*D, T> (Required) - indices: tensor<\*K, i32> (Required) + indices: tensor<\*K, I> (Required) batch_dims: const i32 (Optional. Default=``0``) * The number of batch dimensions. 
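# A rough NumPy reference (not the coremltools implementation) for the
# ``batch_dims`` semantics described in the iOS16 gather documentation above:
# the leading ``batch_dims`` dimensions of ``x`` and ``indices`` are matched
# pairwise, and the output rank is x.rank - 1 + indices.rank - batch_dims.
import numpy as np

def gather_with_batch_dims(x, indices, axis, batch_dims):
    if batch_dims == 0:
        return np.take(x, indices, axis=axis)
    # Recurse over one matched batch dimension at a time.
    return np.stack(
        [
            gather_with_batch_dims(x[i], indices[i], axis - 1, batch_dims - 1)
            for i in range(x.shape[0])
        ]
    )

x = np.arange(2 * 2 * 3).reshape(2, 2, 3)
indices = np.array([[1, 0], [0, 0]])
out = gather_with_batch_dims(x, indices, axis=1, batch_dims=1)
assert out.shape == (2, 2, 3)  # 3 - 1 + 2 - 1 = 3 dims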
@@ -139,6 +179,7 @@ class gather_nd(Operation): Attributes ---------- T: fp16, fp32, i32 + I: uint16, int16, int32 References ---------- @@ -146,13 +187,13 @@ class gather_nd(Operation): """ input_spec = InputSpec( - x=TensorInputType(type_domain="U"), + x=TensorInputType(type_domain="T"), indices=TensorInputType(type_domain="I"), batch_dims=TensorInputType(const=True, optional=True, type_domain=types.int32), ) - + type_domains = { - "U": (types.fp16, types.fp32, types.int32), + "T": (types.fp16, types.fp32, types.int32), "I": (types.int32, types.uint16, types.int16), } diff --git a/coremltools/converters/mil/mil/ops/defs/iOS17/scatter_gather.py b/coremltools/converters/mil/mil/ops/defs/iOS17/scatter_gather.py index 95898c616..f30d3c761 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS17/scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS17/scatter_gather.py @@ -8,9 +8,6 @@ from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.input_type import DefaultInputs, InputSpec, TensorInputType from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op -from coremltools.converters.mil.mil.ops.defs.iOS15.scatter_gather import ( - gather_along_axis as _gather_along_axis_iOS15, -) from coremltools.converters.mil.mil.ops.defs.iOS15.scatter_gather import scatter as _scatter_iOS15 from coremltools.converters.mil.mil.ops.defs.iOS15.scatter_gather import ( scatter_along_axis as _scatter_along_axis_iOS15, @@ -19,6 +16,9 @@ scatter_nd as _scatter_nd_iOS15, ) from coremltools.converters.mil.mil.ops.defs.iOS16.scatter_gather import gather as _gather_iOS16 +from coremltools.converters.mil.mil.ops.defs.iOS16.scatter_gather import ( + gather_along_axis as _gather_along_axis_iOS16, +) from coremltools.converters.mil.mil.ops.defs.iOS16.scatter_gather import ( gather_nd as _gather_nd_iOS16, ) @@ -247,6 +247,8 @@ class gather(_gather_iOS16): This section documents only the differences between this version and the iOS 16 :py:class:`~.iOS16.scatter_gather.gather`. The major differences are as follows: + - Input parameter ``x`` adds support for ``int16``, ``uint16``, ``int8``, and ``uint8``. + - Input parameter ``indices`` adds support for ``int8`` and ``uint8``. - Input parameter ``indices`` now supports only positive values -- negative values are considered out-of-bound. If support for negative indices is required, they must be explicitly converted to positive values, using the following:: @@ -262,7 +264,7 @@ class gather(_gather_iOS16): Parameters ---------- - x: tensor<\*D, U> (Required) + x: tensor<\*D, T> (Required) indices: tensor<\*N, I> (Required) * Indices values may be negative. More precisely, ``-D[axis]<= v < D[axis]`` for ``v`` in ``indices``. axis: const i32 (Optional. 
Default=``0``) @@ -283,18 +285,31 @@ class gather(_gather_iOS16): Attributes ---------- - T: fp16, fp32, i32 - I: uint16, int16, int32 + T: fp16, fp32, int32, int16, uint16, int8, uint8 + I: int32, int16, uint16, int8, uint8 """ input_spec = InputSpec( - x=TensorInputType(type_domain="U"), + x=TensorInputType(type_domain="T"), indices=TensorInputType(type_domain="I"), axis=TensorInputType(const=True, optional=True, type_domain=types.int32), batch_dims=TensorInputType(const=True, optional=True, type_domain=types.int32), validate_indices=TensorInputType(const=True, optional=True, type_domain=types.bool), ) + type_domains = { + "T": ( + types.fp16, + types.fp32, + types.int32, + types.int16, + types.uint16, + types.int8, + types.uint8, + ), + "I": (types.int32, types.int16, types.uint16, types.int8, types.uint8), + } + def default_inputs(self): return DefaultInputs(axis=0, batch_dims=0, validate_indices=False) @@ -314,17 +329,17 @@ def type_inference(self): @register_op(opset_version=_IOS17_TARGET) -class gather_along_axis(_gather_along_axis_iOS15): +class gather_along_axis(_gather_along_axis_iOS16): """ Take the values along ``axis`` at locations ``indices``. The major differences from the previous version are illustrated in :py:class:`gather`. - For more information, see the iOS 15 :py:class:`~.iOS15.scatter_gather.gather_along_axis`. + For more information, see the iOS 16 :py:class:`~.iOS16.scatter_gather.gather_along_axis`. Parameters ---------- x: tensor<\*D, T> (Required) - indices: tensor<\*K, i32> (Required) + indices: tensor<\*K, I> (Required) * ``rank(indices) == rank(x)``. axis: const i32 (Optional): * Default to ``0``. @@ -342,16 +357,30 @@ class gather_along_axis(_gather_along_axis_iOS15): Attributes ---------- - T: fp16, fp32, i32 + T: fp16, fp32, int32, int16, uint16, int8, uint8 + I: int32, int16, uint16, int8, uint8 """ input_spec = InputSpec( x=TensorInputType(type_domain="T"), - indices=TensorInputType(type_domain=types.int32), + indices=TensorInputType(type_domain="I"), axis=TensorInputType(const=True, optional=True, type_domain=types.int32), validate_indices=TensorInputType(const=True, optional=True, type_domain=types.bool), ) + type_domains = { + "T": ( + types.fp16, + types.fp32, + types.int32, + types.int16, + types.uint16, + types.int8, + types.uint8, + ), + "I": (types.int32, types.int16, types.uint16, types.int8, types.uint8), + } + def default_inputs(self): return DefaultInputs( axis=0, @@ -383,7 +412,7 @@ class gather_nd(_gather_nd_iOS16): Parameters ---------- x: tensor<\*D, T> (Required) - indices: tensor<\*K, i32> (Required) + indices: tensor<\*K, I> (Required) batch_dims: const i32 (Optional. Default=``0``) * The number of batch dimensions. 
validate_indices: const bool (Optional) @@ -400,16 +429,30 @@ class gather_nd(_gather_nd_iOS16): Attributes ---------- - T: fp16, fp32, i32 + T: fp16, fp32, int32, int16, uint16, int8, uint8 + I: int32, int16, uint16, int8, uint8 """ input_spec = InputSpec( - x=TensorInputType(type_domain="U"), + x=TensorInputType(type_domain="T"), indices=TensorInputType(type_domain="I"), batch_dims=TensorInputType(const=True, optional=True, type_domain=types.int32), validate_indices=TensorInputType(const=True, optional=True, type_domain=types.bool), ) + type_domains = { + "T": ( + types.fp16, + types.fp32, + types.int32, + types.int16, + types.uint16, + types.int8, + types.uint8, + ), + "I": (types.int32, types.int16, types.uint16, types.int8, types.uint8), + } + def default_inputs(self): return DefaultInputs( batch_dims=0, diff --git a/coremltools/converters/mil/mil/ops/registry.py b/coremltools/converters/mil/mil/ops/registry.py index 49946796c..51debd752 100644 --- a/coremltools/converters/mil/mil/ops/registry.py +++ b/coremltools/converters/mil/mil/ops/registry.py @@ -98,7 +98,6 @@ def register_op(_cls=None, is_custom_op=False, namespace=None, opset_version=tar """ def class_wrapper(op_cls): op_type = op_cls.__name__ - op_cls.__name__ = op_type # debug message op_msg = "op" @@ -117,10 +116,10 @@ def class_wrapper(op_cls): # Check that op_type is prefixed with namespace if op_type[: len(namespace)] != namespace: msg = ( - "Dialect pp type {} registered under {} namespace must " - + "prefix with {}" + "Dialect op type {} registered under {} namespace must " + "prefix with {}" ) raise ValueError(msg.format(op_type, namespace, namespace)) + op_cls._dialect_namespace = namespace else: op_reg = SSAOpRegistry.core_ops diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py index 0af19fa19..8817aa59b 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py @@ -218,6 +218,44 @@ def build(x): class TestConv: + @pytest.mark.parametrize( + "backend, pad_type", + itertools.product( + backends, + ["valid", "same", "same_lower", "custom"], + ), + ) + def test_type_inference_cache_no_pad(self, backend, pad_type): + # Test the type inference has the caching mechanism to ensure + # same symbolic input shapes results in the same output shape + if pad_type == "same_lower" and backend.opset_version == ct.target.iOS15: + return + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1, 3, get_new_symbol(), get_new_symbol()), dtype=types.fp32) + ], + opset_version=backend.opset_version, + ) + def prog(x): + weight = np.random.rand(2, 3, 2, 2) + + # Basic conv + conv_1 = mb.conv(x=x, weight=weight) + conv_2 = mb.conv(x=x, weight=weight) + assert conv_1.shape == conv_2.shape + + # With strides / dialations + conv_1 = mb.conv(x=x, weight=weight, strides=[1, 2], dilations=[3, 4]) + conv_2 = mb.conv(x=x, weight=weight, strides=[1, 2], dilations=[3, 4]) + assert conv_1.shape == conv_2.shape + + # With padding + conv_1 = mb.conv(x=x, weight=weight, pad_type=pad_type, pad=[2, 3, 4, 5]) + conv_2 = mb.conv(x=x, weight=weight, pad_type=pad_type, pad=[2, 3, 4, 5]) + assert conv_1.shape == conv_2.shape + return conv_1 + @pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) @pytest.mark.parametrize( "compute_unit, backend, padding_mode, conv_dim", diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py 
b/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py index c3fafa42c..0da0d8320 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py @@ -10,7 +10,7 @@ import coremltools as ct from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import get_new_symbol, types from coremltools.converters.mil.mil.ops.tests.iOS14 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder from coremltools.converters.mil.mil.types import builtin_to_string, nptype_from_builtin @@ -109,7 +109,7 @@ def build(x): itertools.product(compute_units, backends, [types.int32, types.fp16, types.fp32]), ) def test_default_bias_type(self, compute_unit, backend, input_type): - # Test the default bias matches the dtype of x + # Test the default bias matches the dtype of x and weight. @mb.program( input_specs=[mb.TensorSpec(shape=(1, 2), dtype=types.fp32)], opset_version=backend.opset_version, @@ -365,3 +365,35 @@ def test_builder_eval(self): equation = "bcd,dce->bce" v = mb.einsum(values=(x_val, y_val), equation=equation) np.testing.assert_allclose(np.einsum(equation, x_val, y_val), v.val, atol=1e-04, rtol=1e-05) + + @pytest.mark.parametrize( + "backend", + backends, + ) + def test_symbolic_input_conv_and_einsum(self, backend): + """ + Test a pattern of: + + %1 = conv_1(%x) + %2 = conv_2(%x) + %3 = transpose(%2, [0, 3, 2, 1]) + %4 = einsum(%1, %3) + + If ``%x`` has symbolic shape and ``conv_1, conv_2`` have the same + configuration, the above program should pass the type inference. + """ + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1, 3, get_new_symbol(), get_new_symbol()), dtype=types.fp32) + ], + opset_version=backend.opset_version, + ) + def prog(x): + weight = np.random.rand(2, 3, 2, 2) + conv_1 = mb.conv(x=x, weight=weight) + conv_2 = mb.conv(x=x, weight=weight) + conv_2_transpose = mb.transpose(x=conv_2, perm=[0, 3, 2, 1]) + return mb.einsum(values=(conv_1, conv_2_transpose), equation="abcd,adce->abce") + + assert prog is not None diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_pool.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_pool.py index 63ffe3603..2cd35ff27 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_pool.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_pool.py @@ -10,13 +10,49 @@ import coremltools as ct from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import get_new_symbol, types from coremltools.converters.mil.mil.ops.tests.iOS14 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder from coremltools.converters.mil.testing_reqs import compute_units class TestAvgPool: + @pytest.mark.parametrize( + "backend, pad_type", + itertools.product( + backends, + ["valid", "same", "same_lower", "custom"], + ), + ) + def test_type_inference_cache(self, backend, pad_type): + # Test the type inference has the caching mechanism to ensure + # same symbolic input shapes results in the same output shape + if pad_type == "same_lower" and backend.opset_version == ct.target.iOS15: + return + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1, 3, get_new_symbol(), get_new_symbol()), dtype=types.fp32) + ], + opset_version=backend.opset_version, + ) + def prog(x): + # Basic pool + pool_1 = 
mb.avg_pool(x=x, kernel_sizes=[1, 2], pad_type=pad_type) + pool_2 = mb.avg_pool(x=x, kernel_sizes=[1, 2], pad_type=pad_type) + assert pool_1.shape == pool_1.shape + + # With strides + pool_1 = mb.avg_pool(x=x, kernel_sizes=[1, 2], strides=[1, 2], pad_type=pad_type) + pool_2 = mb.avg_pool(x=x, kernel_sizes=[1, 2], strides=[1, 2], pad_type=pad_type) + assert pool_1.shape == pool_1.shape + + # With padding + pool_1 = mb.avg_pool(x=x, kernel_sizes=[1, 2], pad_type=pad_type, pad=[2, 3, 4, 5]) + pool_2 = mb.avg_pool(x=x, kernel_sizes=[1, 2], pad_type=pad_type, pad=[2, 3, 4, 5]) + assert pool_1.shape == pool_2.shape + return pool_1 + @pytest.mark.parametrize( "compute_unit, backend, inputshape_kernelshape", itertools.product( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py index a10f516f4..8c7ef8374 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py @@ -491,15 +491,18 @@ def prog(x): class TestGatherAlongAxis: @pytest.mark.parametrize( - "compute_unit, backend", - itertools.product(compute_units, backends), + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product(compute_units, backends, [np.float32, np.float16, np.int32], [np.int32]), ) - def test_builder_to_backend_smoke(self, compute_unit, backend): - x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) - indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=x_dtype) + indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=indices_dtype) + builtin_x_dtype = types.numpy_type_to_builtin_type(x_dtype) input_placeholders = { - "x": mb.placeholder(shape=x.shape), - "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype), + "indices": mb.placeholder( + shape=indices.shape, dtype=types.numpy_type_to_builtin_type(indices_dtype) + ), } input_values = {"x": x, "indices": indices} @@ -514,19 +517,19 @@ def build(x, indices): ] expected_output_types = [ - (2, 3, types.fp32), - (2, 3, types.fp32), - (2, 3, types.fp32), - (2, 3, types.fp32), - (2, 3, types.fp32), + (2, 3, builtin_x_dtype), + (2, 3, builtin_x_dtype), + (2, 3, builtin_x_dtype), + (2, 3, builtin_x_dtype), + (2, 3, builtin_x_dtype), ] expected_outputs = [ - np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), - np.array([[2, 1, 2], [5, 5, 4]], dtype=np.float32), - np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), - np.array([[2, 1, 2], [5, 5, 4]], dtype=np.float32), - np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), + np.array([[4, 2, 6], [4, 5, 3]], dtype=x_dtype), + np.array([[2, 1, 2], [5, 5, 4]], dtype=x_dtype), + np.array([[4, 2, 6], [4, 5, 3]], dtype=x_dtype), + np.array([[2, 1, 2], [5, 5, 4]], dtype=x_dtype), + np.array([[4, 2, 6], [4, 5, 3]], dtype=x_dtype), ] run_compare_builder( @@ -566,30 +569,36 @@ def prog(x): @staticmethod def _test_builder_to_backend_programmatic( - compute_unit, backend, rank_axis, force_non_negative_indices + compute_unit, backend, rank_axis, x_dtype, indices_dtype, force_non_negative_indices ): rank, axis = rank_axis x_shape = np.random.randint(low=2, high=8, size=rank) indices_shape = np.copy(x_shape) indices_shape[axis] = np.random.randint(low=1, high=8) - x = np.random.rand(*x_shape).astype(np.float32) + x = 
np.random.rand(*x_shape).astype(x_dtype) - # IOS17 gather_along_axis requires non-negative indices. - lower_bound = 0 if force_non_negative_indices else -x_shape[axis] - indices = np.random.randint(lower_bound, x_shape[axis], size=indices_shape).astype(np.int32) + lower_bound = -x_shape[axis] + if force_non_negative_indices or np.issubdtype(indices_dtype, np.unsignedinteger): + lower_bound = 0 + indices = np.random.randint(lower_bound, x_shape[axis], size=indices_shape).astype( + indices_dtype + ) def build(x, indices): return mb.gather_along_axis(x=x, indices=indices, axis=axis) + builtin_x_dtype = types.numpy_type_to_builtin_type(x_dtype) input_placeholders = { - "x": mb.placeholder(shape=x.shape), - "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype), + "indices": mb.placeholder( + shape=indices.shape, dtype=types.numpy_type_to_builtin_type(indices_dtype) + ), } input_values = {"x": x, "indices": indices} - expected_output_types = tuple(indices_shape[:]) + (types.fp32,) + expected_output_types = tuple(indices_shape[:]) + (builtin_x_dtype,) expected_output = np.take_along_axis(x, indices, axis=axis) run_compare_builder( @@ -604,15 +613,21 @@ def build(x, indices): @mark_api_breaking(breaking_opset_version=ct.target.iOS17) @pytest.mark.parametrize( - "compute_unit, backend, rank_axis", + "compute_unit, backend, rank_axis, x_dtype, indices_dtype", itertools.product( compute_units, backends, [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + [np.float32, np.float16, np.int32], + [np.int32], ), ) - def test_builder_to_backend_programmatic(self, compute_unit, backend, rank_axis): - self._test_builder_to_backend_programmatic(compute_unit, backend, rank_axis, False) + def test_builder_to_backend_programmatic( + self, compute_unit, backend, rank_axis, x_dtype, indices_dtype + ): + self._test_builder_to_backend_programmatic( + compute_unit, backend, rank_axis, x_dtype, indices_dtype, False + ) @pytest.mark.parametrize( "backend, indices_val, validate_indices", diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_operation.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_operation.py index 53b719c0b..a9f43104c 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_operation.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_operation.py @@ -1006,6 +1006,17 @@ def prog(x, y): pad = mb.reshape(x=y, shape=[-1]) res = mb.pad(x=x, pad=pad) + @staticmethod + def test_error_out_with_invalid_padding_value(): + with pytest.raises( + ValueError, + match=r"pad must be non-negative integer, got -1022 at index 6", + ): + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 48, 1, 1024))]) + def prog(x): + y = mb.pad(x=x, pad=[0, 0, 0, 0, 0, 0, -1022, 0], mode="constant") + return y class TestRange1d: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py index ab24a6eea..1a1d31fc3 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py @@ -344,6 +344,15 @@ def build(x): backend=backend, ) + @staticmethod + def test_expand_dims_value_inference_is_inplace(): + @mb.program() + def prog(): + const = mb.const(val=[[2, 3], [4, 5]]) + x = mb.expand_dims(x=const, axes=(1, 2)) + x.val[0, 0, 0, 0] = 112 + 
assert const.val[0, 0] == 112 + return x class TestReshape: @pytest.mark.parametrize( @@ -491,6 +500,16 @@ def prog(x): assert res_sym_val[0][1] == shape.sym_val[1] return res + @staticmethod + def test_reshape_value_inference_is_inplace(): + @mb.program() + def prog(): + const = mb.const(val=[[2, 3], [4, 5]]) + x = mb.reshape(x=const, shape=(4, 1)) + x.val[0, 0] = 112 + assert const.val[0, 0] == 112 + return x + class TestReverse: @pytest.mark.parametrize( "compute_unit, backend", @@ -891,7 +910,7 @@ def test_builder_eval(self): mb.slice_by_index( x=x_val, begin=[1, 1, 1], - end=[2, 3, 4], + end=[2, 3, 3], stride=[1, 1, 2], begin_mask=[False, False, True], end_mask=[True, False, False], @@ -1239,6 +1258,16 @@ def test_builder_eval_rank_0(self): assert type(v.val) == np.float32 assert np.isclose(np.squeeze(x), v.val) + @staticmethod + def test_squeeze_value_inference_is_inplace(): + @mb.program() + def prog(): + const = mb.const(val=[[[2, 3], [4, 5]]]) + x = mb.squeeze(x=const, axes=(0,)) + x.val[0, 0] = 112 + assert const.val[0, 0, 0] == 112 + return x + class TestTranspose: @pytest.mark.parametrize( @@ -1341,6 +1370,15 @@ def build(x): backend=backend, ) + @staticmethod + def test_transpose_value_inference_is_inplace(): + @mb.program() + def prog(): + const = mb.const(val=[[2, 3], [4, 5]]) + x = mb.transpose(x=const, perm=(0, 1)) + x.val[0, 0] = 112 + assert const.val[0, 0] == 112 + return x class TestPixelShuffle: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS16/test_conv.py b/coremltools/converters/mil/mil/ops/tests/iOS16/test_conv.py index 5782f39c9..77f875cf8 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS16/test_conv.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS16/test_conv.py @@ -3,23 +3,18 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -import itertools import numpy as np import pytest -from coremltools.converters.mil import testing_reqs from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.ops.tests.iOS16 import backends from coremltools.converters.mil.testing_utils import get_op_types_in_program -compute_units = testing_reqs.compute_units - - class TestConvolution: - @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) - def test_type_inference_with_constexpr_ops(self, compute_unit, backend): + @pytest.mark.parametrize("backend", backends) + def test_type_inference_with_constexpr_ops(self, backend): # Test the type inference of the conv op doesn't error out for constexpr bias @mb.program( input_specs=[mb.TensorSpec(shape=(1, 3, 4, 4), dtype=types.fp32)], diff --git a/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py index 4e1918623..7d6fa1fbb 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py @@ -11,6 +11,9 @@ import coremltools as ct from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil.ops.tests.iOS14.test_scatter_gather import ( + TestGatherAlongAxis as _TestGatherAlongAxis_iOS14, +) from coremltools.converters.mil.mil.ops.tests.iOS16 import backends from 
coremltools.converters.mil.mil.ops.tests.testing_utils import ( mark_api_breaking, @@ -21,17 +24,24 @@ class TestGather: @pytest.mark.parametrize( - "compute_unit, backend", - itertools.product(compute_units, backends), + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [np.float32, np.float16, np.int32], + [np.int32, np.int16, np.uint16], + ), ) - def test_builder_to_backend_smoke_batch_dims(self, compute_unit, backend): - # TODO MAKE SURE RUN ON IOS17 - x = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=np.float32) - indices = np.array([[[1, 0], [0, 1]], [[1, 0], [0, 0]]], dtype=np.int32) + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + x = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=x_dtype) + indices = np.array([[[1, 0], [0, 1]], [[1, 0], [0, 0]]], dtype=indices_dtype) + builtin_x_dtype = types.numpy_type_to_builtin_type(x_dtype) input_placeholders = { - "x": mb.placeholder(shape=x.shape), - "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype), + "indices": mb.placeholder( + shape=indices.shape, dtype=types.numpy_type_to_builtin_type(indices_dtype) + ), } input_values = {"x": x, "indices": indices} @@ -46,11 +56,11 @@ def build(x, indices): ] expected_output_types = [ - (2, 2, 2, 2, 3, types.fp32), - (2, 2, 2, 3, types.fp32), - (2, 2, 2, 2, 2, types.fp32), - (2, 2, 2, 2, types.fp32), - (2, 2, 2, types.fp32), + (2, 2, 2, 2, 3, builtin_x_dtype), + (2, 2, 2, 3, builtin_x_dtype), + (2, 2, 2, 2, 2, builtin_x_dtype), + (2, 2, 2, 2, builtin_x_dtype), + (2, 2, 2, builtin_x_dtype), ] expected_outputs = [ @@ -65,14 +75,14 @@ def build(x, indices): [[[10, 11, 12], [7, 8, 9]], [[7, 8, 9], [7, 8, 9]]], ], ], - dtype=np.float32, + dtype=x_dtype, ), np.array( [ [[[4, 5, 6], [1, 2, 3]], [[1, 2, 3], [4, 5, 6]]], [[[10, 11, 12], [7, 8, 9]], [[7, 8, 9], [7, 8, 9]]], ], - dtype=np.float32, + dtype=x_dtype, ), np.array( [ @@ -82,13 +92,13 @@ def build(x, indices): [[[11, 10], [10, 11]], [[11, 10], [10, 10]]], ], ], - dtype=np.float32, + dtype=x_dtype, ), np.array( [[[[2, 1], [1, 2]], [[5, 4], [4, 5]]], [[[8, 7], [7, 7]], [[11, 10], [10, 10]]]], - dtype=np.float32, + dtype=x_dtype, ), - np.array([[[2, 1], [4, 5]], [[8, 7], [10, 10]]], dtype=np.float32), + np.array([[[2, 1], [4, 5]], [[8, 7], [10, 10]]], dtype=x_dtype), ] run_compare_builder( @@ -127,19 +137,57 @@ def prog(x): ) +class TestGatherAlongAxis(_TestGatherAlongAxis_iOS14): + @pytest.mark.parametrize( + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [np.float32, np.float16, np.int32], + [np.int32, np.int16, np.uint16], + ), + ) + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + super().test_builder_to_backend_smoke(compute_unit, backend, x_dtype, indices_dtype) + + @pytest.mark.parametrize( + "compute_unit, backend, rank_axis, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + [np.float32, np.float16, np.int32], + [np.int32, np.int16, np.uint16], + ), + ) + def test_builder_to_backend_programmatic( + self, compute_unit, backend, rank_axis, x_dtype, indices_dtype + ): + super()._test_builder_to_backend_programmatic( + compute_unit, backend, rank_axis, x_dtype, indices_dtype, True + ) + + class TestGatherNd: @pytest.mark.parametrize( - "compute_unit, 
backend", - itertools.product(compute_units, backends), + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [np.float32, np.float16, np.int32], + [np.int32, np.int16, np.uint16], + ), ) - def test_builder_to_backend_smoke_batch_dims(self, compute_unit, backend): - # TODO MAKE SURE RUN ON IOS17 + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): x = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=np.float32) indices = np.array([[[1, 0], [0, 1]], [[1, 0], [0, 0]]], dtype=np.int32) + builtin_x_dtype = types.numpy_type_to_builtin_type(x_dtype) input_placeholders = { - "x": mb.placeholder(shape=x.shape), - "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype), + "indices": mb.placeholder( + shape=indices.shape, dtype=types.numpy_type_to_builtin_type(indices_dtype) + ), } input_values = {"x": x, "indices": indices} @@ -150,11 +198,11 @@ def build(x, indices): mb.gather_nd(x=x, indices=indices, batch_dims=1), ] - expected_output_types = [(2, 2, 3, types.fp32), (2, 2, types.fp32)] + expected_output_types = [(2, 2, 3, builtin_x_dtype), (2, 2, builtin_x_dtype)] expected_outputs = [ - np.array([[[7, 8, 9], [4, 5, 6]], [[7, 8, 9], [1, 2, 3]]], dtype=np.float32), - np.array([[4, 2], [10, 7]], dtype=np.float32), + np.array([[[7, 8, 9], [4, 5, 6]], [[7, 8, 9], [1, 2, 3]]], dtype=x_dtype), + np.array([[4, 2], [10, 7]], dtype=x_dtype), ] run_compare_builder( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py index a020003f6..ce014a3a1 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py @@ -13,6 +13,7 @@ from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.ops.tests.iOS17 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder +from coremltools.converters.mil.mil.types import builtin_to_string, nptype_from_builtin from coremltools.converters.mil.mil.types.type_mapping import numpy_type_to_builtin_type from coremltools.converters.mil.testing_reqs import compute_units @@ -59,6 +60,29 @@ def build(x): backend=backend, ) + @pytest.mark.parametrize( + "compute_unit, backend, x_input_type, weight_input_type", + itertools.product( + compute_units, + backends, + [types.int32, types.fp16, types.fp32], + [types.int32, types.fp16, types.fp32], + ), + ) + def test_default_bias_type_ios17(self, compute_unit, backend, x_input_type, weight_input_type): + # Start from iOS17, x and weight can have different dtype. + # Test the default bias matches the dtype of weight. 
+ @mb.program( + input_specs=[mb.TensorSpec(shape=(1, 2), dtype=types.fp32)], + opset_version=backend.opset_version, + ) + def prog(x): + x = mb.cast(x=x, dtype=builtin_to_string(x_input_type)) + weight = np.random.rand(3, 2).astype(nptype_from_builtin(weight_input_type)) + res = mb.linear(x=x, weight=weight) + assert res.op.bias.val.dtype == nptype_from_builtin(weight_input_type) + return res + class TestMatMul: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py index 7199c8a2c..6d5d0cdeb 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py @@ -11,10 +11,16 @@ from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.ops.tests.iOS14.test_scatter_gather import ( - TestGatherAlongAxis as _TestGatherAlongAxis_iOS14, + TestGatherAlongAxis as _TestGatherAlongAxisIOS14, ) from coremltools.converters.mil.mil.ops.tests.iOS14.test_scatter_gather import ( - TestScatterAlongAxis as _TestScatterAlongAxis_iOS14, + TestScatterAlongAxis as _TestScatterAlongAxisIOS14, +) +from coremltools.converters.mil.mil.ops.tests.iOS16.test_scatter_gather import ( + TestGather as _TestGatherIOS16, +) +from coremltools.converters.mil.mil.ops.tests.iOS16.test_scatter_gather import ( + TestGatherNd as _TestGatherNdIOS16, ) from coremltools.converters.mil.mil.ops.tests.iOS17 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder @@ -107,7 +113,7 @@ class TestScatterAlongAxis: ), ) def test_builder_to_backend_programmatic(self, compute_unit, backend, rank_axis): - _TestScatterAlongAxis_iOS14._test_builder_to_backend_programmatic( + _TestScatterAlongAxisIOS14._test_builder_to_backend_programmatic( compute_unit, backend, rank_axis, force_non_negative_indices=True ) @@ -241,7 +247,19 @@ def build_dynamic(data, indices, updates): assert any([err in str(excinfo.value) for err in expected_error_msg]) -class TestGather: +class TestGather(_TestGatherIOS16): + @pytest.mark.parametrize( + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [np.float32, np.float16, np.int32, np.int16, np.uint16, np.int8, np.uint8], + [np.int32, np.int16, np.uint16, np.int8, np.uint8], + ), + ) + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + super().test_builder_to_backend_smoke(compute_unit, backend, x_dtype, indices_dtype) + @pytest.mark.parametrize( "backend, indices_val, validate_indices", itertools.product(backends, [[-1, 0], [0, 3]], [True, False]), @@ -276,16 +294,20 @@ def prog(x): class TestGatherAlongAxis: @pytest.mark.parametrize( - "compute_unit, backend, rank_axis", + "compute_unit, backend, rank_axis, x_dtype, indices_dtype", itertools.product( compute_units, backends, - [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + [(rank, axis) for rank in (3,) for axis in (-rank, 0, rank - 1)], + [np.float32, np.float16, np.int32, np.int16, np.uint16, np.int8, np.uint8], + [np.int32, np.int16, np.uint16, np.int8, np.uint8], ), ) - def test_builder_to_backend_programmatic(self, compute_unit, backend, rank_axis): - _TestGatherAlongAxis_iOS14._test_builder_to_backend_programmatic( - compute_unit, backend, rank_axis, True + def test_builder_to_backend_programmatic( + self, 
compute_unit, backend, rank_axis, x_dtype, indices_dtype + ): + _TestGatherAlongAxisIOS14._test_builder_to_backend_programmatic( + compute_unit, backend, rank_axis, x_dtype, indices_dtype, True ) @pytest.mark.parametrize( @@ -327,7 +349,20 @@ def prog(x): opset_version=backend.opset_version, )(prog) -class TestGatherNd: + +class TestGatherNd(_TestGatherNdIOS16): + @pytest.mark.parametrize( + "compute_unit, backend, x_dtype, indices_dtype", + itertools.product( + compute_units, + backends, + [np.float32, np.float16, np.int32, np.int16, np.uint16, np.int8, np.uint8], + [np.int32, np.int16, np.uint16, np.int8, np.uint8], + ), + ) + def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + super().test_builder_to_backend_smoke(compute_unit, backend, x_dtype, indices_dtype) + @pytest.mark.parametrize( "backend, indices_val, validate_indices", itertools.product( diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py index f26045a0d..3b8125c4f 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py @@ -21,6 +21,136 @@ from coremltools.converters.mil.mil.passes.pass_registry import register_pass +@register_pass(namespace="common") +class merge_tensorwise_affine_dequantize_with_consecutive_ops(AbstractGraphPass): + """ + This graph pass does const folding to a chain of supported ops starts with a + tensor-wise ``constexpr_affine_dequantize`` op. i.e., both ``scale`` and + ``zero_point`` are scalar (rank 0). + + For example: + Input graph: + data -> constexpr_affine_dequantize -> transpose -> expand_dims -> out + + Output graph: + new_data -> constexpr_affine_dequantize -> out + + where ``new_data`` is computed by ``data -> transpose -> expand_dims``. + + Note that, the graph pass only supports const folding of a single linked list pattern. 
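# A minimal numpy sanity check (illustrative only, names are local to this sketch)
# of the property the fold relies on: with a scalar (tensor-wise) scale and
# zero_point, affine-dequantizing then transposing gives the same result as
# transposing the quantized data first and dequantizing afterwards.
import numpy as np

quantized = np.random.randint(-128, 128, size=(2, 3, 4)).astype(np.int8)
scale, zero_point = np.float32(8.9), np.int8(34)
dequant_then_move = ((quantized.astype(np.float32) - zero_point) * scale).transpose(2, 0, 1)
move_then_dequant = (quantized.transpose(2, 0, 1).astype(np.float32) - zero_point) * scale
np.testing.assert_allclose(dequant_then_move, move_then_dequant)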
+ For example, the following pattern will not be changed: + + data ---> constexpr_affine_dequantize -> transpose -> out + | + --> constexpr_affine_dequantize -> reshape -> out_2 + """ + + SUPPORTED_OPS = [ + "transpose", + "reshape", + "expand_dims", + "squeeze", + ] + + def apply(self, prog): + for f in prog.functions.values(): + block_changed = True + while block_changed: + block_changed = self.merge_tensorwise_affine_dequantize_with_consecutive_ops_block( + f + ) + + @block_context_manager + def merge_tensorwise_affine_dequantize_with_consecutive_ops_block(self, block): + fusion_status = False + for op in list(block.operations): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = ( + self.merge_tensorwise_affine_dequantize_with_consecutive_ops_block(b) + ) + + if op.op_type != "constexpr_affine_dequantize": + continue + + fusion_status = self._try_to_transform(op, block) + if fusion_status: + return fusion_status + return fusion_status + + @staticmethod + def _apply_equivalent_transform(val, op): + if op.op_type not in merge_tensorwise_affine_dequantize_with_consecutive_ops.SUPPORTED_OPS: + raise ValueError(f"unsupported op_type {op.op_type}") + + if op.op_type == "transpose": + return np.transpose(val, axes=op.perm.val) + if op.op_type == "reshape": + return np.reshape(val, op.outputs[0].shape) + if op.op_type == "expand_dims": + return np.expand_dims(val, axis=op.axes.val.tolist()) + if op.op_type == "squeeze": + axes = op.axes + if axes is None or axes.val is None: + return np.squeeze(val) + return np.squeeze(val, axis=tuple(op.axes.val.tolist())) + + @staticmethod + def _try_to_transform(op, block): + # first check if it is tensorwise quantization + if op.scale.rank != 0 or op.zero_point.rank != 0: + return False + + # first check if quantized_data only feeds into a single op + if len(op.quantized_data.child_ops) != 1: + return False + + # traverse the graph to get a chain of applicable ops to fold + ops_to_fold = [] + cursor = op + while True: + prev_cursor = cursor + if cursor.outputs[0] in block.outputs: + break + for val in merge_tensorwise_affine_dequantize_with_consecutive_ops.SUPPORTED_OPS: + if _check_child_op_type(cursor, val): + ops_to_fold.append(cursor.outputs[0].child_ops[0]) + cursor = ops_to_fold[-1] + break + if prev_cursor == cursor: + break + + if len(ops_to_fold) == 0: + return False + + # do the same transformation on the source quantized data + cursor = op.quantized_data.val + for val in ops_to_fold: + cursor = ( + merge_tensorwise_affine_dequantize_with_consecutive_ops._apply_equivalent_transform( + cursor, val + ) + ) + + # after transformation, we create a new constexpr_affine_dequantize op and do the replacement + new_var = mb.constexpr_affine_dequantize( + quantized_data=cursor, + zero_point=op.zero_point, + scale=op.scale, + axis=op.axis, + name=ops_to_fold[-1].outputs[0].name, + before_op=ops_to_fold[-1], + ) + block.replace_uses_of_var_after_op( + anchor_op=ops_to_fold[-1], + old_var=ops_to_fold[-1].outputs[0], + new_var=new_var, + force_replace=True, + ) + block.remove_ops([op] + ops_to_fold) + return True + @register_pass(namespace="common") class int_op_canonicalization(AbstractGraphPass): """ diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py b/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py index 5733ab1b7..f0c0fd8ff 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py +++ 
b/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py @@ -19,6 +19,83 @@ from coremltools.converters.mil.mil.types.symbolic import any_symbolic +@register_pass(namespace="common") +class fuse_squeeze_expand_dims(AbstractGraphPass): + """ + Detect the pattern ``input-->squeeze-->expand_dims``, and fuse + them into an ``identity`` op if ``squeeze`` and ``expand_dims`` cancel out each other. + Note that, the ``identity`` can be further removed by ``noop_elimination``. + + .. code-block:: + + Given: + %x[3, 1, 4, 1] + %1[3, 4] = squeeze(%x, axes=[1, 3]) + %2[3, 1, 4, 1] = expand_dims(%1, axes=[1, 3]) + %3 = op(%2) + + Result: + %x[3, 1, 4, 1] + %2[3, 1, 4, 1] = identity(%x) + %3 = op(%2) + """ + + def apply(self, prog): + for f in prog.functions.values(): + block_changed = True + while block_changed: + block_changed = self.fuse_squeeze_expand_dims_block(f) + + @block_context_manager + def fuse_squeeze_expand_dims_block(self, block): + fusion_status = False + for op in list(block.operations): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = self.fuse_squeeze_expand_dims_block(b) + + if len(op.blocks) > 0: + continue + + squeeze_op = self._match_pattern(op) + if squeeze_op is not None: + fusion_status = self._try_to_transform(squeeze_op, block) + # has to break as the downstream iterator is affected. + if fusion_status: + return fusion_status + return fusion_status + + @staticmethod + def _match_pattern(op): + if op.op_type != "squeeze": + return None + if not _check_child_op_type(op, "expand_dims"): + return None + return op + + @staticmethod + def _try_to_transform(op, block): + expand_dims_op = op.outputs[0].child_ops[0] + x = op.x + out_var = expand_dims_op.outputs[0] + if x.shape != out_var.shape: + return False + if op.outputs[0] in block.outputs: + return False + + new_var = mb.identity(x=x, before_op=op) + if op.enclosing_block.try_replace_uses_of_var_after_op( + anchor_op=expand_dims_op, + old_var=out_var, + new_var=new_var, + ): + # Remove all the ops at once + block.remove_ops([op, expand_dims_op]) + return True + return False + + @register_pass(namespace="common") class expand_high_rank_reshape_and_transpose(AbstractGraphPass): """ diff --git a/coremltools/converters/mil/mil/passes/defs/quantization.py b/coremltools/converters/mil/mil/passes/defs/quantization.py index 4ddea93a6..fab3e1656 100644 --- a/coremltools/converters/mil/mil/passes/defs/quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/quantization.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from abc import abstractmethod from enum import Enum as _Enum from typing import Set, Text @@ -11,10 +12,13 @@ from coremltools.converters.mil._deployment_compatibility import AvailableTarget from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Operation, types +from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with +from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass from coremltools.converters.mil.mil.program import Program +from coremltools.converters.mil.mil.types.symbolic import is_symbolic class 
ComputePrecision(_Enum): @@ -30,7 +34,6 @@ class AbstractQuantizationPass(AbstractGraphPass): - is_valid_op(op) - transform_op(op) """ - type_eps = {} type_min = {} type_negmin = {} @@ -119,7 +122,127 @@ def __str__(self): return type(self).__name__ -class FP16ComputePrecision(AbstractQuantizationPass): +class CastTypeQuantization(AbstractQuantizationPass): + """ + Base class for all type casting related quantization, such as fp32->fp16, int32->int16, etc. + + For each valid op, if the "op_selector" return True: + - For each input with dtype `origin_dtype`, inject a "cast" op to change it to `target_dtype`. + - For each output with dtype `target_dtype`, inject a "cast" op to change it back to `origin_dtype`. + All child classes need to specify `origin_dtype` and `target_dtype`. + """ + + def __init__(self, op_selector=None): + super().__init__(op_selector=op_selector) + + # Var that feeds into multiple ops will be cast once and cached into this dict + # For reference: Checkout test_single_input_to_multiple_operations in `TestFP16CastTransform`. + self.cache_vars = {} + + @property + @abstractmethod + def origin_dtype(self) -> str: + """Original dtype that need to be cast, such as fp32.""" + raise NotImplementedError("origin_dtype must be specified in subclass.") + + @property + @abstractmethod + def target_dtype(self) -> str: + """Target dtype, such as fp16.""" + raise NotImplementedError("target_dtype must be specified in subclass.") + + def should_cast_parameter(self, op: Operation, param_name: str) -> bool: + """ + Determines if a param of an op should be cast to target_dtype. + + There are two cases that an op shouldn't be cast: + 1. The op's parameter doesn't support target_dtype. + 2. The cast op itself doesn't support target_dtype + """ + type_domain = getattr(op.input_spec.input_types[param_name], "type_domain", None) + if type_domain and types.string_to_builtin(self.target_dtype) not in type_domain: + return False + if self.target_dtype not in SSAOpRegistry._get_core_op_cls("cast").supported_dtypes(): + return False + + return True + + def transform_op(self, op) -> None: + """Transform the input(s)/output(s) dtypes of the op.""" + block = op.enclosing_block + casted_inputs = {} + inputs_modified = False + + for param, inputs in op.inputs.items(): + if not self.should_cast_parameter(op, param): + continue + + is_list_input = isinstance(inputs, (list, tuple)) + if not is_list_input: + inputs = [inputs] + + casted_inputs[param] = list(inputs[:]) + for i, var in enumerate(inputs): + if not var.is_tensor_or_scalar_of(dtype=self.origin_dtype): + continue + + inputs_modified = True + casted_var_name = f"{var.name}_to_{self.target_dtype}" + if ( + len(var._child_ops) > 1 + and casted_var_name in self.cache_vars + and (block.is_var_visible_in_block(self.cache_vars[casted_var_name])) + ): + casted_inputs[param][i] = self.cache_vars[casted_var_name] + else: + x = mb.cast(x=var, dtype=self.target_dtype, name=casted_var_name, before_op=op) + if self.target_dtype == "fp16": + self._check_underflow_to_zero(x, var) + + casted_inputs[param][i] = x + if len(var._child_ops) > 1: + self.cache_vars[casted_var_name] = casted_inputs[param][i] + + if not is_list_input: + casted_inputs[param] = casted_inputs[param][0] + + if inputs_modified: + casted_inputs.update({k: v for k, v in op.inputs.items() if k not in casted_inputs}) + casted_inputs["name"] = f"{op.name}_cast_{self.target_dtype}" + casted_inputs["before_op"] = op + quant_output = getattr(mb, op.op_type)(**casted_inputs) + + if not 
isinstance(quant_output, (list, tuple)): + quant_output = [quant_output] + + for old_output_var, new_output_var in zip(op.outputs, quant_output): + if old_output_var.is_tensor_or_scalar_of(dtype=self.origin_dtype) and ( + not new_output_var.is_tensor_or_scalar_of(dtype=self.origin_dtype) + ): + x = mb.cast( + x=new_output_var, + dtype=self.origin_dtype, + name=f"{new_output_var.name}_to_{self.origin_dtype}", + before_op=op, + ) + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=old_output_var, + new_var=x, + force_replace=True, + ) + else: + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=old_output_var, + new_var=new_output_var, + force_replace=True, + ) + + block.remove_ops([op]) + + +class FP16ComputePrecision(CastTypeQuantization): """ This transform does the following, for each valid op and if the "op_selector" return True: - For each input of dtype float32, inject a "cast" op to change it to float16 dtype @@ -137,21 +260,43 @@ class FP16ComputePrecision(AbstractQuantizationPass): } _ELEMENTWISE_UNARY_EPSILON_OPS: Set[str] = {"inverse", "log", "rsqrt"} + # Unsupported op for fp16 casting + _UNSUPPORTED_FP16_OPS: Set[str] = { + "cast", + "while_loop", + "cond", + # TODO: Remove after supporting FP16 dynamic quantize transformation for list ops (rdar://74458192) + "make_list", + "list_gather", + "list_scatter", + "list_read", + "list_write", + "list_length", + } + def __init__(self, op_selector=None): super(FP16ComputePrecision, self).__init__(op_selector=op_selector) - self.target_dtype = "fp16" - # Var that feeds into multiple ops will be casted once and cached into this dict - # For reference: Checkout test_single_input_to_multiple_operations in `TestFP16CastTransform`. - self.cache_vars = {} + @property + def origin_dtype(self) -> str: + return "fp32" - def fp16_overflow(self, op: Operation) -> bool: - # This overflow check consists of two parts: - # 1. For valid fp32 numbers (abs < 1e38), we want their exact values, - # so we make sure they are within fp16 range [-65504, 65504] - # 2. For inifinities (abs >= 1e38), their exact values does not matter, - # so we can always downcast them to fp16 inf. For example, in attention mask - # we just want -inf to make the masked entries have 0 probability after softmax + @property + def target_dtype(self) -> str: + return "fp16" + + @staticmethod + def fp16_overflow(op: Operation) -> bool: + """ + Determines if any of the op's input will overflow when represented by FP16. + + This overflow check consists of two parts: + 1. For valid fp32 numbers (abs < 1e38), we want their exact values, + so we make sure they are within fp16 range [-65504, 65504] + 2. For inifinities (abs >= 1e38), their exact values does not matter, + so we can always downcast them to fp16 inf. 
For example, in attention mask + we just want -inf to make the masked entries have 0 probability after softmax + """ for _, inputs in op.inputs.items(): is_list_input = isinstance(inputs, (list, tuple)) if not is_list_input: @@ -170,18 +315,7 @@ def fp16_overflow(self, op: Operation) -> bool: def is_valid_op(self, op: Operation) -> bool: """Determines if op is valid for fp16 casting.""" - if op.op_type in ["cast", "while_loop", "cond"]: - return False - - # TODO: Remove after supporting FP16 dynamic quantize transformation for list ops (rdar://74458192) - if op.op_type in [ - "make_list", - "list_gather", - "list_scatter", - "list_read", - "list_write", - "list_length", - ]: + if op.op_type in self._UNSUPPORTED_FP16_OPS: return False if self.fp16_overflow(op): @@ -190,13 +324,11 @@ def is_valid_op(self, op: Operation) -> bool: return True def should_cast_parameter(self, op: Operation, param_name: str) -> bool: - """Determines if a param of an op should be casted to fp16.""" - # Make sure the param is valid for fp16 when type domain is specified. - type_domain = getattr(op.input_spec.input_types[param_name], "type_domain", None) - if type_domain and types.fp16 not in type_domain: + """Determines if a param of an op should be cast to fp16.""" + if not super().should_cast_parameter(op, param_name): return False - if op.opset_version >= AvailableTarget.iOS17: + if is_current_opset_version_compatible_with(AvailableTarget.iOS17): # In IOS17+ activation ops with alpha/beta support mixed precision, and we don't want to # cast alpha/beta to fp16 for better numerical accuracy. if op.op_type in self._ACTIVATION_ALPHA_OPS and param_name == "alpha": @@ -239,80 +371,6 @@ def _check_underflow_to_zero(self, new_var, var): else: new_var._sym_val.val = new_val.reshape(new_var.val.shape) - def transform_op(self, op): - block = op.enclosing_block - casted_inputs = {} - inputs_modified = False - - for param, inputs in op.inputs.items(): - # First loop, iterates over all the input parameters of an operation. - if not self.should_cast_parameter(op, param): - continue - - is_list_input = isinstance(inputs, (list, tuple)) - if not is_list_input: - inputs = [inputs] - - casted_inputs[param] = list(inputs[:]) - for i, var in enumerate(inputs): - # Second loop, iterates over all the vars of a python list corresponding to an input parameter. 
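# Standalone numpy sketch (the helper name is hypothetical) of the fp16 overflow
# rule documented in fp16_overflow above: finite fp32 values outside
# [-65504, 65504] would lose their exact value in fp16, while near-infinite
# values (abs >= 1e38) are allowed to collapse to fp16 inf.
import numpy as np

def _would_overflow_in_fp16(values: np.ndarray) -> bool:
    finite = np.abs(values) < 1e38
    return bool(np.any(np.abs(values[finite]) > 65504))

assert _would_overflow_in_fp16(np.array([7.0e4], dtype=np.float32))
assert not _would_overflow_in_fp16(np.array([123.0, -65504.0], dtype=np.float32))
assert not _would_overflow_in_fp16(np.array([np.inf], dtype=np.float32))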
- if not var.is_tensor_or_scalar_of(dtype="fp32"): - continue - - inputs_modified = True - casted_var_name = var.name + "_to_fp16" - if ( - len(var._child_ops) > 1 - and casted_var_name in self.cache_vars - and (block.is_var_visible_in_block(self.cache_vars[casted_var_name])) - ): - casted_inputs[param][i] = self.cache_vars[casted_var_name] - else: - x = mb.cast(x=var, dtype="fp16", name=casted_var_name, before_op=op) - self._check_underflow_to_zero(x, var) - - casted_inputs[param][i] = x - if len(var._child_ops) > 1: - self.cache_vars[casted_var_name] = casted_inputs[param][i] - - if not is_list_input: - casted_inputs[param] = casted_inputs[param][0] - - if inputs_modified: - casted_inputs.update({k: v for k, v in op.inputs.items() if k not in casted_inputs}) - casted_inputs["name"] = op.name + "_cast" - casted_inputs["before_op"] = op - quant_output = getattr(mb, op.op_type)(**casted_inputs) - - if not isinstance(quant_output, (list, tuple)): - quant_output = [quant_output] - - for old_output_var, new_output_var in zip(op.outputs, quant_output): - if old_output_var.is_tensor_or_scalar_of(dtype="fp32") and ( - not new_output_var.is_tensor_or_scalar_of(dtype="fp32") - ): - x = mb.cast( - x=new_output_var, - dtype="fp32", - name=new_output_var.name + "_to_fp32", - before_op=op, - ) - op.enclosing_block.replace_uses_of_var_after_op( - anchor_op=op, - old_var=old_output_var, - new_var=x, - force_replace=True, - ) - else: - op.enclosing_block.replace_uses_of_var_after_op( - anchor_op=op, - old_var=old_output_var, - new_var=new_output_var, - force_replace=True, - ) - - block.remove_ops([op]) - @register_pass(namespace="common") class add_fp16_cast(FP16ComputePrecision): @@ -338,3 +396,61 @@ def skip_ops_by_type(self): @skip_ops_by_type.setter def skip_ops_by_type(self, criteria: Text): self._skip_ops_by_type = set(criteria.split(",")) + + +@register_pass(namespace="common") +class add_int16_cast(CastTypeQuantization): + """ + This transform does the following, for each op that supports int16: + - For each input of dtype int32 which actually supports int16, inject a "cast" op to change it + to int16 dtype. + - For each output of dtype int16, inject a "cast" op to change it back to int32. + It's mainly for int16 op ANE residency. + """ + # Ops that prefer int16 params. + _PREFER_INT16_OPS: Set[str] = {"gather", "gather_along_axis", "gather_nd"} + + def __init__(self, op_selector=None): + super().__init__(op_selector=op_selector) + + @property + def origin_dtype(self) -> str: + return "int32" + + @property + def target_dtype(self) -> str: + return "int16" + + @staticmethod + def int16_overflow(op: Operation) -> bool: + """ + Determines if any of the op's input will overflow when represented by int16. Constants with + values more than np.iinfo(np.int16).max or less than np.iinfo(np.int16).min overflows in int16. + """ + _INT16_MAX = np.iinfo(np.int16).max + _INT16_MIN = np.iinfo(np.int16).min + for _, inputs in op.inputs.items(): + is_list_input = isinstance(inputs, (list, tuple)) + if not is_list_input: + inputs = [inputs] + for var in inputs: + if var.val is not None and var.is_tensor_or_scalar_of(dtype="int32"): + if np.any(var.val > _INT16_MAX) or np.any(var.val < _INT16_MIN): + return True + + # In `gather` and `gather_along_axis`, if the dim size of x is larger than int16 upperbound, + # the dynamic indices could overflow. 
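# Standalone sketch (helper name is hypothetical) of the int16 eligibility rules
# above: a constant int32 tensor must fit entirely inside the int16 range, and a
# gather over a dimension larger than the int16 maximum keeps int32 indices.
import numpy as np

INT16_MIN, INT16_MAX = np.iinfo(np.int16).min, np.iinfo(np.int16).max  # -32768, 32767

def _fits_int16(values: np.ndarray) -> bool:
    return bool(np.all((values >= INT16_MIN) & (values <= INT16_MAX)))

assert _fits_int16(np.array([0, 100, -32768], dtype=np.int32))
assert not _fits_int16(np.array([40000], dtype=np.int32))
assert 40000 > INT16_MAX  # a dim size this large disqualifies dynamic indices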
+ if ( + op.op_type in {"gather", "gather_along_axis"} + and op.indices.val is None + and op.x.shape is not None + ): + dim_size = op.x.shape[op.axis.val] + if not is_symbolic(dim_size) and dim_size > _INT16_MAX: + return True + + return False + + def is_valid_op(self, op: Operation) -> bool: + """Determines if op is valid for int16 casting.""" + return op.op_type in self._PREFER_INT16_OPS and not self.int16_overflow(op) diff --git a/coremltools/converters/mil/mil/passes/pass_pipeline.py b/coremltools/converters/mil/mil/passes/pass_pipeline.py index 085269b14..d67e64d8f 100644 --- a/coremltools/converters/mil/mil/passes/pass_pipeline.py +++ b/coremltools/converters/mil/mil/passes/pass_pipeline.py @@ -50,6 +50,7 @@ "common::fuse_gelu_exact", "common::fuse_leaky_relu", "common::rank0_expand_dims_swap", + "common::fuse_squeeze_expand_dims", "common::compose_conv1d", # compose conv1d before any other conv passes "common::use_reflection_padding", "common::merge_consecutive_paddings", @@ -91,6 +92,7 @@ # which detects patterns that involve redundant ops ("sub") etc. "common::remove_redundant_ops", "common::add_fp16_cast", # Will be removed if compute precision is not FP16. + "common::add_int16_cast", # Will be removed if compute precision is not FP16. "common::dead_code_elimination", # always end with dce ] @@ -98,8 +100,11 @@ "common::dead_code_elimination", "common::const_elimination", "common::cast_optimization", + "common::dead_code_elimination", # must follow cast_optimization "common::const_elimination", "common::const_deduplication", # after all consts have been settled + "common::dead_code_elimination", # come before merge_tensorwise_affine_dequantize_with_consecutive_ops + "common::merge_tensorwise_affine_dequantize_with_consecutive_ops", # after const_deduplication and dead_code_elimination "common::loop_invariant_elimination", "common::noop_elimination", "common::dedup_op_and_var_names", @@ -371,6 +376,11 @@ def get_pipeline(cls, pipeline_name: Text) -> PassPipeline: ) return PassPipeline(cls._PIPELINE_NAME_TO_PASSES[pipeline_name], pipeline_name) + @classmethod + def list_available_pipelines(cls) -> List[str]: + """List all available pipelines.""" + return list(cls._PIPELINE_NAME_TO_PASSES.keys()) + """ ======================================= Pre-defined PassPipeline configurations diff --git a/coremltools/converters/mil/mil/passes/tests/test_pass_pipeline.py b/coremltools/converters/mil/mil/passes/tests/test_pass_pipeline.py index d48103a55..58687cea8 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_pass_pipeline.py +++ b/coremltools/converters/mil/mil/passes/tests/test_pass_pipeline.py @@ -111,3 +111,9 @@ def test_get_invalid_pipeline(self): match="There is no pipeline for `invalid`.", ): PassPipeline.get_pipeline("invalid") + + def test_list_available_pipelines(self): + available_pipelines = PassPipeline.list_available_pipelines() + assert len(available_pipelines) == 12 + assert "default" in available_pipelines + assert "default_palettization" in available_pipelines diff --git a/coremltools/converters/mil/mil/passes/tests/test_passes.py b/coremltools/converters/mil/mil/passes/tests/test_passes.py index f1aa8598d..406507185 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_passes.py @@ -248,6 +248,83 @@ def _false_fn(): assert_op_count_match(prog, expect=6, op="const") +class TestFuseSqueezeExpandDims: + @pytest.mark.parametrize( + "rank", + [1, 5], + ) + def 
test_fuse_squeeze_expand_dims_basic(self, rank): + """ + Given: + %1 = squeeze(%x) + %2 = expand_dims(%1) + %3 = relu(%2) + + Result: + %3 = relu(%x) + """ + if rank == 1: + input_shape = (1,) + axes = (0,) + else: + assert rank == 5 + input_shape = (3, 1, 4, 1, 1) + axes = (1, 3, 4) + + @mb.program(input_specs=[mb.TensorSpec(shape=input_shape)]) + def prog(x): + x = mb.squeeze(x=x, axes=axes) + x = mb.expand_dims(x=x, axes=axes) + return mb.relu(x=x) + + # fuse_squeeze_expand_dims fused squeeze + expand_dims into identity + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["identity", "relu"] + + # noop_elimination can further remove the identity op + apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prog) == ["relu"] + + def test_fuse_squeeze_expand_dims_negative(self): + """ + If squeeze and expand_dims cannot cancel each other, + the graph pass does nothing + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 1, 4, 1, 1))]) + def prog(x): + x = mb.squeeze(x=x, axes=(1, 2)) + x = mb.expand_dims(x=x, axes=(1, 3)) + return mb.relu(x=x) + + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] + + def test_fuse_squeeze_expand_dims_connected_output(self): + """ + If squeeze is connected to block output, it cannot be removed. + However, the expand_dims can be a block output. + """ + # squeeze connected to output. Nothing happens. + @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + squeeze = mb.squeeze(x=x, axes=(0,)) + expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) + return mb.relu(x=expand_dims), squeeze + + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] + + # expand_dims connected to output. Still good to fuse. 
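# A plain numpy illustration (separate from these tests) of the cancellation
# property the pass relies on: squeezing size-1 axes and re-expanding the same
# axes restores the original tensor exactly.
import numpy as np

x = np.random.rand(3, 1, 4, 1, 1).astype(np.float32)
restored = np.expand_dims(np.squeeze(x, axis=(1, 3, 4)), axis=(1, 3, 4))
assert restored.shape == x.shape
np.testing.assert_array_equal(restored, x)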
+ @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + squeeze = mb.squeeze(x=x, axes=(0,)) + expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) + return mb.relu(x=expand_dims), expand_dims + + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["identity", "relu"] + class TestConstElimination: def test_const_elimination(self): @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) @@ -3261,7 +3338,7 @@ def prog(x): return x prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_high_rank_reshape_and_transpose") - prog._check_invalid_program() + prog._check_early_error_out_for_invalid_program() assert get_op_types_in_program(prog) == ["reshape", "transpose", "reshape"] TestExpandHighRankReshapeAndTranspose._test_numerical(prev_prog, input_shape, reshape_shape, perm, output_shape) @@ -3279,7 +3356,7 @@ def prog(x): return x prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_high_rank_reshape_and_transpose") - prog._check_invalid_program() + prog._check_early_error_out_for_invalid_program() assert get_op_types_in_program(prog) == ["reshape", "transpose", "reshape"] TestExpandHighRankReshapeAndTranspose._test_numerical(prev_prog, input_shape, reshape_shape, perm, output_shape) @@ -3298,7 +3375,7 @@ def prog(x): prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_high_rank_reshape_and_transpose") - prog._check_invalid_program() + prog._check_early_error_out_for_invalid_program() assert get_op_types_in_program(prog) == ["reshape", "transpose"] * 16 + ["reshape"] TestExpandHighRankReshapeAndTranspose._test_numerical(prev_prog, input_shape, reshape_shape, perm, output_shape) @@ -3318,7 +3395,7 @@ def prog(x): prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_high_rank_reshape_and_transpose") with pytest.raises(ValueError, match="Core ML only supports tensors with rank <= 5"): - prog._check_invalid_program() + prog._check_early_error_out_for_invalid_program() class TestMergeConsecutiveRelus: diff --git a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py index 5d0c43333..b1cbcf52f 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py @@ -9,10 +9,11 @@ import numpy as np import parameterized import pytest +from mock import patch import coremltools as ct import coremltools.converters.mil.mil.types as types -from coremltools._deps import _IS_MACOS +from coremltools._deps import _HAS_TORCH, _IS_MACOS, MSG_TORCH_NOT_FOUND from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil.passes.defs import quantization from coremltools.converters.mil.mil.types import numpy_type_to_builtin_type @@ -22,9 +23,282 @@ get_op_types_in_program, ) +if _HAS_TORCH: + import torch + import torch.nn as nn + np.random.seed(1818) +class TestTensorwiseAffineDequantizeConstElimination: + def test_eliminate_transpose(self): + """ + Input graph: + data -> constexpr_affine_dequantize -> transpose + + Output graph: + new_data -> constexpr_affine_dequantize + + where new_data is the value after applying transpose to data + """ + quantized_data = np.random.randint(0, 256, (1, 2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + res = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, 
+ axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + return mb.transpose(x=res, perm=(2, 0, 1, 3)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + expected_quantized_data = np.transpose(quantized_data, (2, 0, 1, 3)) + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + def test_eliminate_reshape(self): + """ + Input graph: + data -> constexpr_affine_dequantize -> reshape + + Output graph: + new_data -> constexpr_affine_dequantize + + where new_data is the value after applying reshape to data + """ + quantized_data = np.random.randint(0, 256, (1, 2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + res = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + return mb.reshape(x=res, shape=(3, -1)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + expected_quantized_data = np.reshape(quantized_data, (3, 8)) + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + def test_eliminate_expand_dims(self): + """ + Input graph: + data -> constexpr_affine_dequantize -> expand_dims + + Output graph: + new_data -> constexpr_affine_dequantize + + where new_data is the value after applying expand_dims to data + """ + quantized_data = np.random.randint(0, 256, (2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + res = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + return mb.expand_dims(x=res, axes=(0, 2, 4)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + expected_quantized_data = np.expand_dims(quantized_data, axis=(0, 2, 4)) + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + @pytest.mark.parametrize("axis", [(0, 3), None]) + def test_eliminate_squeeze(self, axis): + """ + Input graph: + data -> constexpr_affine_dequantize -> squeeze + + Output graph: + new_data -> constexpr_affine_dequantize + + where new_data is the value after applying squeeze to data + """ + quantized_data = np.random.randint(0, 256, (1, 2, 3, 1, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + res = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + return mb.squeeze(x=res, axes=axis) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + expected_quantized_data = np.squeeze(quantized_data, axis=axis) + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + def 
test_eliminate_multiple_ops(self): + """ + Input graph: + data -> constexpr_affine_dequantize -> transpose -> + reshape -> expand_dims -> squeeze + + Output graph: + new_data -> constexpr_affine_dequantize + + where new_data is the value after applying the same chain of transformations to data + """ + quantized_data = np.random.randint(0, 256, (1, 2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + res = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + res = mb.transpose(x=res, perm=(1, 0, 3, 2)) + res = mb.reshape(x=res, shape=(8, 3)) + res = mb.expand_dims(x=res, axes=(0, 2, 4)) + return mb.squeeze(x=res, axes=(2,)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + + expected_quantized_data = np.transpose(quantized_data, (1, 0, 3, 2)) + expected_quantized_data = np.reshape(expected_quantized_data, (8, 3)) + expected_quantized_data = np.expand_dims(expected_quantized_data, (0, 2, 4)) + expected_quantized_data = np.squeeze(expected_quantized_data, (2,)) + + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + def test_negative_channel_wise_pattern(self): + """ + If ``constexpr_affine_dequantize`` is not tensor-wise, + the graph is not changed. + """ + quantized_data = np.random.randint(0, 256, (2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + x = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=[8.9, 6.5], + zero_point=np.int8(34), + ) + y = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8([34, 56]), + ) + return mb.transpose(x=x, perm=(1, 0, 2)), mb.transpose(x=y, perm=(1, 0, 2)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "transpose", + "transpose", + ] + + def test_negative_non_linked_list_pattern(self): + """ + If ``quantized_data`` feeds into multiple ``constexpr_affine_dequantize`` ops, + the graph will not be changed. 
+ """ + quantized_data = np.random.randint(0, 256, (2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + data = mb.const(val=quantized_data) + x = mb.constexpr_affine_dequantize( + quantized_data=data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + y = mb.constexpr_affine_dequantize( + quantized_data=data, + axis=0, + scale=8.1, + zero_point=np.int8(56), + ) + return mb.transpose(x=x, perm=(1, 0, 2)), mb.reshape(x=y, shape=(24,)) + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "transpose", + "reshape", + ] + + def test_eliminate_connected_outputs(self): + """ + The optimization stops when the node is a block output + """ + quantized_data = np.random.randint(0, 256, (2, 3, 4)).astype(np.int8) + + @mb.program(input_specs=[], opset_version=ct.target.iOS16) + def prog(): + x = mb.constexpr_affine_dequantize( + quantized_data=quantized_data, + axis=0, + scale=8.9, + zero_point=np.int8(34), + ) + x = mb.transpose(x=x, perm=(1, 0, 2)) + x = mb.reshape(x=x, shape=(2, 2, 3, 2)) + y = mb.transpose(x=x, perm=(0, 3, 2, 1)) + return x, y + + apply_pass_and_basic_check( + prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" + ) + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "transpose", + ] + + new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] + expected_quantized_data = np.transpose(quantized_data, (1, 0, 2)) + expected_quantized_data = np.reshape(expected_quantized_data, (2, 2, 3, 2)) + np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) + + transpose_op = prog.find_ops(op_type="transpose", exactly_one=True)[0] + assert transpose_op.perm.val.tolist() == [0, 3, 2, 1] + + class QuantizationBaseTest: @staticmethod def generate_random_quantization_params( @@ -188,7 +462,10 @@ def prog(x): quantize_1_1 = mb.quantize(input=reshape, scale=0.1, output_dtype="int8") dequantize_2_1 = mb.dequantize(input=quantize_1_1, scale=0.1) - return dequantize_2_0, dequantize_2_1, + return ( + dequantize_2_0, + dequantize_2_1, + ) prev_prog, _, block = apply_pass_and_basic_check(prog, "common::int_op_canonicalization") if all_are_int: @@ -1917,3 +2194,232 @@ def prog(x): backend=("mlprogram", "fp16"), minimum_deployment_target=opset_version, ) + + +class TestInt32CastToInt16: + @pytest.mark.parametrize( + "x_dtype, dynamic, opset_version", + itertools.product( + [np.int32, np.float32], + [True, False], + [ct.target.iOS15, ct.target.iOS16, ct.target.iOS17], + ), + ) + def test_gather_int16_indices(self, x_dtype, dynamic, opset_version): + @mb.program(opset_version=opset_version) + def prog_static(): + params = np.array([[1, 2, 3], [4, 5, 6]], dtype=x_dtype) + indices = np.array([1, 0], dtype=np.int32) + return mb.gather(x=params, indices=indices, axis=-1) + + @mb.program( + [ + mb.TensorSpec(shape=(2, 3), dtype=types.numpy_type_to_builtin_type(x_dtype)), + mb.TensorSpec(shape=(2,), dtype=types.int32), + ], + opset_version=opset_version, + ) + def prog_dynamic(x, indices): + return mb.gather(x=x, indices=indices, axis=0) + + prog = prog_dynamic if dynamic else prog_static + assert get_op_types_in_program(prog) == ["gather"] + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + + if opset_version <= ct.target.iOS16: + # iOS15 gather op's ``indices`` 
doesn't support int16, so this pass doesn't have effect. + # iOS16 cast op doesn't support int16, so this pass doesn't have effect. + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + else: + # When input ``x`` is float32, the output is also float32, so no cast for output. + # When input ``x`` is int32 and cast to int16, the output will also be int16, so there + # is another cast op to cast it back to int32. + expected_ops = ["cast", "gather"] + if x_dtype == np.int32: + expected_ops = ["cast", "cast", "gather", "cast"] + assert get_op_types_in_program(prog) == expected_ops + indices_cast_op_idx = 1 if x_dtype == np.int32 else 0 + cast_op = block.find_ops(op_type="cast")[indices_cast_op_idx] + assert cast_op.dtype.val == "int16" + assert len(cast_op.outputs) == 1 + assert len(cast_op.outputs[0].child_ops) == 1 + assert cast_op.outputs[0].child_ops[0].op_type == "gather" + assert cast_op.outputs[0] == block.find_ops(op_type="gather")[0].indices + + if not dynamic: + np.testing.assert_allclose( + np.array([[2, 1], [5, 4]], dtype=np.float32), + prog.functions["main"].find_ops(op_type="gather")[0].outputs[0].val, + atol=1e-04, + rtol=1e-05, + ) + + @pytest.mark.parametrize( + "x_dtype, dynamic, opset_version", + itertools.product( + [np.int32, np.float32], + [True, False], + [ct.target.iOS15, ct.target.iOS16, ct.target.iOS17], + ), + ) + def test_gather_along_axis_int16_indices(self, x_dtype, dynamic, opset_version): + @mb.program(opset_version=opset_version) + def prog_static(): + params = np.array([[1, 2, 3], [4, 5, 6]], dtype=x_dtype) + indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + return mb.gather_along_axis(x=params, indices=indices, axis=-1) + + @mb.program( + [ + mb.TensorSpec(shape=(2, 3), dtype=types.numpy_type_to_builtin_type(x_dtype)), + mb.TensorSpec(shape=(2, 3), dtype=types.int32), + ], + opset_version=opset_version, + ) + def prog_dynamic(x, indices): + return mb.gather_along_axis(x=x, indices=indices, axis=0) + + prog = prog_dynamic if dynamic else prog_static + assert get_op_types_in_program(prog) == ["gather_along_axis"] + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + + if opset_version <= ct.target.iOS16: + # iOS15 gather op's ``indices`` doesn't support int16, so this pass doesn't have effect. + # iOS16 cast op doesn't support int16, so this pass doesn't have effect. + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + else: + # When input ``x`` is float32, the output is also float32, so no cast for output. + # When input ``x`` is int32 and cast to int16, the output will also be int16, so there + # is another cast op to cast it back to int32. 
+ expected_ops = ["cast", "gather_along_axis"] + if x_dtype == np.int32: + expected_ops = ["cast", "cast", "gather_along_axis", "cast"] + assert get_op_types_in_program(prog) == expected_ops + indices_cast_op_idx = 1 if x_dtype == np.int32 else 0 + cast_op = block.find_ops(op_type="cast")[indices_cast_op_idx] + assert cast_op.dtype.val == "int16" + assert len(cast_op.outputs) == 1 + assert len(cast_op.outputs[0].child_ops) == 1 + assert cast_op.outputs[0].child_ops[0].op_type == "gather_along_axis" + assert cast_op.outputs[0] == block.find_ops(op_type="gather_along_axis")[0].indices + + if not dynamic: + np.testing.assert_allclose( + np.array([[2, 1, 2], [5, 5, 4]], dtype=np.float32), + prog.functions["main"].find_ops(op_type="gather_along_axis")[0].outputs[0].val, + atol=1e-04, + rtol=1e-05, + ) + + @pytest.mark.parametrize("overflow", [True, False]) + def test_gather_dynamic_overflow_int16(self, overflow): + """Dynamic input indices should also be cast if x dim size doesn't overflow int16 range.""" + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(32769 if overflow else 2, 3)), + mb.TensorSpec(shape=(2,), dtype=types.int32), + ], + opset_version=ct.target.iOS17, + ) + def prog(x, indices): + return mb.gather(x=x, indices=indices, axis=0) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + if overflow: + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + else: + assert get_op_types_in_program(prog) == ["cast", "gather"] + cast_op = block.find_ops(op_type="cast")[0] + assert cast_op.dtype.val == "int16" + assert cast_op.outputs[0] == block.find_ops(op_type="gather")[0].indices + + def test_gather_static_overflow_int16(self): + """Indices cannot be represented by int16 range, don't cast to int16.""" + + @mb.program(opset_version=ct.target.iOS17) + def prog(): + params = np.array([[1, 2]] * 32769, dtype=np.float32) + indices = np.array([32768, 0], dtype=np.int32) + return mb.gather(x=params, indices=indices, axis=0) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + + @patch( + "coremltools.converters.mil.mil.passes.defs.quantization.add_int16_cast._PREFER_INT16_OPS", + set(), + ) + def test_int16_no_effect(self): + """After patching the pass, no op should be cast to int16""" + + @mb.program( + input_specs=[mb.TensorSpec(shape=(2, 3)), mb.TensorSpec(shape=(2,), dtype=types.int32)], + opset_version=ct.target.iOS17, + ) + def prog(x, indices): + return mb.gather(x=x, indices=indices, axis=0) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + + @pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) + @pytest.mark.parametrize( + "compute_precision, num_embeddings, minimum_deployment_target, symbolic", + itertools.product( + [ct.precision.FLOAT16, ct.precision.FLOAT32], + [10, 32769], + [ct.target.iOS15, ct.target.iOS16, ct.target.iOS17], + [True, False], + ), + ) + def test_int16_embedding_e2e( + self, compute_precision, num_embeddings, minimum_deployment_target, symbolic + ): + """End-to-end conversion from a torch embedding model.""" + + class EmbeddingModel(nn.Module): + def __init__(self): + super(EmbeddingModel, self).__init__() + self.embedding = torch.nn.Embedding(num_embeddings=num_embeddings, embedding_dim=2) + + def forward(self, x): + return self.embedding(x) + + input_data = 
np.random.randint(low=0, high=num_embeddings, size=(3, 5)) + input_data = torch.from_numpy(input_data) + model = EmbeddingModel() + model.eval() + traced_model = torch.jit.trace(model, input_data) + input_shape = (ct.RangeDim(1, 32), ct.RangeDim(1, 32)) if symbolic else input_data.shape + converted_model = ct.convert( + traced_model, + inputs=[ct.TensorType(shape=input_shape, name="input", dtype=np.int32)], + convert_to="mlprogram", + compute_precision=compute_precision, + compute_units=ct.ComputeUnit.CPU_ONLY, + minimum_deployment_target=minimum_deployment_target, + ) + prog = converted_model._mil_program + + # The embedding layer is lowered to `gather` op. + expected_ops = ["gather"] + if ( + compute_precision == ct.precision.FLOAT16 + and minimum_deployment_target < ct.target.iOS16 + ): + # Cast from fp16 to fp32 because fp16 is not supported in I/O before iOS16. + expected_ops.append("cast") + if ( + minimum_deployment_target >= ct.target.iOS17 + and compute_precision == ct.precision.FLOAT16 + and num_embeddings <= np.iinfo(np.int16).max + ): + # The int16 cast only happens for iOS17+ with fp16 precision and there is no overflow. + expected_ops.insert(0, "cast") + cast_op = prog["main"].find_ops(op_type="cast")[0] + assert cast_op.dtype.val == "int16" + assert cast_op.outputs[0] == prog["main"].find_ops(op_type="gather")[0].indices + assert get_op_types_in_program(prog) == expected_ops diff --git a/coremltools/converters/mil/mil/program.py b/coremltools/converters/mil/mil/program.py index fe103d20b..462e88ffc 100644 --- a/coremltools/converters/mil/mil/program.py +++ b/coremltools/converters/mil/mil/program.py @@ -3,6 +3,9 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from collections import defaultdict +from typing import Dict, List + import numpy as _np import sympy as _sm @@ -15,6 +18,7 @@ from . import types from .block import Function +from .operation import Operation from .types.symbolic import k_num_internal_syms, k_used_symbols from .var import Var @@ -24,6 +28,13 @@ class Program: def _get_opset_str_value(op): return f"coremltools.target.{op.name}" + @staticmethod + def _get_supported_dialect_opset() -> List[str]: + """ + Return a list of supported dialect opsets at runtime. + """ + return [] + def __init__(self): self.main_input_types = [] self.main_output_types = None @@ -31,22 +42,32 @@ def __init__(self): self.parameters = {} self.skip_all_passes = False + def _get_dialect_namespaces(self) -> Dict[str, List[Operation]]: + """ + Return a dict which maps the dialect namespace into a list of corresponding operations. 
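# Toy sketch (the classes and names here are illustrative, not the real pymil
# types) of the grouping described above: walk objects and bucket them by an
# optional "_dialect_namespace" attribute.
from collections import defaultdict

class _ToyOp:
    def __init__(self, name, namespace=None):
        self.name = name
        if namespace is not None:
            self._dialect_namespace = namespace

ops = [_ToyOp("op_1", "torch"), _ToyOp("op_2"), _ToyOp("op_3", "complex")]
buckets = defaultdict(list)
for op in ops:
    if hasattr(op, "_dialect_namespace"):
        buckets[op._dialect_namespace].append(op.name)
assert dict(buckets) == {"torch": ["op_1"], "complex": ["op_3"]}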
+ """ + res = defaultdict(list) + + def get_dialect_namespaces_block(block): + for op in list(block.operations): + for b in op.blocks: + get_dialect_namespaces_block(b) + if hasattr(op, "_dialect_namespace"): + dialect_namespace = op._dialect_namespace + res[dialect_namespace].append(op) + + for func in self.functions.values(): + get_dialect_namespaces_block(func) + return res + def _get_max_opset_version_and_op(self): max_opset_version = _target.iOS13 op_with_max_opset_version = None - def update_max_opset_version_block(block): - nonlocal max_opset_version - nonlocal op_with_max_opset_version - for op in list(block.operations): - for b in op.blocks: - update_max_opset_version_block(b) - if not hasattr(op, "_op_variants") or not isinstance(op._op_variants, dict): - continue - if op.opset_version > max_opset_version: - max_opset_version = op.opset_version - op_with_max_opset_version = op for func in self.functions.values(): - update_max_opset_version_block(func) + cur_max_opset, cur_op = func.get_max_opset_version_and_op() + if cur_max_opset > max_opset_version: + max_opset_version = cur_max_opset + op_with_max_opset_version = cur_op return max_opset_version, op_with_max_opset_version def _check_ops_version_compatibility(self, max_opset_version): @@ -95,24 +116,57 @@ def _check_program_opset_version(self): self._check_ops_version_compatibility(max_opset_version) self._check_or_set_functions_opset_version(max_opset_version) - def _check_invalid_program(self): + @staticmethod + def _get_runtime_supported_dialect_opset() -> List[str]: """ - Early error out for - 1. tensor with rank >= 6 - 2. non const tensor feed in const input + Return a list of supported dialect opsets at runtime. """ + return [] + def _check_invalid_opset(self): + """ + Check if the program consists of opsets not supported by runtime. + """ + dialect_namespaces = self._get_dialect_namespaces() + if len(dialect_namespaces) != 0: + for dialect_key in list(dialect_namespaces.keys()): + if dialect_key not in self._get_runtime_supported_dialect_opset(): + invalid_op = dialect_namespaces[dialect_key][0] + raise ValueError( + f'Core ML only support core opset. Got unsupported op "{invalid_op.name}" with type "{invalid_op.op_type}" of dialect namespace "{invalid_op._dialect_namespace}".' + ) + + def _check_invalid_tensor_rank(self): + """ + Check if the program consists of tensors with rank >= 6. + """ def _check_invalid_tensor_rank_block(block): for op in block.operations: for b in op.blocks: _check_invalid_tensor_rank_block(b) for o in op.outputs: if not isinstance(o, ListVar) and (o.rank < 0 or o.rank >= 6): + if op.op_type == "const" and len(o.child_ops) == 1 and \ + o.child_ops[0].op_type == "constexpr_lut_to_dense": + # For lut op, the lookup table is allowed to have rank > 5. + continue raise ValueError( f'Core ML only supports tensors with rank <= 5. Layer "{op.name}", ' f'with type "{op.op_type}", outputs a rank {o.rank} tensor. ' ) + for f in self.functions.values(): + _check_invalid_tensor_rank_block(f) + + def _check_invalid_const_tensor_input(self): + """ + Check if non const tensor feed into const input. + This might happen in the early stage of conversion, for instance: + constexpr_ -> reshape -> transpose -> linear + + However, the pattern is optimized into the following in a graph pass. + constexpr_ -> linear + """ def _check_invalid_const_tensor_input_block(block): for op in block.operations: for b in op.blocks: @@ -130,12 +184,20 @@ def _check_invalid_const_tensor_input_block(block): f"In op {op.name}. 
Input {k} ({v.name}) must be const or constexpr ops." ) - for f in self.functions.values(): - _check_invalid_tensor_rank_block(f) - for f in self.functions.values(): _check_invalid_const_tensor_input_block(f) + def _check_early_error_out_for_invalid_program(self): + """ + Early error out for + 1. tensor with rank >= 6 + 2. non const tensor feed into const input + 3. program consist of non mil core ops + """ + self._check_invalid_tensor_rank() + self._check_invalid_const_tensor_input() + self._check_invalid_opset() + def add_function(self, name, ssa_func): if not isinstance(ssa_func, Function): raise ValueError("Only Function can be added to Program.") @@ -229,15 +291,8 @@ def __init__(self, sym_shape, dtype=None, name=None, allow_rank0_input=False): self.dtype = dtype if self.dtype is None: self.dtype = types.float - sym_type = self.type_inference() - - # Globally unique var name for placeholders - if name is None: - name = 'placeholder_' + str(self.__class__.counter) - self.__class__.counter += 1 - - # List of output vars (consistent w/ other ops) - self.outputs = [Var(name, sym_type)] + self.name = name + self._infer_output_var() def set_name(self, name): self.name = name @@ -251,6 +306,16 @@ def type_inference(self): def __str__(self): return str(self.outputs[0]) + def _infer_output_var(self): + sym_type = self.type_inference() + + # Globally unique var name for placeholders + if self.name is None: + self.name = f"{self.__class__.__name__}_{self.__class__.counter}" + self.__class__.counter += 1 + + # List of output vars (consistent w/ other ops) + self.outputs = [Var(self.name, sym_type)] def get_new_variadic_symbol(): global k_num_internal_syms diff --git a/coremltools/converters/mil/mil/tests/test_block.py b/coremltools/converters/mil/mil/tests/test_block.py index d0674920d..0f943c710 100644 --- a/coremltools/converters/mil/mil/tests/test_block.py +++ b/coremltools/converters/mil/mil/tests/test_block.py @@ -27,7 +27,6 @@ the core API being tested here. """ - def test_empty_block(): """ Test an empty program diff --git a/coremltools/converters/mil/mil/tests/test_programs.py b/coremltools/converters/mil/mil/tests/test_programs.py index 4fbbd69a6..139bf4d62 100644 --- a/coremltools/converters/mil/mil/tests/test_programs.py +++ b/coremltools/converters/mil/mil/tests/test_programs.py @@ -9,7 +9,7 @@ import coremltools as ct from coremltools import _logger as logger from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import Function, Program, types from coremltools.converters.mil.mil.passes.tests.test_passes import CONSTEXPR_FUNCS np.random.seed(0) @@ -199,8 +199,10 @@ def false_fn(): return mb.cond(pred=mb.cast(x=pred, dtype="bool"), _true_fn=true_fn, _false_fn=false_fn) return prog -class TestMLProgramVersionHandling: - +class TestMILProgramVersionHandling: + """ + Test basic functionality of opset version handling in pymil + """ @staticmethod def test_multi_versions_op_selection(): ''' @@ -306,6 +308,110 @@ def test_bulid_non_compatible_program_early_error_out(): with pytest.raises(ValueError, match=expected_err_str): get_simple_topk_pixel_unshuffle_program() +class TestMILBuilderAPI: + """ + Test the basic builder API. 
+ """ + def test_create_function(self): + """ + Test mb.function API + """ + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def func(x): + return mb.add(x=x, y=0.0) + + assert isinstance(func, Function) + assert len(func.operations) == 2 # add, const + assert len(func.inputs) == 1 + assert len(func.outputs) == 1 + + def test_create_program(self): + """ + Test mb.program API + """ + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + return mb.add(x=x, y=0.0) + + assert isinstance(prog, Program) + func = prog.functions["main"] + assert len(func.operations) == 2 # add, const + assert len(func.inputs) == 1 + assert len(func.outputs) == 1 + + def test_create_program_function_name(self): + """ + If ``function_name`` is not provide, mb.program creates function with name "main" by default. + """ + # defaults to "main" + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + return x0 + + assert len(prog.functions) == 1 + assert "main" in prog.functions + + # user can also provide function_name + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))], function_name="good_function") + def prog(x0): + return x0 + + assert len(prog.functions) == 1 + assert "good_function" in prog.functions + + def test_program_with_multiple_functions(self): + """ + Basic creation of a program with multiple functions + """ + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def func_1(x): + return x + + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def func_2(x): + return x + + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def func_3(x): + return x + + prog = Program() + prog.add_function("func_1", func_1) + prog.add_function("func_2", func_2) + prog.add_function("func_3", func_3) + + assert set(prog.functions.keys()) == set(["func_1", "func_2", "func_3"]) + + def test_error_out_incompatible_functions(self): + """ + ``add_function`` should error out when a function with different + opset is added to a program. + """ + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))], opset_version=ct.target.iOS13) + def func_1(x): + return x + + @mb.function(input_specs=[mb.TensorSpec(shape=(2, 4))], opset_version=ct.target.iOS17) + def func_2(x): + return x + + err_msg = "all functions must have the same opset_version." + + prog = Program() + prog.add_function("func_1", func_1) + with pytest.raises(ValueError, match=err_msg): + prog.add_function("func_2", func_2) + + prog = Program() + prog.add_function("func_2", func_2) + with pytest.raises(ValueError, match=err_msg): + prog.add_function("func_1", func_1) + + +class TestMILBasic: + """ + Test the basic error handling / validation in pymil. + """ @staticmethod def test_type_domain_validation(): ''' @@ -320,6 +426,49 @@ def prog(x): res = mb.rsqrt(x=x, epsilon=1) return res + @staticmethod + def test_get_dialect_namespaces(): + """ + Test we can get a dict of dialect namespaces in the program. 
+ """ + # The pymil program is mixed of torch / complex dialect opset + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 2, 3, 4), dtype=types.fp32)]) + def prog(x): + real_data = mb.torch_upsample_nearest_neighbor( + x=x, output_height=10, output_width=5, name="op_1" + ) + imag_data = mb.add(x=real_data, y=8.9, name="op_2") + return mb.complex(real_data=real_data, imag_data=imag_data, name="op_3") + + dialect_namespaces = prog._get_dialect_namespaces() + assert len(dialect_namespaces["torch"]) == 1 + assert dialect_namespaces["torch"][0].name == "op_1" + assert len(dialect_namespaces["complex"]) == 1 + assert dialect_namespaces["complex"][0].name == "op_3" + + # The pymil program with only core ops returns an empty dict + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 2, 3, 4), dtype=types.fp32)]) + def prog(x): + return mb.add(x=x, y=8.9) + + assert len(prog._get_dialect_namespaces()) == 0 + + @staticmethod + def test_invalid_dialect_namespaces_error_out(): + """ + The converter should early error out if dialect opset is detected in the pymil program. + """ + # The pymil program of torch dialect opset cannot be lowered to backend + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 2, 3, 4), dtype=types.fp32)]) + def prog(x): + return mb.torch_upsample_nearest_neighbor( + x=x, output_height=10, output_width=5, name="op_1" + ) + + expected_err_str = 'Core ML only support core opset. Got unsupported op "op_1" with type "torch_upsample_nearest_neighbor" of dialect namespace "torch".' + with pytest.raises(ValueError, match=expected_err_str): + ct.convert(prog, convert_to="mlprogram", pass_pipeline=ct.PassPipeline.EMPTY) + @staticmethod def test_rank6_tensor_early_error_out(): ''' diff --git a/coremltools/converters/mil/testing_utils.py b/coremltools/converters/mil/testing_utils.py index 18f2d8865..1ed4c66c5 100644 --- a/coremltools/converters/mil/testing_utils.py +++ b/coremltools/converters/mil/testing_utils.py @@ -184,6 +184,13 @@ def assert_same_input_names(prog1, prog2, func_name="main"): assert prog1_input_names == prog2_input_names +def assert_numerical_value(mil_var, expected_value): + if mil_var is None: + assert expected_value is None + else: + np.testing.assert_allclose(mil_var.val, expected_value) + + def assert_same_input_types(prog1, prog2, func_name="main"): prog1_input_types = [x.dtype for x in list(prog1[func_name].inputs.values())] prog2_input_types = [x.dtype for x in list(prog2[func_name].inputs.values())] diff --git a/coremltools/models/__init__.py b/coremltools/models/__init__.py index 56c47c3db..698652f07 100644 --- a/coremltools/models/__init__.py +++ b/coremltools/models/__init__.py @@ -31,6 +31,7 @@ _QUANTIZATION_MODE_DEQUANTIZE, _METADATA_VERSION, _METADATA_SOURCE, + _METADATA_SOURCE_DIALECT, ) from . 
import neural_network diff --git a/coremltools/models/model.py b/coremltools/models/model.py index e60e5f29a..9d409f530 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -93,7 +93,7 @@ _METADATA_VERSION = "com.github.apple.coremltools.version" _METADATA_SOURCE = "com.github.apple.coremltools.source" - +_METADATA_SOURCE_DIALECT = "com.github.apple.coremltools.source_dialect" class _FeatureDescription: diff --git a/coremltools/optimize/coreml/_config.py b/coremltools/optimize/coreml/_config.py index edf8e2272..126c6ec1f 100644 --- a/coremltools/optimize/coreml/_config.py +++ b/coremltools/optimize/coreml/_config.py @@ -529,6 +529,7 @@ def lut_function(weight): weight_threshold: Optional[int] = field(default=2048, validator=validators.optional([validators.instance_of(int), _check_weight_threshold])) _WEIGHT_PALETTIZATION_MODES = ("KMEANS", "UNIFORM", "UNIQUE", "CUSTOM") + _VALID_NBITS = (1, 2, 4, 6, 8) @nbits.validator def check_nbits(self, attr, nbits): @@ -540,9 +541,9 @@ def check_nbits(self, attr, nbits): if nbits is not None and mode in ("UNIQUE", "CUSTOM"): raise ValueError(f"\"nbits\" must NOT be provided for {self.mode} mode") - if nbits is not None and nbits not in [1, 2, 4, 6, 8]: + if nbits is not None and nbits not in self._VALID_NBITS: raise ValueError( - f"Invalid value of \"nbits\" ({nbits}) for palettization. Supported \"nbits\" are {{1, 2, 4, 6, 8}}" + f'Invalid value of "nbits" ({nbits}) for palettization. Supported "nbits" are {self._VALID_NBITS}' ) @mode.validator diff --git a/coremltools/optimize/coreml/_quantization_passes.py b/coremltools/optimize/coreml/_quantization_passes.py index 79a1cc520..fba517d78 100644 --- a/coremltools/optimize/coreml/_quantization_passes.py +++ b/coremltools/optimize/coreml/_quantization_passes.py @@ -3,12 +3,14 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from typing import Callable, Optional, Tuple + import numpy as np from tqdm import tqdm from coremltools import _logger as logger -from coremltools.converters.mil.backend.mil.load import should_use_weight_file from coremltools.converters.mil._deployment_compatibility import AvailableTarget +from coremltools.converters.mil.backend.mil.load import should_use_weight_file from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Operation, Program, types from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with @@ -166,6 +168,31 @@ def get_supported_types_as_str(supported_type): supported_type_str = get_supported_types_as_str(self._SUPPORTED_CONFIG_TYPE) raise ValueError(f"{self.__class__.__name__} only accept {supported_type_str} type config. Got {config.__class__.__name__}.") + @staticmethod + def pick_channnel_axis(op: Operation) -> int: + """ + By default, output channel is used as the channel axis. 
Here are some representative ops: + - linear: [D_out, D_in] + - matmul's y: [..., D_in, D_out] if transpose_y is False, else [..., D_out, D_in] + - conv: [C_out, C_in_div_group, KH, KW] + - conv_transpose: [C_in, C_out_div_group, KH, KW] + + So the channel axis picking criterial is: + - For conv_transpose it's 1 + - For matmul's y it's -1 (transpose_y=False) or -2 (transpose_y=True) + - For all other ops, it's 0 + """ + channel_axis = 0 + var = op.outputs[0] + if len(var.child_ops) == 1: + child_op = var.child_ops[0] + if child_op.op_type == "conv_transpose": + channel_axis = 1 + if child_op.op_type == "matmul" and child_op.y == var: + channel_axis = -1 if child_op.transpose_y else -2 + return channel_axis + + @register_pass(namespace="compression") class prune_weights(AbstractCompressionPass): """ @@ -424,6 +451,7 @@ class palettize_weights(AbstractCompressionPass): - Old ``const`` op is replaced by a newly created operation. """ _SUPPORTED_CONFIG_TYPE = OpPalettizerConfig + _SUPPORTED_NBITS = (1, 2, 4, 6, 8) def is_valid_op(self, op: Operation): if op.op_type == "const" and should_use_weight_file(op.outputs[0].val): @@ -431,8 +459,19 @@ def is_valid_op(self, op: Operation): return False @staticmethod - def compress(val, mode, nbits=None, lut_function=None): + def _get_nbits_for_unique_mode(val: np.ndarray, allowed_nbits: Tuple[int, ...]) -> int: + val = val.flatten() + unique_vals = np.unique(val).tolist() + for nbits in allowed_nbits: + if len(unique_vals) <= 1 << nbits: + return nbits + raise ValueError("Unique values in weight cannot be represented by 8 bits palettization.") + @staticmethod + def _get_lut_and_indices( + val: np.ndarray, mode: str, nbits: Optional[int], lut_function: Optional[Callable] + ) -> Tuple[np.ndarray, np.ndarray]: + """Calculate look-up-table (LUT) and indices.""" def compress_kmeans(val, nbits): lut, indices = _get_kmeans_lookup_table_and_weight(nbits, val) lut = lut.astype(val.dtype) @@ -451,16 +490,6 @@ def compress_uniform(val, nbits): lut = lut.astype(val.dtype) return lut, indices - def get_nbits_for_unique_mode(val): - val = val.flatten() - unique_vals = np.unique(val).tolist() - for nbits in (1, 2, 4, 6, 8): - if len(unique_vals) <= 1 << nbits: - return nbits - msg = "weight value cannot be represented in an 8 bits palettization. Skipped." - logger.warning(msg) - return None - def compress_unique(val, nbits): val = val.flatten() unique_vals = np.unique(val).tolist() @@ -483,6 +512,25 @@ def compress_unique(val, nbits): indices = indices.astype(np.uint8) return lut, indices + if mode == "KMEANS": + lut, indices = compress_kmeans(val, nbits) + elif mode == "UNIFORM": + lut, indices = compress_uniform(val, nbits) + elif mode == "UNIQUE": + if nbits is None: + nbits = palettize_weights._get_nbits_for_unique_mode( + val, palettize_weights._SUPPORTED_NBITS + ) + lut, indices = compress_unique(val, nbits) + else: + if mode != "CUSTOM": + raise AssertionError(f"Invalid mode {mode}") + lut, indices = lut_function(val) + + return lut, indices + + @staticmethod + def compress(val, mode, nbits=None, lut_function=None) -> LutParams: def check_lut_parameters_are_valid(val, lut, indices): if not isinstance(lut, np.ndarray) or not isinstance(indices, np.ndarray): raise ValueError("LUT and indices must be type of numpy array.") @@ -508,17 +556,7 @@ def check_lut_parameters_are_valid(val, lut, indices): if not isinstance(val, (np.ndarray, np.generic)): raise ValueError(f"Only numpy arrays are supported. 
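In other words, the reduction axes for per-channel statistics are every axis except the picked channel axis. A small numpy sketch of the resulting per-channel scale shapes (the weight shapes below are hypothetical, and `scales_shape` is a local helper, not library code):

    import numpy as np

    conv_weight = np.zeros((32, 16, 3, 3))    # conv: channel axis 0 -> 32 scales
    deconv_weight = np.zeros((16, 32, 3, 3))  # conv_transpose: channel axis 1 -> 32 scales
    matmul_y = np.zeros((64, 128))            # matmul y, transpose_y=False: axis -1 -> 128 scales

    def scales_shape(weight, channel_axis):
        # Reduce over every axis except the channel axis, keeping dims so the
        # scales broadcast back against the weight.
        axes = tuple(i for i in range(weight.ndim) if i != channel_axis % weight.ndim)
        return np.amin(weight, axis=axes, keepdims=True).shape

    assert scales_shape(conv_weight, 0) == (32, 1, 1, 1)
    assert scales_shape(deconv_weight, 1) == (1, 32, 1, 1)
    assert scales_shape(matmul_y, -1) == (1, 128)
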
Got {type(val)}") - if mode == "KMEANS": - lut, indices = compress_kmeans(val, nbits) - elif mode == "UNIFORM": - lut, indices = compress_uniform(val, nbits) - elif mode == "UNIQUE": - nbits = get_nbits_for_unique_mode(val) - if nbits is None: - return None - lut, indices = compress_unique(val, nbits) - elif mode == "CUSTOM": - lut, indices = lut_function(val) + lut, indices = palettize_weights._get_lut_and_indices(val, mode, nbits, lut_function) check_lut_parameters_are_valid(val, lut, indices) @@ -541,6 +579,15 @@ def transform_op(self, op: Operation): if not self.need_compress_const(op, self.config._is_deprecated, op_config.weight_threshold): return + if op_config.mode == "UNIQUE": + try: + palettize_weights._get_nbits_for_unique_mode( + op.outputs[0].val, self._SUPPORTED_NBITS + ) + except ValueError as e: + logger.warning(f"Skip op {op.name} for palettization, because {e}") + return + lut_params = self.compress( op.outputs[0].val, op_config.mode, @@ -548,9 +595,6 @@ def transform_op(self, op: Operation): op_config.lut_function ) - if lut_params is None: - return - if not self.fake_compression: new_var = mb.constexpr_lut_to_dense( indices=lut_params.indices, @@ -591,46 +635,28 @@ class linear_quantize_weights(AbstractCompressionPass): - If ``fake_compression=True``, compressed value is decompressed and then encoded using the ``const`` op. """ _SUPPORTED_CONFIG_TYPE = OpLinearQuantizerConfig + _MODE_DTYPE_TO_RANGE = { + (types.int8, "LINEAR"): (-128, 127), + (types.int8, "LINEAR_SYMMETRIC"): (-127, 127), + (types.uint8, "LINEAR"): (0, 255), + (types.uint8, "LINEAR_SYMMETRIC"): (0, 254), + } def is_valid_op(self, op: Operation): if op.op_type == "const" and should_use_weight_file(op.outputs[0].val): return True return False - @staticmethod - def _get_axis(op): - axis = 0 - var = op.outputs[0] - if len(var.child_ops) == 1 and var.child_ops[0].op_type == "conv_transpose": - axis = 1 - return axis + @classmethod + def _get_quantized_data( + cls, original_data: np.ndarray, axes: Tuple[int, ...], mode: str, dtype: type + ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: + """Get quantized data along with metadata (scale, zero_point).""" + if not np.issubdtype(original_data.dtype, np.floating): + raise ValueError("Only floating numpy arrays are supported.") - @staticmethod - def compress(val, axis, mode, dtype): - def _ensure_numerical_range_and_cast(val, low, high, np_dtype): - ''' - For some cases, the computed quantized data might exceed the data range. - For instance, after rounding and addition, we might get `128` for the int8 quantization. - This utility function ensures the val in the data range before doing the cast. 
- ''' - val = np.minimum(val, high) - val = np.maximum(val, low) - return val.astype(np_dtype) - - mode_dtype_to_range = { - (types.int8, "LINEAR"): (-128, 127), - (types.int8, "LINEAR_SYMMETRIC"): (-127, 127), - (types.uint8, "LINEAR"): (0, 255), - (types.uint8, "LINEAR_SYMMETRIC"): (0, 254), - } - - if not isinstance(val, (np.ndarray, np.generic)): - raise ValueError("Only numpy arrays are supported") - - params = AffineQuantParams() - axes = tuple([i for i in range(len(val.shape)) if i != axis]) - val_min = np.amin(val, axis=axes, keepdims=True) - val_max = np.amax(val, axis=axes, keepdims=True) + val_min = np.amin(original_data, axis=axes, keepdims=True) + val_max = np.amax(original_data, axis=axes, keepdims=True) if mode == "LINEAR_SYMMETRIC": # For the linear_symmetric mode, the range is symmetrical to 0 @@ -643,39 +669,42 @@ def _ensure_numerical_range_and_cast(val, low, high, np_dtype): val_min = np.minimum(0.0, val_min) val_max = np.maximum(0.0, val_max) - q_val_min, q_val_max = mode_dtype_to_range[(dtype, mode)] - - # Set the zero point to symmetric mode + q_val_min, q_val_max = cls._MODE_DTYPE_TO_RANGE[(dtype, mode)] np_dtype = nptype_from_builtin(dtype) + zero_point = None if mode == "LINEAR_SYMMETRIC": - if dtype == types.int8: - params.zero_point = (0 * np.ones(val_min.shape)).astype(np.int8) - else: - assert dtype == types.uint8 - params.zero_point = (127 * np.ones(val_min.shape)).astype(np.uint8) + if dtype.is_unsigned(): + zero_point_shift = q_val_max // 2 + zero_point = zero_point_shift * np.ones(val_min.shape) else: assert mode == "LINEAR" - params.zero_point = (q_val_min * val_max - q_val_max * val_min) / (val_max - val_min) - params.zero_point = np.round(params.zero_point) - params.zero_point = _ensure_numerical_range_and_cast(params.zero_point, q_val_min, q_val_max, np_dtype) - - # compute the params - params.scale = (val_max - val_min) / (q_val_max - q_val_min) - params.scale = params.scale.astype(val.dtype).squeeze() - - params.quantized_data = np.round( - val * (q_val_max - q_val_min) / (val_max - val_min) - ) - params.quantized_data = (params.quantized_data + params.zero_point) - params.quantized_data = _ensure_numerical_range_and_cast(params.quantized_data, q_val_min, q_val_max, np_dtype) - - params.zero_point = params.zero_point.squeeze() - params.axis = axis - - return params + zero_point = (q_val_min * val_max - q_val_max * val_min) / (val_max - val_min) + zero_point = np.round(zero_point) + zero_point = np.clip(zero_point, q_val_min, q_val_max) + + scale = (val_max - val_min) / (q_val_max - q_val_min) + quantized_data = np.round(original_data / scale) + if zero_point is not None: + quantized_data += zero_point + zero_point = zero_point.squeeze().astype(np_dtype) + quantized_data = np.clip(quantized_data, q_val_min, q_val_max).astype(np_dtype) + scale = scale.astype(original_data.dtype).squeeze() + + return quantized_data, scale, zero_point + + @classmethod + def compress(cls, val: np.ndarray, axis: int, mode: str, dtype: type) -> AffineQuantParams: + if not isinstance(val, (np.ndarray, np.generic)): + raise ValueError("Only numpy arrays are supported") + axes = tuple([i for i in range(len(val.shape)) if i != axis]) + quantized_data, scale, zero_point = cls._get_quantized_data(val, axes, mode, dtype) + if zero_point is None: + # The iOS16 constexpr_affine_dequantize op requires zero_point. 
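For a concrete feel of the LINEAR-mode arithmetic above, a hand-worked single-channel uint8 example (values picked arbitrarily; the real pass computes these per channel with keepdims):

    import numpy as np

    # One channel with weights spanning [-1.0, 3.0], quantized to uint8 (0..255).
    w = np.array([-1.0, 0.0, 1.5, 3.0], dtype=np.float32)
    w_min, w_max = w.min(), w.max()
    q_min, q_max = 0, 255

    scale = (w_max - w_min) / (q_max - q_min)                                  # ~0.0157
    zero_point = np.round((q_min * w_max - q_max * w_min) / (w_max - w_min))   # 63.75 -> 64
    q = np.clip(np.round(w / scale) + zero_point, q_min, q_max).astype(np.uint8)

    # Dequantizing recovers the originals up to one quantization step.
    w_back = scale * (q.astype(np.float32) - zero_point)
    assert np.allclose(w_back, w, atol=scale)
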
+ zero_point = np.zeros_like(scale).astype(quantized_data.dtype) + return AffineQuantParams(quantized_data, zero_point, scale, axis) @staticmethod - def decompress(params): + def decompress(params: AffineQuantParams) -> np.ndarray: if not isinstance(params, AffineQuantParams): raise ValueError("Invalid type of params") return constexpr_affine_dequantize.decompress( @@ -689,7 +718,9 @@ def transform_op(self, op: Operation): if not self.need_compress_const(op, self.config._is_deprecated, op_config.weight_threshold): return - quant_params = self.compress(op.outputs[0].val, self._get_axis(op), op_config.mode, op_config.dtype) + quant_params = self.compress( + op.outputs[0].val, self.pick_channnel_axis(op), op_config.mode, op_config.dtype + ) if not self.fake_compression: new_var = mb.constexpr_affine_dequantize( diff --git a/coremltools/optimize/torch/pruning/magnitude_pruner.py b/coremltools/optimize/torch/pruning/magnitude_pruner.py index d837fc635..6d68eb531 100644 --- a/coremltools/optimize/torch/pruning/magnitude_pruner.py +++ b/coremltools/optimize/torch/pruning/magnitude_pruner.py @@ -304,7 +304,7 @@ def __attrs_post_init__(self): if self.initial_sparsity is not None and self.initial_sparsity > 0.0: raise ValueError( f"Received initial_sparsity = {self.initial_sparsity} and " - f"n_m_ratio = {self.n_m_ratio}. When n_m_ratio != None, the only allowed " + f"n_m_ratio = {self.nm_ratio}. When n_m_ratio != None, the only allowed " f"value of initial_sparsity is 0." ) diff --git a/coremltools/test/ml_program/test_compression.py b/coremltools/test/ml_program/test_compression.py index c08899458..7452c1710 100644 --- a/coremltools/test/ml_program/test_compression.py +++ b/coremltools/test/ml_program/test_compression.py @@ -3,20 +3,36 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from typing import Optional + import numpy as np import torch import coremltools as ct +from coremltools.converters.mil.testing_utils import get_op_types_in_program from coremltools.models.ml_program.compression_utils import ( affine_quantize_weights, decompress_weights, palettize_weights, sparsify_weights, ) -from coremltools.converters.mil.testing_utils import get_op_types_in_program +from coremltools.optimize.coreml._config import OpCompressorConfig + +def get_test_model_and_data( + multi_layer: bool = False, quantize_config: Optional[OpCompressorConfig] = None +): + """ + Prepare test model and data. + + :param multi_layer: If set, the test model will have multiple `nn.Conv2d` layers. + :param quantize_config: If set, the weights in the test model will be nbits quantization-friendly, + which means it will be first quantized according to the config, and then dequantized, so the + numerical error introduced during the quantization test will be minimum. + """ + if quantize_config is not None and multi_layer: + raise AssertionError("Multi-layer model doesn't support pre_quantize_nbits.") -def get_test_model_and_data(multi_layer=False): inputs = [ct.TensorType(name="data", shape=(1, 64, 10, 10))] torch_input_values = [torch.rand(*i.shape.to_list()) for i in inputs] coreml_input_values = { @@ -37,6 +53,25 @@ def forward(self, x): model = Model().eval() else: model = torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=2) + if quantize_config is not None: + # Manually change weight to make it quantization friendly. 
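Since LINEAR_SYMMETRIC int8 produces no zero point, an all-zero tensor is substituted so the constexpr op still receives one; the round trip is then a plain scale multiply. A quick numeric check (values picked by hand, using the standard affine-dequantize formula scale * (q - zero_point)):

    import numpy as np

    q = np.array([-127, 0, 64, 127], dtype=np.int8)    # symmetric int8 data
    scale = np.float32(0.02)
    zero_point = np.zeros_like(scale).astype(q.dtype)  # required by the op, even when unused

    dequant = scale * (q.astype(np.float32) - zero_point)
    assert np.allclose(dequant, [-2.54, 0.0, 1.28, 2.54])
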
+ nbits_range_max = 2 ** (quantize_config.nbits - 1) - 1 + mode_to_range = { + "LINEAR": (-nbits_range_max - 1, nbits_range_max), + "LINEAR_SYMMETRIC": (-nbits_range_max, nbits_range_max), + } + q_val_min, q_val_max = mode_to_range[quantize_config.mode] + original_shape = model.weight.detach().numpy().shape + fake_scale = 2.0 + quantize_friendly_weight = ( + np.random.randint(low=q_val_min, high=q_val_max + 1, size=original_shape) + * fake_scale + ) + with torch.no_grad(): + model.weight = torch.nn.Parameter( + torch.from_numpy(quantize_friendly_weight).float() + ) + model = model.eval() return model, inputs, torch_input_values, coreml_input_values diff --git a/coremltools/test/neural_network/test_numpy_nn_layers.py b/coremltools/test/neural_network/test_numpy_nn_layers.py index 404d60f97..134a4a515 100644 --- a/coremltools/test/neural_network/test_numpy_nn_layers.py +++ b/coremltools/test/neural_network/test_numpy_nn_layers.py @@ -6400,6 +6400,7 @@ def _test_pool3d_single_case( input_shape=shape[2:], strides=stride, ) + total_paddings = list(total_paddings) total_paddings.reverse() for p in total_paddings: before = int(math.floor(float(p) / 2.0)) diff --git a/coremltools/test/neural_network/test_tf_numeric.py b/coremltools/test/neural_network/test_tf_numeric.py index 3fcdca7e5..2899aca77 100644 --- a/coremltools/test/neural_network/test_tf_numeric.py +++ b/coremltools/test/neural_network/test_tf_numeric.py @@ -165,6 +165,11 @@ def test_data_reorganize_cpu_only(self): self.test_data_reorganize(cpu_only=True) def test_depthwise_conv(self, cpu_only=False): + if not cpu_only: + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + def get_coreml_model_depthwise(X, params, w): eval = True mlmodel = None diff --git a/coremltools/test/optimize/coreml/test_post_training_quantization.py b/coremltools/test/optimize/coreml/test_post_training_quantization.py index 7fb842bfb..3d0c17a57 100644 --- a/coremltools/test/optimize/coreml/test_post_training_quantization.py +++ b/coremltools/test/optimize/coreml/test_post_training_quantization.py @@ -98,10 +98,9 @@ def create_unique_weight(weight, nbits): size = weight.detach().numpy().size unique_number = 1 << nbits - weight = [] - partition_len = size // unique_number + 1 - for i in range(unique_number): - weight += [i] * (partition_len) + weight = list(range(unique_number)) + if size > unique_number: + weight.extend([unique_number - 1] * (size - unique_number)) weight = np.reshape(np.array(weight[:size]).astype(np.float32), shape) return weight @@ -324,7 +323,7 @@ def test_weight_palettization_unique_case_2(self, caplog): # validate parameters # converter should warn the user that one weight is not compressed mlmodel_palettized = palettize_weights(mlmodel, mode="unique") - warning_msg = "weight value cannot be represented in an 8 bits palettization. Skipped." + warning_msg = "Unique values in weight cannot be represented by 8 bits palettization." 
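The rewritten helper only has to guarantee at most 2**nbits distinct values so that UNIQUE-mode palettization succeeds; an equivalent standalone construction (shape and nbits chosen arbitrarily, helper name local to this sketch):

    import numpy as np

    def make_unique_weight(shape, nbits):
        size = int(np.prod(shape))
        n_levels = 1 << nbits
        # The first n_levels elements enumerate every level; any remainder repeats
        # the last level, so the unique-value count never exceeds 2**nbits.
        vals = list(range(n_levels)) + [n_levels - 1] * max(0, size - n_levels)
        return np.array(vals[:size], dtype=np.float32).reshape(shape)

    w = make_unique_weight((4, 8), nbits=4)   # 32 elements, 16 distinct values
    assert np.unique(w).size <= 1 << 4
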
        assert any([warning_msg in rec.message for rec in caplog.records])

        expected_ops = ['constexpr_lut_to_dense', 'cast', 'conv', 'conv', 'cast']
diff --git a/coremltools/version.py b/coremltools/version.py
index 27e3a7666..c21924b8c 100644
--- a/coremltools/version.py
+++ b/coremltools/version.py
@@ -4,4 +4,4 @@
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause


-__version__ = "7.0"  # VERSION_STRING
+__version__ = "7.1"  # VERSION_STRING
diff --git a/reqs/test.pip b/reqs/test.pip
index e90870439..784ce6769 100644
--- a/reqs/test.pip
+++ b/reqs/test.pip
@@ -24,9 +24,9 @@ scipy==1.9.2; python_version == '3.11'
 six
 sympy > 1.6
 gast==0.4.0
-torch==2.0.1
-torchaudio==2.0.2
-torchvision==0.15.2
+torch==2.1.0
+torchaudio==2.1.0
+torchvision==0.16.0
 xgboost==1.4.2; platform_machine != "arm64"
 mock
 wrapt
diff --git a/scripts/build.sh b/scripts/build.sh
index f74c15eab..43e1059b7 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -91,7 +91,7 @@ cd ${BUILD_DIR}
 ADDITIONAL_CMAKE_OPTIONS=""
 if [[ "$OSTYPE" == "darwin"* ]]; then
     NUM_PROCS=$(sysctl -n hw.ncpu)
-    ADDITIONAL_CMAKE_OPTIONS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.15"
+    ADDITIONAL_CMAKE_OPTIONS="-DCMAKE_OSX_DEPLOYMENT_TARGET=12.3"
 else
     NUM_PROCS=$(nproc)
 fi