diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 9c7b1cf8b5cb1f..31e4ec4358f494 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -87,7 +87,6 @@ jobs: git submodule update --init -- ${OPENVINO_REPO}/thirdparty/json git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gtest git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gflags - git submodule update --init -- ${OPENVINO_REPO}/thirdparty/open_model_zoo popd - name: Clone vcpkg diff --git a/.github/workflows/job_debian_packages.yml b/.github/workflows/job_debian_packages.yml index 1124963034a496..b9c7823a191940 100644 --- a/.github/workflows/job_debian_packages.yml +++ b/.github/workflows/job_debian_packages.yml @@ -43,7 +43,7 @@ jobs: - name: Install debian packages & check conflicts run: | apt-get update -y - + if [[ "${{ runner.arch }}" == "X64" ]]; then # Install debian packages from previous release apt-get install --no-install-recommends -y gnupg wget ca-certificates @@ -66,21 +66,26 @@ jobs: run: | /usr/share/openvino/samples/cpp/build_samples.sh /usr/share/openvino/samples/c/build_samples.sh - + [[ "${{ runner.arch }}" == "X64" ]] && path_by_arch="intel64" || path_by_arch="aarch64" ~/openvino_cpp_samples_build/$path_by_arch/Release/hello_query_device - + + # check integrity of OpenVINO Python API installation + apt-get install python3-pip -y + python3 -m pip check + python3 /usr/share/openvino/samples/python/hello_query_device/hello_query_device.py python3 -c 'from openvino import Core; Core().get_property("CPU", "AVAILABLE_DEVICES")' - + if [[ "${{ runner.arch }}" == "X64" ]]; then python3 -c 'from openvino import Core; Core().get_property("GPU", "AVAILABLE_DEVICES")' fi - + python3 -c 'from openvino import Core; Core().get_property("AUTO", "SUPPORTED_PROPERTIES")' python3 -c 'from openvino import Core; Core().get_property("MULTI", "SUPPORTED_PROPERTIES")' python3 -c 'from openvino import Core; 
Core().get_property("HETERO", "SUPPORTED_PROPERTIES")' python3 -c 'from openvino import Core; Core().get_property("BATCH", "SUPPORTED_PROPERTIES")' python3 -c 'from openvino.frontend import FrontEndManager; assert len(FrontEndManager().get_available_front_ends()) == 6' benchmark_app --help + opt_in_out --help ovc --help diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index b488f0a63730bf..aaad511e088f94 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -259,8 +259,6 @@ jobs: - name: TensorFlow 1 Layer Tests - TF FE if: fromJSON(inputs.affected-components).TF_FE.test run: | - # requires 'unit_tests' from 'mo' - export PYTHONPATH=${INSTALL_TEST_DIR}/mo python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_tests/ -m precommit -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-tf_fe.xml env: TEST_DEVICE: CPU @@ -269,8 +267,6 @@ jobs: - name: TensorFlow 2 Layer Tests - TF FE if: fromJSON(inputs.affected-components).TF_FE.test && runner.os != 'macOS' # Ticket: 123322 run: | - # requires 'unit_tests' from 'mo' - export PYTHONPATH=${INSTALL_TEST_DIR}/mo python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow2_keras_tests/ -n logical -m precommit_tf_fe --junitxml=${INSTALL_TEST_DIR}/TEST-tf2_fe.xml env: TEST_DEVICE: CPU diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 61f46dca78cac5..b29e2801572865 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -760,16 +760,114 @@ jobs: path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl if-no-files-found: 'error' - GPU_Stub: + GPU: + name: GPU ${{ matrix.TEST_TYPE }} Tests needs: [ Build, Smart_CI ] - runs-on: ubuntu-latest if: fromJSON(needs.smart_ci.outputs.affected_components).GPU + timeout-minutes: 80 + runs-on: [ self-hosted, gpu ] + strategy: + max-parallel: 2 + fail-fast: false + matrix: + TEST_TYPE: ['unit', 'func'] + container: + image: ubuntu:20.04 + options: --device 
/dev/dri:/dev/dri --group-add 109 --group-add 44 + volumes: + - /dev/dri:/dev/dri + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + GTEST_PARALLEL_SCRIPT: ${{ github.workspace }}/gtest_parallel.py steps: - - name: GPU stub + - name: Download OpenVINO package + uses: actions/download-artifact@v4 + with: + name: 'openvino_package' + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@v4 + with: + name: 'openvino_tests' + path: ${{ env.INSTALL_TEST_DIR }} + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables run: | - echo "This is only a stub to collect statistics of GPU runs filtered by Smart CI. - It will help us to estimate hardware requirements" - shell: bash + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "GTEST_PARALLEL_SCRIPT=$GITHUB_WORKSPACE/gtest_parallel.py" >> "$GITHUB_ENV" + + - name: Extract OpenVINO packages + run: | + pushd $INSTALL_DIR + tar -xzf openvino_package.tar.gz -C $INSTALL_DIR + popd + pushd $INSTALL_TEST_DIR + tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR + popd + + - name: Install dependencies (Linux) + run: | + $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -c=gpu -y + + apt-get update && apt-get install -y wget software-properties-common ca-certificates gpg-agent tzdata + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + TZ: "Europe/London" # to prevent tzdata from waiting user input + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Get gtest-parallel script + run: wget 
https://raw.githubusercontent.com/google/gtest-parallel/master/gtest_parallel.py + + - name: Install GPU Drivers + run: | + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15985.7/intel-igc-core_1.0.15985.7_amd64.deb + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15985.7/intel-igc-opencl_1.0.15985.7_amd64.deb + wget https://github.com/intel/compute-runtime/releases/download/24.05.28454.6/intel-level-zero-gpu-dbgsym_1.3.28454.6_amd64.ddeb + wget https://github.com/intel/compute-runtime/releases/download/24.05.28454.6/intel-level-zero-gpu_1.3.28454.6_amd64.deb + wget https://github.com/intel/compute-runtime/releases/download/24.05.28454.6/intel-opencl-icd-dbgsym_24.05.28454.6_amd64.ddeb + wget https://github.com/intel/compute-runtime/releases/download/24.05.28454.6/intel-opencl-icd_24.05.28454.6_amd64.deb + wget https://github.com/intel/compute-runtime/releases/download/24.05.28454.6/libigdgmm12_22.3.11_amd64.deb + dpkg -i *.deb + + # + # Tests + # + + - name: OpenVINO GPU ${{ matrix.TEST_TYPE }} Tests + run: | + source ${INSTALL_DIR}/setupvars.sh + + rm -rf ${INSTALL_TEST_DIR}/gpu_${{ matrix.TEST_TYPE }}_tests && mkdir -p ${INSTALL_TEST_DIR}/gpu_${{ matrix.TEST_TYPE }}_tests + + test_filter='' + if [[ "${{ matrix.TEST_TYPE }}" == "unit" ]]; then + # Ticket: 138018 + test_filter='-*scatter_nd_update_gpu.dynamic_padded_output*:*border_gpu.basic_zero_input*:*bicubic_zeros_no_align_data1x1*:*bicubic_border_align_batches*:*bilinear_zeros_no_align_data1x1*:*non_zero_gpu.empty_input*:*mark_shape_of_subgraphs.concat_with_empty_tensor_inputs*:*concat_cpu_impl.dynamic_4d_f*:*border_gpu.basic_zero_input_dynamic*:*network_test.model_with_empty_input_is_not_dynamic*:*bicubic_zeros_align_data1x1*' + else + test_filter='*smoke*' + fi + python3 ${GTEST_PARALLEL_SCRIPT} ${INSTALL_TEST_DIR}/ov_gpu_${{ matrix.TEST_TYPE }}_tests --dump_json_test_results=${INSTALL_TEST_DIR}/gpu_${{ matrix.TEST_TYPE }}_tests/ov_gpu_${{ 
matrix.TEST_TYPE }}_tests.json -- --report_unique_name --gtest_filter=$test_filter + + + - name: Upload Test Results + uses: actions/upload-artifact@v4 + if: always() + with: + name: test-results-${{ matrix.TEST_TYPE }}-gpu + path: ${{ env.INSTALL_TEST_DIR }}/gpu_${{ matrix.TEST_TYPE }}_tests + if-no-files-found: 'error' Overall_Status: name: ci/gha_overall_status diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 3e0e13d21a7efb..eee8dc9c5e57ad 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -125,6 +125,9 @@ jobs: update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30 update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30 + # For building the latest h5py + apt install --assume-yes --no-install-recommends libhdf5-dev + - name: Install sccache uses: mozilla-actions/sccache-action@v0.0.4 with: diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 90a1e7c9480822..cb7bd0c8b0251f 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -107,6 +107,7 @@ jobs: git submodule update --init -- ${OPENVINO_REPO}/thirdparty/json git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gtest git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gflags + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/telemetry git submodule update --init -- ${OPENVINO_REPO}/src/plugins/intel_cpu git submodule update --init -- ${OPENVINO_REPO}/thirdparty/open_model_zoo popd diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index becd5bf6087f53..6ced43ed2cc826 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -617,8 +617,6 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test shell: cmd run: | - :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% python3 -m pytest ${{ 
env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ -n logical -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml env: TEST_DEVICE: CPU @@ -628,9 +626,6 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test shell: cmd run: | - :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% - python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml env: TEST_DEVICE: CPU diff --git a/.gitmodules b/.gitmodules index b483bfe8e9e171..c8bf9375572131 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,3 +78,6 @@ [submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"] path = src/plugins/intel_npu/thirdparty/level-zero-ext url = https://github.com/intel/level-zero-npu-extensions.git +[submodule "thirdparty/telemetry"] + path = thirdparty/telemetry + url = https://github.com/openvinotoolkit/telemetry.git diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index 3fb78283d76773..49b489da23799d 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -53,6 +53,8 @@ macro(ov_cpack_settings) (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND # temporary block nvidia NOT item STREQUAL "nvidia" AND + # don't install node_addon + NOT item MATCHES "node_addon" AND # don't install Intel OpenMP NOT item STREQUAL "omp" AND # the same for pugixml diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index b3863f68aa173f..3f88d22ad7616c 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -39,6 +39,8 @@ macro(ov_cpack_settings) (NOT item MATCHES "^${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_python.*" OR ENABLE_PYTHON_PACKAGING) AND # temporary block nvidia NOT item STREQUAL "nvidia" AND + # don't install node_addon + NOT item MATCHES "node_addon" AND # temporary block npu NOT item STREQUAL 
"npu" AND # don't install Intel OpenMP diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst index 67c175c25bde21..10946ce84100da 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-tensorflow-yolo.rst @@ -58,12 +58,19 @@ This section explains how to convert the YOLOv4 Keras model from the `repository python keras-YOLOv3-model-set/tools/model_converter/convert.py /yolov4-tiny.cfg /yolov4-tiny.weights -4. Run model conversion for from the TensorFlow 2 format to an IR: +4. Run model conversion from the TensorFlow 2 to an IR format: .. note:: Before you run the conversion, make sure you have installed all the model conversion API dependencies for TensorFlow 2. + If you get errors, you may need to add the additional step to divide the input by 255: + + .. code-block:: sh + + --scale_values=image_input[255] + + .. 
code-block:: sh mo --saved_model_dir yolov4 --output_dir models/IRs --input_shape [1,608,608,3] --model_name yolov4 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index b06528d3b27dea..8f8f66a2e51d8a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -169,6 +169,7 @@ Table of Contents * :doc:`RNNCell <../operation-specs/sequence/rnn-cell-3>` * :doc:`RNNSequence <../operation-specs/sequence/rnn-sequence-5>` * :doc:`ROIAlign <../operation-specs/detection/roi-align-9>` +* :doc:`ROIAlignRotated <../operation-specs/detection/roi-align-rotated-14>` * :doc:`ROIPooling <../operation-specs/detection/roi-pooling-1>` * :doc:`Roll <../operation-specs/movement/roll-7>` * :doc:`Round <../operation-specs/arithmetic/round-5>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 250ef955bb41a8..a39de0b72d5a8e 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -188,6 +188,7 @@ Operation Specifications RNNSequence-5 ROIAlign-3 ROIAlign-9 + ROIAlignRotated-14 ROIPooling-1 Roll-7 Round-5 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/detection/roi-align-rotated-14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/detection/roi-align-rotated-14.rst new file mode 100644 index 00000000000000..7ec8acdd2238b6 --- /dev/null +++ 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/detection/roi-align-rotated-14.rst @@ -0,0 +1,123 @@ +.. {#openvino_docs_ops_detection_ROIAlignRotated_14} +ROIAlignRotated +=============== + + +.. meta:: + :description: Learn about ROIAlignRotated-14 - an object detection operation, + which can be performed on three required input tensors. + + +**Versioned name**: *ROIAlignRotated-14* + +**Category**: *Object detection* + +**Short description**: *ROIAlignRotated* is a *pooling layer* used over feature maps of non-uniform input sizes and outputs a feature map of a fixed size. + +**Detailed description**: `Reference `__. + +*ROIAlignRotated* performs the following for each Region of Interest (ROI) for each input feature map: + +1. Multiply ROI box coordinates with *spatial_scale* to produce box coordinates relative to the input feature map size. +2. Rotate ROI box according to given angle in radians and *clockwise_mode*. +3. Divide the box into equal bins. One bin is mapped to single output feature map element. +4. Inside every bin, calculate regularly spaced sample points, according to the *sampling_ratio* attribute. +5. To calculate the value of single sample point, calculate further 4 points around each sample point to apply bilinear interpolation. +6. Calculate the average of all sample points in the bin to produce output feature map element. + +The 4 points used for bilinear interpolation are calculated as the closest integer coordinates to the sample point. +As an example, if the sample point is [2.14, 3.56], then the 4 integer points are [2, 3], [2, 4], [3, 3], [3, 4]. + +Each ROI box's center is shifted by [-0.5, -0.5] before pooling to achive better alignment with the closest integer coordinates used for bilinear filtering. + +**Attributes** + +* *pooled_h* + + * **Description**: *pooled_h* is the height of the ROI output feature map. 
+ * **Range of values**: a positive integer + * **Type**: ``int`` + * **Required**: *yes* + +* *pooled_w* + + * **Description**: *pooled_w* is the width of the ROI output feature map. + * **Range of values**: a positive integer + * **Type**: ``int`` + * **Required**: *yes* + +* *sampling_ratio* + + * **Description**: *sampling_ratio* describes the number of sampling points bins over height and width to use to calculate each output feature map element. If the value is greater than 0, then ``bin_points_h = sampling_ratio`` and ``bin_points_w = sampling_ratio``. If the value is equal to 0 then adaptive number of elements over height and width is used: ``bin_points_h = ceil(roi_height / pooled_h)`` and ``bin_points_w = ceil(roi_width / pooled_w)`` respectively. The total number of sampling points for a single bin is equal to ``bin_points_w * bin_points_h``. + * **Range of values**: a non-negative integer + * **Type**: ``int`` + * **Required**: *yes* + +* *spatial_scale* + + * **Description**: *spatial_scale* is a multiplicative spatial scale factor to that is applied to the ROI box(height, weight and center vector) before pooling. + WARNING! + Spatial scale is also applied to the center point of the ROI box. It means that scaling does not only change the size of the ROI box, but also its position. + For example, if the spatial scale is 2.0, ROI box center is [0.5, 0.5], box width is 1.0 and box height is 1.0, then after scaling the ROI box center will be [1.0, 1.0], box width will be 2.0 and box height will be 2.0. + * **Range of values**: a positive floating-point number + * **Type**: ``float`` + * **Required**: *yes* + +* *clockwise_mode* + + * **Description**: If True, the angle for each ROI represents a clockwise rotation, otherwise - counterclockwise rotation. + * **Type**: ``bool`` + * **Default value**: False + * **Required**: *no* + +**Inputs**: + +* **1**: 4D input tensor of shape ``[N, C, H, W]`` with feature maps of type *T*. 
**Required.** + +* **2**: 2D input tensor of shape ``[NUM_ROIS, 5]`` describing ROI box consisting of 5 element tuples: ``[center_x, center_y, width, height, angle]`` in relative coordinates of type *T*. The angle is always in radians. + * **Required.** + +* **3**: 1D input tensor of shape ``[NUM_ROIS]`` with batch indices of type *IND_T*. **Required.** + +**Outputs**: + +* **1**: 4D output tensor of shape ``[NUM_ROIS, C, pooled_h, pooled_w]`` with feature maps of type *T*. + +**Types** + +* *T*: any supported floating-point type. + +* *IND_T*: any supported integer type. + + +**Example** + +.. code-block:: xml + :force: + + + + + + 7 + 256 + 200 + 200 + + + 1000 + 5 + + + 1000 + + + + + 1000 + 256 + 6 + 6 + + + diff --git a/docs/home.rst b/docs/home.rst index 6812ee27f3f804..adba092ae44892 100644 --- a/docs/home.rst +++ b/docs/home.rst @@ -1,131 +1,178 @@ ============================ -OpenVINO 2024 +OpenVINO 2024.0 ============================ .. meta:: :google-site-verification: _YqumYQ98cmXUTwtzM_0WIIadtDc6r_TMYGbmGgNvrk + +**OpenVINO is an open-source toolkit** for optimizing and deploying deep learning models from cloud +to edge. It accelerates deep learning inference across various use cases, such as generative AI, video, +audio, and language with models from popular frameworks like PyTorch, TensorFlow, ONNX, and more. +Convert and optimize models, and deploy across a mix of Intel® hardware and environments, on-premises +and on-device, in the browser or in the cloud. -.. raw:: html - - - +Check out the `OpenVINO Cheat Sheet. `__ .. container:: :name: ov-homepage-banner - OpenVINO 2024.0 - .. raw:: html +
    -
  • An open-source toolkit for optimizing and deploying deep learning models.
    Boost your AI deep-learning inference performance!
  • - -
  • Better OpenVINO integration with PyTorch!
    Use PyTorch models directly, without converting them first.
    - Learn more... +
  • +

    An open-source toolkit for optimizing and deploying deep learning models.

    +

    Boost your AI deep-learning inference performance!

    + Learn more
  • -
  • OpenVINO via PyTorch 2.0 torch.compile()
    Use OpenVINO directly in PyTorch-native applications!
    - Learn more... +
  • +

    Better OpenVINO integration with PyTorch!

    +

    Use PyTorch models directly, without converting them first.

    + Learn more
  • -
  • Do you like Generative AI? You will love how it performs with OpenVINO!
    - Check out our new notebooks... -
+
  • +

    OpenVINO via PyTorch 2.0 torch.compile()

    +

    Use OpenVINO directly in PyTorch-native applications!

    + Learn more +
  • +
  • +

    Do you like Generative AI?

    +

    You will love how it performs with OpenVINO!

    + Check out our new notebooks +
  • +
  • +

    Boost your AI deep learning inference performance.

    +

    Use Intel's open-source OpenVINO toolkit for optimizing and deploying deep learning models.

    + Learn more +
  • +
    - .. button-ref:: get-started - :ref-type: doc - :class: ov-homepage-banner-btn - :color: primary - :outline: +| +| - Get started +.. image:: _static/images/openvino-overview-diagram.jpg + :align: center + :alt: openvino diagram -.. rst-class:: openvino-diagram - - .. image:: _static/images/ov_homepage_diagram.png - :align: center +| +Places to Begin +++++++++++++++++++++++++++++ .. grid:: 2 2 3 3 :class-container: ov-homepage-higlight-grid - .. grid-item-card:: Performance Benchmarks - :link: about-openvino/performance-benchmarks - :link-alt: performance benchmarks - :link-type: doc - - See latest benchmark numbers for OpenVINO and OpenVINO Model Server - - .. grid-item-card:: Work with Multiple Model Formats - :link: openvino-workflow/model-preparation - :link-alt: Supported Model Formats - :link-type: doc - - OpenVINO supports different model formats: PyTorch, TensorFlow, TensorFlow Lite, ONNX, and PaddlePaddle. - - .. grid-item-card:: Deploy at Scale with OpenVINO Model Server - :link: ovms_what_is_openvino_model_server - :link-alt: model server - :link-type: doc - - Cloud-ready deployments for microservice applications - - .. grid-item-card:: Optimize Models - :link: openvino-workflow/model-optimization - :link-alt: model optimization - :link-type: doc - - Boost performance using quantization and compression with NNCF + .. grid-item-card:: Installation + :img-top: ./_static/images/home_begin_tile_01.png + :class-card: homepage_begin_tile + + This guide introduces installation and learning materials for Intel® Distribution of OpenVINO™ toolkit. + + .. button-link:: get-started/install-openvino.html + :color: primary + :outline: - .. grid-item-card:: Use OpenVINO with PyTorch Apps with torch.compile() - :link: openvino-workflow/torch-compile - :link-alt: torch.compile - :link-type: doc + Get Started - Optimize generation of the graph model with PyTorch 2.0 torch.compile() backend - - .. 
grid-item-card:: Optimize and Deploy Generative AI - :link: learn-openvino/llm_inference_guide - :link-alt: gen ai - :link-type: doc - - Enhance the efficiency of Generative AI - - -Feature Overview -############################## - -.. grid:: 1 2 2 2 - :class-container: ov-homepage-feature-grid - - .. grid-item-card:: Local Inference & Model Serving - - You can either link directly with OpenVINO Runtime to run inference locally or use OpenVINO Model Server - to serve model inference from a separate server or within Kubernetes environment - - .. grid-item-card:: Improved Application Portability - - Write an application once, deploy it anywhere, achieving maximum performance from hardware. Automatic device - discovery allows for superior deployment flexibility. OpenVINO Runtime supports Linux, Windows and MacOS and - provides Python, C++ and C API. Use your preferred language and OS. - - .. grid-item-card:: Minimal External Dependencies - - Designed with minimal external dependencies reduces the application footprint, simplifying installation and - dependency management. Popular package managers enable application dependencies to be easily installed and - upgraded. Custom compilation for your specific model(s) further reduces final binary size. + .. grid-item-card:: Performance Benchmarks + :img-top: ./_static/images/home_begin_tile_02.png + :class-card: homepage_begin_tile + + See latest benchmark numbers for OpenVINO and OpenVINO Model Server. + + .. button-link:: about-openvino/performance-benchmarks.html + :color: primary + :outline: + + View data + + .. grid-item-card:: Framework Compatibility + :img-top: ./_static/images/home_begin_tile_03.png + :class-card: homepage_begin_tile + + Load models directly (for TensorFlow, ONNX, PaddlePaddle) or convert to OpenVINO format. + + .. button-link:: openvino-workflow/model-preparation.html + :color: primary + :outline: + + Load your model + + .. 
grid-item-card:: Easy Deployment + :img-top: ./_static/images/home_begin_tile_04.png + :class-card: homepage_begin_tile + + Get started in just a few lines of code. + + .. button-link:: openvino-workflow/running-inference.html + :color: primary + :outline: + + Run Inference + + .. grid-item-card:: Serving at scale + :img-top: ./_static/images/home_begin_tile_05.png + :class-card: homepage_begin_tile + + Cloud-ready deployments for microservice applications. + + .. button-link:: openvino-workflow/running-inference.html + :color: primary + :outline: + + Try it out + + .. grid-item-card:: Model Compression + :img-top: ./_static/images/home_begin_tile_06.png + :class-card: homepage_begin_tile + + Reach for performance with post-training and training-time compression with NNCF. + + .. button-link:: openvino-workflow/model-optimization.html + :color: primary + :outline: + + Optimize now + +| + +Key Features +++++++++++++++++++++++++++++ + + +.. grid:: 2 2 2 2 + :class-container: homepage_begin_container + + .. grid-item-card:: Model Compression + :img-top: ./_static/images/home_key_feature_01.png + :class-card: homepage_begin_key + + You can either link directly with OpenVINO Runtime to run inference locally or use OpenVINO Model Server to serve model inference from a separate server or within Kubernetes environment. + + .. grid-item-card:: Fast & Scalable Deployment + :img-top: ./_static/images/home_key_feature_02.png + :class-card: homepage_begin_key + + Write an application once, deploy it anywhere, achieving maximum performance from hardware. Automatic device discovery allows for superior deployment flexibility. OpenVINO Runtime supports Linux, Windows and MacOS and provides Python, C++ and C API. Use your preferred language and OS. + + .. 
grid-item-card:: Lighter Deployment + :img-top: ./_static/images/home_key_feature_03.png + :class-card: homepage_begin_key + + Designed with minimal external dependencies reduces the application footprint, simplifying installation and dependency management. Popular package managers enable application dependencies to be easily installed and upgraded. Custom compilation for your specific model(s) further reduces final binary size. .. grid-item-card:: Enhanced App Start-Up Time - - In applications where fast start-up is required, OpenVINO significantly reduces first-inference latency by using the - CPU for initial inference and then switching to another device once the model has been compiled and loaded to memory. - Compiled models are cached improving start-up time even more. - + :img-top: ./_static/images/home_key_feature_04.png + :class-card: homepage_begin_key + + In applications where fast start-up is required, OpenVINO significantly reduces first-inference latency by using the CPU for initial inference and then switching to another device once the model has been compiled and loaded to memory. Compiled models are cached, improving start-up time even more. .. 
toctree:: diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css index 7234909a2df28a..a919e9902c2daa 100644 --- a/docs/sphinx_setup/_static/css/custom.css +++ b/docs/sphinx_setup/_static/css/custom.css @@ -1164,9 +1164,3 @@ input:-webkit-autofill { -webkit-box-shadow: 0 0 0px 1000px white inset; } - -/* Splide carousel */ -.splide__slide { - margin-right: 2rem; - overflow: hidden; -} diff --git a/docs/sphinx_setup/_static/css/homepage_style.css b/docs/sphinx_setup/_static/css/homepage_style.css index e505be4088e517..395bbd27dddc6a 100644 --- a/docs/sphinx_setup/_static/css/homepage_style.css +++ b/docs/sphinx_setup/_static/css/homepage_style.css @@ -1,25 +1,74 @@ -/* overrides */ -.switcher-set, .prev-next-bottom, .bd-toc {display: none!important;} -#openvino-documentation > h1 { - display: none; +.bd-toc { + display: none !important; } h1 { - /*font-size: var(--pst-font-size-h2);*/ - /*margin-bottom: 3rem;*/ - display: none!important; + font-size: 60px !important; } +#ov-homepage-banner { + border-bottom: 0px; +} + +.ov-homepage-label { + font-size: 14px; + font-weight: bold; +} + +.ov-homepage-slide-title { + color: white !important; + font-size: 27px !important; + font-weight: lighter !important; +} + +.ov-homepage-slide-subtitle { + color: white !important; + font-size: 18px !important; + font-weight: lighter !important; +} + +.splide__pagination { + bottom: .8em !important; +} + +#ov-homepage-banner #splide01-slide01 { + background-image: linear-gradient(350deg, #004CA9 0%, #381965 50%, #070862 100%); + padding: 32px 48px !important; +} + +#ov-homepage-banner #splide01-slide02 { + background-image: linear-gradient(270deg, #034CAA 00%, #4B9D77 50%, #034CAA 100%); + padding: 32px 48px !important; +} -#ov-homepage-banner, .openvino-diagram, .ov-homepage-higlight-grid { - margin-bottom: 90px!important; +#ov-homepage-banner #splide01-slide03 { + background-image: linear-gradient(230deg, #030B5E 0%, #285455 40%, #030B5E 
100%); + padding: 32px 48px !important; +} + +#ov-homepage-banner #splide01-slide04 { + background-image: linear-gradient(110deg, #214DA4 0%, #03aadd 100%); + padding: 32px 48px !important; +} + +#ov-homepage-banner #splide01-slide05 { + background-image: linear-gradient(350deg, #034CAA 20%, #034CAA 30%, #4B9D77 100%); + padding: 32px 48px !important; } #ov-homepage-banner { - padding: 2rem; - background-color: #76CEFF; - background-image: linear-gradient(346deg, #728EFA 0%, #76CEFF 50%, #BBE8BD 100%); - border-bottom: 5px solid #0068b5; + p { + margin: 0.4rem 0 1.2rem 0; + } +} +.splide__pagination>li { + padding-right: 6px; +} +.is-active{ + color: white !important; +} +.homepage-begin-container { + padding: 0px; } #ov-homepage-banner p:first-of-type { @@ -37,100 +86,142 @@ h1 { #ov-homepage-banner .line-block { line-height: 1.5; text-align: left; - color: #000000; + color: white; } .ov-homepage-banner-btn { - transition: 0.7s; - font-weight: bold; - background-color: #0068b5; + font-size: 12px !important; + font-weight: bold !important; color: #ffffff !important; + border: 1px solid white; + padding: 10px 18px !important; } .ov-homepage-banner-btn:hover { - background-color: white!important; - color: var(--sd-color-primary)!important; + background-color: white !important; + color: var(--sd-color-primary) !important; } -#ov-homepage-banner > p:nth-child(3) { +#ov-homepage-banner>p:nth-child(3) { margin-bottom: 0; } #ov-homepage-banner a, #ov-homepage-banner a:visited { text-decoration: none; - color: #00A3F6; + color: white; transition: .7s; font-weight: 600; } #ov-homepage-banner a:hover { - color: #653171; + color: white; +} + +#homepage_key_container { + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: space-between; + padding: 0px !important; +} + +.homepage-begin-tile { + .sd-card-img-top { + width: 80px !important; + padding: 10px; + } +} + +.homepage-begin-tile:hover{ + box-shadow: none !important; +} + +.sd-card-img-top { + 
border-radius: 0; } -.openvino-diagram { - width: 65%; - margin-bottom: 3rem; +.sd-card-text { + font-size: 0.9rem !important; } -@media (max-width: 720px) { - .openvino-diagram { - width: 90%; +.homepage-begin-key { + height: 450px; + border: 0 !important; + .sd-card-body { + padding: 0% + } + .sd-card-img-top { + padding: 0px; + padding-bottom: 15px; } } +.sd-btn-outline-primary { + color: #0054AE !important; + text-decoration: none !important; + background-color: #FFF !important; + border-radius: 0; + position: absolute; + bottom: 20px; +} + +.homepage-begin-tile { + border-radius: 0; + margin-bottom: 5px; + position: relative; + border-width: 0.2cqb; +} + +.sd-shadow-sm { + box-shadow: none !important; +} + +.homepage-begin-tile:hover { + border-color: #0054AE; +} + +.sd-btn-outline-primary:hover { + border-color: #0054AE !important; + background-color: #0054AE !important; +} + +.sd-btn-outline-primary:hover a { + color: #fff !important; + background-color: #0054AE !important; +} + .ov-homepage-higlight-grid { padding: 0; } -.ov-homepage-higlight-grid > div { - justify-content:space-evenly; - row-gap: 20px; +.ov-homepage-higlight-grid>div { + justify-content: space-evenly; + row-gap: 10px; } -.ov-homepage-higlight-grid > div > div.sd-col { - width: 230px; +.ov-homepage-higlight-grid>div>div.sd-col { + width: 280px; min-height: 300px; padding: 0; - margin-inline: 5px; + margin-inline: 0px; } -.ov-homepage-higlight-grid .sd-card { - box-shadow: 0 0 20px 5px #f3f3f3!important; - transition: 0.5s; - overflow: hidden; -} .ov-homepage-higlight-grid .sd-card-hover:hover { - border-color: var(--sd-color-card-border)!important; - transform: scale(1.00)!important; -} - -.ov-homepage-higlight-grid .sd-shadow-sm:hover { - box-shadow: 0 0 10px 2px rgba(108,36,240,0.3) !important; + border-color: var(--sd-color-card-border) !important; + transform: scale(1.00) !important; + box-shadow: none !important; } .ov-homepage-higlight-grid .sd-card-title { - height: 52.781px; + 
height: 10px; margin-bottom: 2rem; } .ov-homepage-higlight-grid .sd-card-text { - font-size: 0.9rem; -} - -.ov-homepage-higlight-grid .sd-card::after { - align-self: flex-end; - display: block; - content: "LEARN MORE"; - width: 100%; font-size: 0.8rem; - text-align: center; - padding-top: 0.8rem; - font-weight: 600; - color: #00A3F6; - height: 3rem; - background-color: #CDEDFF; + height: 60px; } .ov-homepage-feature-grid .sd-col { @@ -140,8 +231,7 @@ h1 { .ov-homepage-feature-grid .sd-card { border: none; - box-shadow: 0 0 20px 2px #f3f3f3!important; - /* box-shadow: none!important; */ + box-shadow: none!important; } .ov-homepage-feature-grid .sd-row { @@ -149,25 +239,13 @@ h1 { justify-content: center; } - -/* =================================================================== */ -/* @media screen and (min-width: 720px) { - main.col-xl-7.bd-content { - flex: 0 0 75%!important; - max-width: 75%!important; - } -}*/ - @media screen and (max-width: 535px) { .ov-homepage-feature-grid .sd-row { flex-direction: column; align-items: center; } + .ov-homepage-feature-grid .sd-col { max-width: 100%; } -} - -.sd-row { - --sd-gutter-x: 0rem!important; -} +} \ No newline at end of file diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_01.png b/docs/sphinx_setup/_static/images/home_begin_tile_01.png new file mode 100644 index 00000000000000..51a1c2ad044a94 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2851ad88d3765f49d09713bcc66c29eaa9e09d39075205fbe684de23f85483da +size 2330 diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_02.png b/docs/sphinx_setup/_static/images/home_begin_tile_02.png new file mode 100644 index 00000000000000..4aa42fe9d952bf --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45da5ea223dab074b0b54641049b2f4a197a3e6dfa09fbad7a4ebbea482d5a0 
+size 1230 diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_03.png b/docs/sphinx_setup/_static/images/home_begin_tile_03.png new file mode 100644 index 00000000000000..7b4c45d6048ef5 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb537eab14c57db2fb3a682acf1628272b75d36ab59d4e31b0e8843d03f76fd +size 3414 diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_04.png b/docs/sphinx_setup/_static/images/home_begin_tile_04.png new file mode 100644 index 00000000000000..9725419c8fa2dc --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81a77030d7925e2e3a274e093cac2360dd7f2a59c7db4ccf040937fb7bb2002 +size 1635 diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_05.png b/docs/sphinx_setup/_static/images/home_begin_tile_05.png new file mode 100644 index 00000000000000..da5f856b0dbbc8 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_05.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886486c7771c60ef84e92aeb134776f1660a7ad2517fe47df25448bf42fb8e40 +size 2545 diff --git a/docs/sphinx_setup/_static/images/home_begin_tile_06.png b/docs/sphinx_setup/_static/images/home_begin_tile_06.png new file mode 100644 index 00000000000000..932408ca0557e3 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_begin_tile_06.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861fbbcb3c4439c77c03b6bf5b896748ff87c38b7f5dc086957ac3f7a3cb78ab +size 1481 diff --git a/docs/sphinx_setup/_static/images/home_key_feature_01.png b/docs/sphinx_setup/_static/images/home_key_feature_01.png new file mode 100644 index 00000000000000..2bd10f757a25d1 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_key_feature_01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a63ad11f17a8daea3e29214fac4112e2d7e5721bb4904347055ebc0cf9edaf7a +size 759281 diff --git a/docs/sphinx_setup/_static/images/home_key_feature_02.png b/docs/sphinx_setup/_static/images/home_key_feature_02.png new file mode 100644 index 00000000000000..0d7fdc9344fc61 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_key_feature_02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7639854861ec873b990d9f573e7b6f1a5d67dfcb1279f5e4fbd57f66504e4e0c +size 851859 diff --git a/docs/sphinx_setup/_static/images/home_key_feature_03.png b/docs/sphinx_setup/_static/images/home_key_feature_03.png new file mode 100644 index 00000000000000..bb4bb0f0ce1ec6 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_key_feature_03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:088ff078d8a4238caac7f69920b55d7c6dd6f9775c934237256205237d14a8ef +size 507372 diff --git a/docs/sphinx_setup/_static/images/home_key_feature_04.png b/docs/sphinx_setup/_static/images/home_key_feature_04.png new file mode 100644 index 00000000000000..c4b0cc56dad2a3 --- /dev/null +++ b/docs/sphinx_setup/_static/images/home_key_feature_04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4308b7ca223434b058eac2ca1a6325fb2ac332810147e0d00ec34dc1d9cccd4 +size 447695 diff --git a/docs/sphinx_setup/_static/images/openvino-overview-diagram.jpg b/docs/sphinx_setup/_static/images/openvino-overview-diagram.jpg new file mode 100644 index 00000000000000..bfd3c6533446f3 --- /dev/null +++ b/docs/sphinx_setup/_static/images/openvino-overview-diagram.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739d604dc4b8bae082e9c70e24328bcf9c30fa3fe5b1f884b9bd129509302b4e +size 1465073 diff --git a/docs/sphinx_setup/_static/images/ov_homepage_diagram.png b/docs/sphinx_setup/_static/images/ov_homepage_diagram.png deleted file mode 100644 index 92eca6aecebf01..00000000000000 --- 
a/docs/sphinx_setup/_static/images/ov_homepage_diagram.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a48358ec0e4e256d9e7ec45dfdfe7ecf0e33f1395d67905ef2b6f659c197d76 -size 132735 diff --git a/docs/sphinx_setup/_static/js/custom.js b/docs/sphinx_setup/_static/js/custom.js index 4ad51222bd1da9..404332dce91173 100644 --- a/docs/sphinx_setup/_static/js/custom.js +++ b/docs/sphinx_setup/_static/js/custom.js @@ -257,18 +257,17 @@ function addFooter() { } function initSplide() { - const slides = $('.splide__slide'); - const height = (slides.length > 4) ? 96 + ((slides.length - 4) * 16) : 96 + var splide = new Splide('.splide', { - direction : 'ttb', - type : 'loop', - height : `${height}px`, + type : 'fade', + autoHeight : true, perPage : 1, autoplay : true, arrows : false, waitForTransition : true, wheel : true, wheelSleep : 250, + interval : 3000, }); splide.mount(); } diff --git a/docs/sphinx_setup/_static/js/graphs_ov_tf.js b/docs/sphinx_setup/_static/js/graphs_ov_tf.js deleted file mode 100644 index bf16e9dacc5fe6..00000000000000 --- a/docs/sphinx_setup/_static/js/graphs_ov_tf.js +++ /dev/null @@ -1,109 +0,0 @@ -$(document).ready(function () { - var chartBlock = $('.chart-block-tf-ov'); - chartBlock.each(function () { - var url = $(this).data('loadcsv'); - Papa.parse(url, { - download: true, - complete: renderData($(this)) - }) - }); - - function getLabels(data) { - return data - .map((item) => item[1]); - } - - function getChartOptions(title, displayLabels) { - return { - responsive: false, - maintainAspectRatio: false, - legend: { display: true, position: 'bottom' }, - title: { - display: true, - text: title - }, - scales: { - xAxes: [{ - ticks: { - beginAtZero: true - } - }], - yAxes: [{ - ticks: { - display: displayLabels, //this will remove only the label - beginAtZero: true - } - }] - }, - plugins: { - datalabels: { - color: "#4A4A4A", - anchor: "end", - align: "end", - clamp: false, - offset: 0, - display: true, - 
font: { - size: 8, - family: 'Roboto' - } - } - } - } - } - - function getChartData(data) { - function getDataset(data, col, label, color) { - return { - label: label, - data: data.map(function (item) { - return item[col] - }), - backgroundColor: color, - borderColor: 'rgba(170,170,170,0)', - barThickness: 12 - } - } - return { - labels: getLabels(data), - datasets: [getDataset(data, 2, 'openvino', '#00C7FD'), getDataset(data, 3, 'TF', '#8F5DA2')] - }; - } - - function renderData(currentChart) { - return function (result) { - var data = result.data; - // remove col names - data.shift(0); - var chartName = data[1][0]; - var chartSlug = chartName.replace(')', '').replace(' (', '-'); - var graphContainer = $('
    '); - var chartContainer = $('
    '); - graphContainer.attr('id', 'ov-graph-container-' + chartSlug); - chartContainer.addClass('chart-container'); - chartContainer.addClass('container'); - var chartWrap = $('
    '); - chartWrap.addClass('chart-wrap'); - chartWrap.addClass('container'); - chartContainer.append(chartWrap); - var chart = $('
    '); - chart.addClass('chart'); - chart.addClass('col-md-12'); - var canvas = $(''); - chart.append(canvas); - var container = $('
    '); - container.addClass('row'); - container.append(chart); - var context = canvas.get(0).getContext('2d'); - context.canvas.width = context.canvas.width * 2.5; - var chartTitle = chartName + ', Throughput (FPS) Precision: FP32 (Higher is better)'; - new Chart(context, { - type: 'horizontalBar', - data: getChartData(data), - options: getChartOptions(chartTitle, true) - }); - chartContainer.append(container); - currentChart.append(chartContainer); - } - } -}); diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index ddb70c4d2a672b..b4677d3e5a1eb2 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -50,6 +50,8 @@ } myst_enable_extensions = ["colon_fence"] +myst_heading_anchors = 4 +suppress_warnings = ['misc.highlighting_failure'] source_suffix = { '.rst': 'restructuredtext', diff --git a/src/bindings/js/node/CMakeLists.txt b/src/bindings/js/node/CMakeLists.txt index 90ce599bdac7c8..47686902b1620f 100644 --- a/src/bindings/js/node/CMakeLists.txt +++ b/src/bindings/js/node/CMakeLists.txt @@ -57,6 +57,7 @@ add_library(${PROJECT_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/src/preprocess/resize_algorithm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/errors.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/helper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/type_validation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/tensor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/infer_request.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/compiled_model.cpp @@ -102,6 +103,8 @@ ov_set_install_rpath(${PROJECT_NAME} ov_add_clang_format_target(${PROJECT_NAME}_clang FOR_TARGETS ${PROJECT_NAME}) +ov_cpack_add_component(${OV_CPACK_COMP_NPM} HIDDEN) + install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${PROJECT_NAME} ${OV_CPACK_COMP_NPM_EXCLUDE_ALL} RUNTIME DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${PROJECT_NAME} ${OV_CPACK_COMP_NPM_EXCLUDE_ALL} diff --git a/src/bindings/js/node/include/core_wrap.hpp b/src/bindings/js/node/include/core_wrap.hpp index 
f973a5fa751925..387dcfe1720d9d 100644 --- a/src/bindings/js/node/include/core_wrap.hpp +++ b/src/bindings/js/node/include/core_wrap.hpp @@ -93,6 +93,9 @@ class CoreWrap : public Napi::ObjectWrap { /** @brief Returns devices available for inference. */ Napi::Value get_available_devices(const Napi::CallbackInfo& info); + /** @brief Returns versions of the specified device. */ + Napi::Value get_versions(const Napi::CallbackInfo& info); + private: ov::Core _core; }; diff --git a/src/bindings/js/node/include/helper.hpp b/src/bindings/js/node/include/helper.hpp index cce376bc2c9c88..ffd944b3589033 100644 --- a/src/bindings/js/node/include/helper.hpp +++ b/src/bindings/js/node/include/helper.hpp @@ -140,6 +140,9 @@ ov::Tensor get_request_tensor(ov::InferRequest& infer_request, const size_t idx) /** @brief Creates ov::tensor from TensorWrap Object */ ov::Tensor cast_to_tensor(const Napi::Value& value); +/** @brief Creates ov::tensor from Napi::CallbackInfo value at specified index. */ +ov::Tensor cast_to_tensor(const Napi::CallbackInfo& info, int index); + /** @brief Creates ov::tensor from TypedArray using given shape and element type*/ ov::Tensor cast_to_tensor(const Napi::TypedArray& data, const ov::Shape& shape, const ov::element::Type_t& type); diff --git a/src/bindings/js/node/include/type_validation.hpp b/src/bindings/js/node/include/type_validation.hpp new file mode 100644 index 00000000000000..2e699f1f96b8ad --- /dev/null +++ b/src/bindings/js/node/include/type_validation.hpp @@ -0,0 +1,11 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once +#include + +#include "node/include/addon.hpp" +#include "openvino/openvino.hpp" + +/** @brief Checks if Napi::Value is a TensorWrap.*/ +bool is_tensor(const Napi::Env& env, const Napi::Value& value); diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index a5ed1598616a38..565839a57b0dab 100644 --- a/src/bindings/js/node/lib/addon.ts +++ 
b/src/bindings/js/node/lib/addon.ts @@ -38,6 +38,12 @@ interface Core { readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model; importModelSync(modelStream: Buffer, device: string): CompiledModel; getAvailableDevices(): string[]; + getVersions(deviceName: string): { + [deviceName: string]: { + buildNumber: string, + description: string, + }, + }; setProperty(props: { [key: string]: string | number | boolean }): void; setProperty( deviceName: string, diff --git a/src/bindings/js/node/src/core_wrap.cpp b/src/bindings/js/node/src/core_wrap.cpp index 17c14d037990f2..442d9c3c79beee 100644 --- a/src/bindings/js/node/src/core_wrap.cpp +++ b/src/bindings/js/node/src/core_wrap.cpp @@ -61,6 +61,7 @@ Napi::Function CoreWrap::get_class(Napi::Env env) { InstanceMethod("getAvailableDevices", &CoreWrap::get_available_devices), InstanceMethod("importModelSync", &CoreWrap::import_model), InstanceMethod("getAvailableDevices", &CoreWrap::get_available_devices), + InstanceMethod("getVersions", &CoreWrap::get_versions), InstanceMethod("setProperty", &CoreWrap::set_property), InstanceMethod("getProperty", &CoreWrap::get_property)}); } @@ -273,6 +274,31 @@ Napi::Value CoreWrap::get_available_devices(const Napi::CallbackInfo& info) { return js_devices; } +Napi::Value CoreWrap::get_versions(const Napi::CallbackInfo& info) { + if (info.Length() == 0) { + reportError(info.Env(), "getVersions() method expects 1 argument of string type."); + return info.Env().Undefined(); + } + auto device_arg = info[0]; + if (!device_arg.IsString()) { + reportError(info.Env(), "The argument in getVersions() method must be a string or convertible to a string."); + return info.Env().Undefined(); + } + const auto& devices_map = _core.get_versions(device_arg.ToString()); + Napi::Object versions_object = Napi::Object::New(info.Env()); + + for (const auto& dev : devices_map) { + Napi::Object device_properties = Napi::Object::New(info.Env()); + + device_properties.Set("buildNumber", 
Napi::String::New(info.Env(), dev.second.buildNumber)); + device_properties.Set("description", Napi::String::New(info.Env(), dev.second.description)); + + versions_object.Set(dev.first, device_properties); + } + + return versions_object; +} + Napi::Value CoreWrap::import_model(const Napi::CallbackInfo& info) { if (info.Length() != 2) { reportError(info.Env(), "Invalid number of arguments -> " + std::to_string(info.Length())); diff --git a/src/bindings/js/node/src/helper.cpp b/src/bindings/js/node/src/helper.cpp index 002580aac56b74..b00fbb033c8447 100644 --- a/src/bindings/js/node/src/helper.cpp +++ b/src/bindings/js/node/src/helper.cpp @@ -4,6 +4,7 @@ #include "node/include/helper.hpp" #include "node/include/tensor.hpp" +#include "node/include/type_validation.hpp" const std::vector& get_supported_types() { static const std::vector supported_element_types = @@ -303,6 +304,14 @@ ov::Tensor cast_to_tensor(const Napi::Value& value) { } } +ov::Tensor cast_to_tensor(const Napi::CallbackInfo& info, int index) { + if (!is_tensor(info.Env(), info[index])) { + OPENVINO_THROW(std::string("Argument #" + std::to_string(index) + " must be a Tensor.")); + } + const auto tensor_wrap = Napi::ObjectWrap::Unwrap(info[index].ToObject()); + return tensor_wrap->get_tensor(); +} + ov::Tensor cast_to_tensor(const Napi::TypedArray& typed_array, const ov::Shape& shape, const ov::element::Type_t& type) { diff --git a/src/bindings/js/node/src/infer_request.cpp b/src/bindings/js/node/src/infer_request.cpp index 302548e691d7e5..a1bc3a5daa8b37 100644 --- a/src/bindings/js/node/src/infer_request.cpp +++ b/src/bindings/js/node/src/infer_request.cpp @@ -53,39 +53,45 @@ Napi::Object InferRequestWrap::wrap(Napi::Env env, ov::InferRequest infer_reques } void InferRequestWrap::set_tensor(const Napi::CallbackInfo& info) { - if (info.Length() != 2 || !info[0].IsString() || !info[1].IsObject()) { - reportError(info.Env(), "InferRequest.setTensor() invalid argument."); - } else { - std::string name = 
info[0].ToString(); - auto tensorWrap = Napi::ObjectWrap::Unwrap(info[1].ToObject()); - _infer_request.set_tensor(name, tensorWrap->get_tensor()); + try { + if (info.Length() != 2 || !info[0].IsString() || !info[1].IsObject()) { + OPENVINO_THROW(std::string("InferRequest.setTensor() invalid argument.")); + } else { + const std::string& name = info[0].ToString(); + _infer_request.set_tensor(name, cast_to_tensor(info, 1)); + } + } catch (std::exception& e) { + reportError(info.Env(), e.what()); } } void InferRequestWrap::set_input_tensor(const Napi::CallbackInfo& info) { - if (info.Length() == 1 && info[0].IsObject()) { - auto tensorWrap = Napi::ObjectWrap::Unwrap(info[0].ToObject()); - _infer_request.set_input_tensor(tensorWrap->get_tensor()); - } else if (info.Length() == 2 && info[0].IsNumber() && info[1].IsObject()) { - auto idx = info[0].ToNumber().Int32Value(); - auto tensorWrap = Napi::ObjectWrap::Unwrap(info[1].ToObject()); - _infer_request.set_input_tensor(idx, tensorWrap->get_tensor()); - } else { - reportError(info.Env(), "InferRequest.setInputTensor() invalid argument."); + try { + if (info.Length() == 1 && info[0].IsObject()) { + _infer_request.set_input_tensor(cast_to_tensor(info, 0)); + } else if (info.Length() == 2 && info[0].IsNumber() && info[1].IsObject()) { + const auto idx = info[0].ToNumber().Int32Value(); + _infer_request.set_input_tensor(idx, cast_to_tensor(info, 1)); + } else { + OPENVINO_THROW(std::string("InferRequest.setInputTensor() invalid argument.")); + } + } catch (std::exception& e) { + reportError(info.Env(), e.what()); } } void InferRequestWrap::set_output_tensor(const Napi::CallbackInfo& info) { - if (info.Length() == 1) { - auto tensorWrap = Napi::ObjectWrap::Unwrap(info[0].ToObject()); - auto t = tensorWrap->get_tensor(); - _infer_request.set_output_tensor(t); - } else if (info.Length() == 2 && info[0].IsNumber() && info[1].IsObject()) { - auto idx = info[0].ToNumber().Int32Value(); - auto tensorWrap = 
Napi::ObjectWrap::Unwrap(info[1].ToObject()); - _infer_request.set_output_tensor(idx, tensorWrap->get_tensor()); - } else { - reportError(info.Env(), "InferRequest.setOutputTensor() invalid argument."); + try { + if (info.Length() == 1 && info[0].IsObject()) { + _infer_request.set_output_tensor(cast_to_tensor(info, 0)); + } else if (info.Length() == 2 && info[0].IsNumber() && info[1].IsObject()) { + const auto idx = info[0].ToNumber().Int32Value(); + _infer_request.set_output_tensor(idx, cast_to_tensor(info, 1)); + } else { + OPENVINO_THROW(std::string("InferRequest.setOutputTensor() invalid argument.")); + } + } catch (std::exception& e) { + reportError(info.Env(), e.what()); } } diff --git a/src/bindings/js/node/src/type_validation.cpp b/src/bindings/js/node/src/type_validation.cpp new file mode 100644 index 00000000000000..08c6c0787130ee --- /dev/null +++ b/src/bindings/js/node/src/type_validation.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "node/include/type_validation.hpp" + +bool is_tensor(const Napi::Env& env, const Napi::Value& value) { + const auto& prototype = env.GetInstanceData()->tensor; + return value.ToObject().InstanceOf(prototype.Value().As()); +} diff --git a/src/bindings/js/node/tests/basic.test.js b/src/bindings/js/node/tests/basic.test.js index 07e0502053edd1..1236bd9c553520 100644 --- a/src/bindings/js/node/tests/basic.test.js +++ b/src/bindings/js/node/tests/basic.test.js @@ -20,6 +20,33 @@ it('Core.getAvailableDevices()', () => { assert.ok(devices.includes('CPU')); }); +describe('Core.getVersions()', () => { + + it('getVersions(validDeviceName: string)', () => { + const deviceVersion = core.getVersions('CPU'); + assert.strictEqual(typeof deviceVersion, 'object'); + assert.strictEqual(typeof deviceVersion.CPU, 'object'); + assert.strictEqual(typeof deviceVersion.CPU.buildNumber, 'string'); + assert.strictEqual(typeof deviceVersion.CPU.description, 'string'); + }); + + 
it('getVersions() throws if no arguments are passed into the function', () => { + assert.throws( + () => core.getVersions(), + {message: 'getVersions() method expects 1 argument of string type.'} + ); + }); + + it('getVersions() throws if non string coercable arguments are passed into the function', () => { + assert.throws( + () => core.getVersions({ deviceName: 'CPU' }), + {message: 'The argument in getVersions() method must be a string or convertible to a string.'} + ); + }); + +}); + + it('CompiledModel type', () => { assert.ok(compiledModel instanceof ov.CompiledModel); }); diff --git a/src/bindings/js/node/tests/infer_request.test.js b/src/bindings/js/node/tests/infer_request.test.js index 98d7f4e809ffa6..27543233f573dc 100644 --- a/src/bindings/js/node/tests/infer_request.test.js +++ b/src/bindings/js/node/tests/infer_request.test.js @@ -63,6 +63,7 @@ describe('InferRequest', () => { ['string', 'Cannot create a tensor from the passed Napi::Value.'], [tensorData.slice(-10), 'Memory allocated using shape and element::type mismatch passed data\'s size'], [new Float32Array(buffer, 4), 'TypedArray.byteOffset has to be equal to zero.'], + [{}, /Invalid argument/], // Test for object that is not Tensor ]; inputMessagePairs.forEach( ([tl, msg]) => { @@ -114,22 +115,39 @@ describe('InferRequest', () => { assert.deepStrictEqual(tensor.data[0], t1.data[0]); }); + it('Test setInputTensor(object) throws when passed object is not a Tensor.', () => { + assert.throws( + () => inferRequest.setInputTensor({}), + {message: /Argument #[0-9]+ must be a Tensor./} + ); + }); + it('Test setInputTensor(idx, tensor)', () => { inferRequest.setInputTensor(0, tensor); const t1 = inferRequest.getInputTensor(); assert.deepStrictEqual(tensor.data[0], t1.data[0]); }); - it('Test setInputTensor() - pass two tensors', () => { + it('Test setInputTensor(idx, tensor) throws', () => { + const testIdx = 10; + assert.throws ( + () => inferRequest.setInputTensor(testIdx, tensor), + {message: /Input 
port for index [0-9]+ was not found!/} + ); + }); + + it('Test setInputTensor(idx, object) throws when passed object is not a Tensor.', () => { assert.throws( - () => inferRequest.setInputTensor(resTensor, tensor), - {message: 'InferRequest.setInputTensor() invalid argument.'}); + () => inferRequest.setInputTensor(0, {}), + {message: /Argument #[0-9]+ must be a Tensor./} + ); }); - it('Test setInputTensor() - pass number as a single arg', () => { + it('Test setInputTensor(tensor, tensor) throws', () => { assert.throws( - () => inferRequest.setInputTensor(123), - {message: 'InferRequest.setInputTensor() invalid argument.'}); + () => inferRequest.setInputTensor(resTensor, tensor), + {message: / invalid argument./} + ); }); it('Test setOutputTensor(tensor)', () => { @@ -138,16 +156,38 @@ describe('InferRequest', () => { assert.deepStrictEqual(resTensor.data[0], res2.data[0]); }); + it('Test setOutputTensor(object) throws when passed object is not a Tensor.', () => { + assert.throws( + () => inferRequest.setOutputTensor({}), + {message: /Argument #[0-9]+ must be a Tensor./} + ); + }); + + it('Test setOutputTensor(idx, tensor) throws', () => { + const testIdx = 10; + assert.throws ( + () => inferRequest.setOutputTensor(testIdx, tensor), + {message: /Output port for index [0-9]+ was not found!/} + ); + }); + it('Test setOutputTensor(idx, tensor)', () => { inferRequest.setOutputTensor(0, resTensor); const res2 = inferRequest.getOutputTensor(); assert.deepStrictEqual(resTensor.data[0], res2.data[0]); }); + it('Test setOutputTensor(idx, tensor) throws when passed object is not a Tensor.', () => { + assert.throws( + () => inferRequest.setOutputTensor(0, {}), + {message: /Argument #[0-9]+ must be a Tensor./} + ); + }); + it('Test setOutputTensor() - pass two tensors', () => { assert.throws( () => inferRequest.setOutputTensor(resTensor, tensor), - {message: 'InferRequest.setOutputTensor() invalid argument.'}); + {message: / invalid argument./}); }); it('Test setTensor(string, 
tensor)', () => { @@ -157,22 +197,38 @@ describe('InferRequest', () => { assert.deepStrictEqual(resTensor.data[0], res2.data[0]); }); + it('Test setTensor(string, object) - throws', () => { + const testName = 'testName'; + assert.throws( + () => inferRequest.setTensor(testName, tensor), + {message: /Port for tensor name testName was not found./}); + }); + + it('Test setTensor(string, object) - throws', () => { + assert.throws( + () => inferRequest.setTensor('fc_out', {}), + {message: /Argument #[0-9]+ must be a Tensor./}); + }); + it('Test setTensor(string, tensor) - pass one arg', () => { assert.throws( () => inferRequest.setTensor('fc_out'), - {message: 'InferRequest.setTensor() invalid argument.'}); + {message: / invalid argument./} + ); }); it('Test setTensor(string, tensor) - pass args in wrong order', () => { assert.throws( () => inferRequest.setTensor(resTensor, 'fc_out'), - {message: 'InferRequest.setTensor() invalid argument.'}); + {message: / invalid argument./} + ); }); it('Test setTensor(string, tensor) - pass number as first arg', () => { assert.throws( () => inferRequest.setTensor(123, 'fc_out'), - {message: 'InferRequest.setTensor() invalid argument.'}); + {message: / invalid argument/} + ); }); const irGetters = compiledModel.createInferRequest(); diff --git a/src/bindings/python/CMakeLists.txt b/src/bindings/python/CMakeLists.txt index d13cb2e99db1fa..6cf43ec3fed72b 100644 --- a/src/bindings/python/CMakeLists.txt +++ b/src/bindings/python/CMakeLists.txt @@ -317,12 +317,14 @@ if(ENABLE_PYTHON_PACKAGING) set(ov_site_packages "site-packages") endif() + # install OpenVINO Python API + set(python_package_prefix "${CMAKE_CURRENT_BINARY_DIR}/install_${pyversion}") set(install_lib "${python_package_prefix}/lib/${python_versioned_folder}/${ov_site_packages}") - set(meta_info_subdir "openvino-${OpenVINO_VERSION}-py${python_xy}.egg-info") - set(meta_info_file "${install_lib}/${meta_info_subdir}/PKG-INFO") + set(openvino_meta_info_subdir 
"openvino-${OpenVINO_VERSION}-py${python_xy}.egg-info") + set(openvino_meta_info_file "${install_lib}/${openvino_meta_info_subdir}/PKG-INFO") - add_custom_command(OUTPUT ${meta_info_file} + add_custom_command(OUTPUT ${openvino_meta_info_file} COMMAND ${CMAKE_COMMAND} -E remove_directory "${python_package_prefix}" COMMAND ${setup_py_env} @@ -344,9 +346,36 @@ if(ENABLE_PYTHON_PACKAGING) --record=installed.txt WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" DEPENDS ${ov_setup_py_deps} - COMMENT "Create python package with ${meta_info_subdir} folder") - - add_custom_target(_python_api_package ALL DEPENDS ${meta_info_file}) + COMMENT "Create python package with ${openvino_meta_info_subdir} folder") + + # Install OpenVINO Telemetry + + set(OpenVINO_Telemetry_SOURCE_DIR "${OpenVINO_SOURCE_DIR}/thirdparty/telemetry") + file(GLOB_RECURSE telemetry_files ${OpenVINO_Telemetry_SOURCE_DIR}/*) + + set(openvino_telemetry_meta_info_subdir "openvino-telemetry-${OpenVINO_VERSION}-py${python_xy}.egg-info") + set(openvino_telemetry_meta_info_file "${install_lib}/${openvino_telemetry_meta_info_subdir}/PKG-INFO") + + add_custom_command(OUTPUT ${openvino_telemetry_meta_info_file} + COMMAND "${Python3_EXECUTABLE}" "${OpenVINO_Telemetry_SOURCE_DIR}/setup.py" + --no-user-cfg + --quiet + build + --executable "/usr/bin/python3" + install + --no-compile + --prefix "${python_package_prefix}" + --install-lib "${install_lib}" + --install-scripts "${python_package_prefix}/bin" + --single-version-externally-managed + --record=installed.txt + WORKING_DIRECTORY "${OpenVINO_Telemetry_SOURCE_DIR}" + DEPENDS ${telemetry_files} + COMMENT "Create python package with ${openvino_telemetry_meta_info_subdir} folder") + + # create custom target + + add_custom_target(_python_api_package ALL DEPENDS ${openvino_meta_info_file} ${openvino_telemetry_meta_info_file}) # install python package, which will be later packed into DEB | RPM ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_package_${pyversion} 
HIDDEN) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 70c8c66d42a53a..56f5cd79a61480 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -18,13 +18,13 @@ class TorchFXPythonDecoder (Decoder): - def __init__(self, pt_module, fx_gm, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]): + def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]): Decoder.__init__(self) self.mark_node_callback = mark_node_callback # We store every decoder created by this decoder so that all them are not deleted until the first decoder is deleted self.m_decoders = [] self.pt_module = pt_module - self.fx_gm = fx_gm + self.fx_gm = fx_gm if fx_gm is not None else pt_module self.input_types = [OVAny(pt_to_ov_type_map[str(t)]) for t in input_types] self.input_shapes = input_shapes @@ -46,14 +46,16 @@ def __init__(self, pt_module, fx_gm, nodes=None, mark_node_callback=None, input_ self._input_signature.append(value.name) if hasattr(value, "meta") and ('tensor_meta' in value.meta.keys()) and value.meta['tensor_meta']: found_shapes.append(value.meta['tensor_meta'].shape) - found_types.append(OVAny(pt_to_ov_type_map[str(value.meta['tensor_meta'].dtype)])) + found_types.append( + OVAny(pt_to_ov_type_map[str(value.meta['tensor_meta'].dtype)])) else: found_shapes.append(None) found_types.append(None) elif self._nodes[i].op == 'output': # Instead of putting output index, refer to its target uargs = self.unpack_containers(self._nodes[i].args) - self._outputs = [(arg[0], self._nodes.index(arg[1])) for arg in uargs if arg[1] is not None] + self._outputs = [(arg[0], self._nodes.index(arg[1])) + for arg in uargs if arg[1] is not None] if not input_shapes or len(input_shapes) == 0: self.input_shapes = found_shapes @@ -270,8 +272,9 @@ def 
get_subgraphs(self): return list(self.pt_module.blocks()) def get_subgraph_decoder(self, index): - decoder = TorchFXPythonDecoder(self.get_subgraphs( - )[index], self.fx_gm, mark_node_callback=self.mark_node_callback) + decoder = TorchFXPythonDecoder(self.get_subgraphs()[index], + self.fx_gm, + mark_node_callback=self.mark_node_callback) self.m_decoders.append(decoder) return decoder @@ -284,8 +287,7 @@ def get_op_type(self): return 'UNKNOWN_TYPE_' + str(self.pt_module.op) def get_schema(self): - return '' - return self.pt_module.schema() + return 'NONE' def outputs(self): return [o[1] for o in self._outputs] @@ -318,115 +320,15 @@ def mark_node(self, node): return node def as_constant(self): - - if self.pt_module.op == 'get_attr': - # Extract Constant from FX module field - ret = fetch_attr(self.fx_gm, self.pt_module.target) - ov_const = torch_tensor_to_ov_const(ret, shared_memory=True) - return ov_const.outputs() - - if not self.get_op_type() == 'prim::Constant': - return None - pt_value = self._raw_output(0) - - pt_type_class = pt_value.type().__class__ - if pt_type_class is torch.TensorType: - return self.as_constant_tensor(pt_value) - if pt_type_class is torch.ListType: - return self.as_constant_list(pt_value) - if str(pt_value.type()) in ['torch.int32', 'int']: - return make_constant(OVType.i32, Shape([]), [pt_value.toIValue()]).outputs() - if str(pt_value.type()) in ['torch.float', 'torch.FloatType', 'float']: - return make_constant(OVType.f32, Shape([]), [pt_value.toIValue()]).outputs() - if str(pt_value.type()) in ['torch.bool', 'bool']: - return make_constant(OVType.boolean, Shape([]), [pt_value.toIValue()]).outputs() - - return None + assert self.pt_module.op == 'get_attr', "Only get_attr is supported" + # Extract Constant from FX module field + ret = fetch_attr(self.fx_gm, self.pt_module.target) + ov_const = torch_tensor_to_ov_const(ret, shared_memory=True) + return ov_const.outputs() def as_string(self): - if not self.get_op_type() == 'prim::Constant': 
- return None - pt_value = self._raw_output(0) - - if str(pt_value.type()) in ['torch.StringType', 'str']: - return pt_value.toIValue() return None - def as_constant_tensor(self, pt_value): - ivalue = pt_value.toIValue() - if pt_value.isCompleteTensor(): - try: - ivalue = ivalue.to( - memory_format=torch.contiguous_format).detach().cpu() - except: - logger.warning("Tensor couldn't detach") - if str(pt_value.type().dtype()) in pt_to_py_type_map: - # Constant interpretation doesn't respect new-full type of PT - # It recognizes only tensors, and give lists as 1D tensors, and scalars as Tensor scalars - # So only tensor-type constants are supported - ovshape = PartialShape(pt_value.type().sizes()) - ovtype = pt_to_ov_type_map[str(pt_value.type().dtype())] - - # TODO: try-except here is a temporary WA for issues with data_ptr that we currently cannot predict; provide better solution - try: - # this is only possible with adding a new ctor for Constant Python binding - # TODO Check strides and pass them somehow - values = ivalue.data_ptr() - ov_const = make_constant( - ovtype, ovshape.get_shape(), values) - except: - # old variant that makes a slow data copying - logger.warning("Constant wasn't able to convert from data_ptr.") - values = ivalue.flatten().tolist() - ov_const = make_constant( - ovtype, ovshape.get_shape(), values) - return ov_const.outputs() - else: - # Incomplete tensor can be scalar - if isinstance(ivalue, float): - return make_constant(OVType.f32, Shape([]), [ivalue]).outputs() - if isinstance(ivalue, int): - return make_constant(OVType.i64, Shape([]), [ivalue]).outputs() - if isinstance(ivalue, bool): - return make_constant(OVType.boolean, Shape([]), [ivalue]).outputs() - - # TODO: verify that it correctly reads incomplete consts - if str(ivalue.type()) in pt_to_ov_type_map: - try: - ovshape = PartialShape(ivalue.size()) - ovtype = pt_to_ov_type_map[str(ivalue.type())] - ov_const = make_constant( - ovtype, ovshape.get_shape(), ivalue.data_ptr()) - 
except: - # old variant that makes a slow data copying - logger.warning("Constant wasn't able to convert from data_ptr.") - nvalues = ivalue.numpy(force=True) - ovtype = np_to_ov_type_map[str(nvalues.dtype)] - ovshape = PartialShape(nvalues.shape) - ov_const = make_constant( - ovtype, ovshape.get_shape(), nvalues.flatten().tolist()) - return ov_const.outputs() - return None - - def as_constant_list(self, pt_value): - # For now it is treat a list as a 1D tensor; it is required by converters to avoid need to massively rewrite them in that part where constant attributes are queried - pt_element_type = str(pt_value.type().getElementType()) - ivalue = pt_value.toIValue() - is_known_type = pt_element_type in pt_to_ov_type_map - - # WA to broken ov.Type - # Detect integer list and process it with a dedicated method - # TODO: Fix ov.Type and remove this WA - # if pt_to_py_type_map[pt_element_type] == 'int': - # self.as_constant_list_of_ints(ovshape = PartialShape([len(ivalue)]), ivalue) - # End of WA to broken ov.Type - - if is_known_type: - ovtype = pt_to_ov_type_map[pt_element_type] - ovshape = PartialShape([len(ivalue)]) - ov_const = make_constant(ovtype, ovshape.get_shape(), ivalue) - return ov_const.outputs() - def input_is_none(self, index): if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None): return True @@ -438,11 +340,4 @@ def debug(self): self.pt_module.print() def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: - # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that - return False - try: - return self.alias_db.may_contain_alias(self._raw_input(in_index), self._raw_output(out_index)) - except: - # Sometimes pytorch fails to get result with IndexError exception while these indexes exist in node - return False + return False diff --git 
a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py index 7dbbacbf1be57f..91192e4110d2bb 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/compile.py @@ -97,7 +97,7 @@ def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None, options input_types.append(input_data.type()) input_shapes.append(input_data.size()) - decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types) + decoder = TorchFXPythonDecoder(gm, input_shapes=input_shapes, input_types=input_types) im = fe.load(decoder) diff --git a/src/bindings/python/wheel/setup.py b/src/bindings/python/wheel/setup.py index 1b7af69eceb230..9f6103302559ce 100644 --- a/src/bindings/python/wheel/setup.py +++ b/src/bindings/python/wheel/setup.py @@ -38,6 +38,8 @@ ARCH = "arm" elif machine == "aarch64" or machine == "arm64" or machine == "ARM64": ARCH = "arm64" +elif machine == "riscv64": + ARCH = "riscv64" # The following variables can be defined in environment or .env file SCRIPT_DIR = Path(__file__).resolve().parents[0] diff --git a/src/core/include/openvino/op/ops.hpp b/src/core/include/openvino/op/ops.hpp index f6c91269215f8f..7a17f120f735a5 100644 --- a/src/core/include/openvino/op/ops.hpp +++ b/src/core/include/openvino/op/ops.hpp @@ -153,6 +153,7 @@ #include "openvino/op/result.hpp" #include "openvino/op/reverse.hpp" #include "openvino/op/reverse_sequence.hpp" +#include "openvino/op/rms_norm.hpp" #include "openvino/op/rnn_cell.hpp" #include "openvino/op/rnn_sequence.hpp" #include "openvino/op/roi_align.hpp" diff --git a/src/core/include/openvino/op/rms_norm.hpp b/src/core/include/openvino/op/rms_norm.hpp new file mode 100644 index 00000000000000..43bfd7e213bab0 --- /dev/null +++ b/src/core/include/openvino/op/rms_norm.hpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2024 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace v14 { +/// \brief Operator performing Root Mean Square Normalization +/// \ingroup ov_ops_cpp_api +class OPENVINO_API RMSNorm : public ov::op::Op { +public: + OPENVINO_OP("RMSNorm", "opset14", ov::op::Op); + + RMSNorm() = default; + /// \brief Constructs an RMSNorm operation without scaling. + /// + /// \param data Input tensor with data + /// \param axes Axes for reduce mean calculation + /// \param eps Epsilon for not dividing by zero while normalizing the value + /// \param compute_type Precision for the internal computation, if undefined it's the same as the input type + RMSNorm(const Output& data, + const Output& axes, + double epsilson, + const ov::element::Type& compute_type = ov::element::undefined); + + /// \brief Constructs an RMSNorm operation with scaling. + /// + /// \param data Input tensor with data + /// \param axes Axes for reduce mean calculation + /// \param scale Scale values for weight + /// \param eps Epsilon for not dividing by zero while normalizing the value + /// \param compute_type Precision for the internal computation, if undefined it's the same as the input type + RMSNorm(const Output& data, + const Output& axes, + const Output& scale, + double epsilson, + const ov::element::Type& compute_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + double get_epsilon() const; + const ov::element::Type& get_compute_type() const; + +private: + double m_epsilon{0}; + ov::element::Type m_compute_type{ov::element::undefined}; +}; + +} // namespace v14 +} // namespace op +} // namespace ov diff --git a/src/core/reference/include/openvino/reference/autobroadcast_binop.hpp 
b/src/core/reference/include/openvino/reference/autobroadcast_binop.hpp index cdae05772ea36d..9a90e4a9b14a0f 100644 --- a/src/core/reference/include/openvino/reference/autobroadcast_binop.hpp +++ b/src/core/reference/include/openvino/reference/autobroadcast_binop.hpp @@ -19,7 +19,7 @@ namespace internal { inline void row_major_strides(const Shape& shape, size_t* strides, size_t size) noexcept { size_t* st = strides + size - 1; size_t s = 1; - for (auto d = shape.rbegin(); d != shape.rend(); d++) { + for (auto d = shape.rbegin(), last = shape.rend(); d != last; ++d) { *st-- = s; s *= *d; } @@ -44,10 +44,11 @@ inline void numpy_autobroadcast_binop(const T* arg0, const Shape& output_shape, const size_t axis, const size_t stride, - Functor elementwise_functor) { + Functor&& elementwise_functor) { for (CoordinateIterator it(output_shape), ite = CoordinateIterator::end();;) { - for (size_t i = 0; i < stride; ++i) - *out++ = elementwise_functor(arg0[i * A0], arg1[i * A1]); + for (size_t i = 0; i < stride; ++i, ++out) { + *out = elementwise_functor(arg0[i * A0], arg1[i * A1]); + } arg0 += A0 ? stride : 1; arg1 += A1 ? stride : 1; @@ -72,23 +73,233 @@ inline size_t calculate_fixed_axis(size_t axis, const size_t* strides) { } } // namespace internal -/// \brief Helper function to implement autobroadcasting elementwise binop references. -/// -/// \tparam T Element type of the input tensors. -/// \tparam U Element type of the output tensor. -/// \tparam Functor Type of the functor for the elementwise operation. Must support -/// operator()(T,T), and operator()(T,T) must return a value of type -/// U. -/// -/// \param arg0 Pointer to the buffer for left operand input tensor. -/// \param arg1 Pointer to the buffer for right operand input tensor. -/// \param out Pointer to the buffer for output tensor. This must be pre-allocated by -/// the caller, and must be large enough to hold a tensor of the correct -/// shape. 
-/// \param broadcast_spec Specification of the auto-broadcasting scheme. -/// \param elementwise_functor Functor implementing the elementwise operation to be -/// applied across the input tensors. Must accept two -/// arguments of type T, and return a value of type U. +/** + * @brief Apply elementwise function for 2 inputs of same size. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param count Number of elements in inputs + * @param f Binary elementwise functions. + */ +template +void no_broadcast_binop(const T* arg0, const T* arg1, U* out, const size_t count, Functor f) { + for (auto last = arg0 + count; arg0 != last; ++arg0, ++arg1, ++out) { + *out = f(*arg0, *arg1); + } +} + +/** + * @brief Apply elementwise function for 2 inputs and apply NUMPY broadcasting. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Shape of input 0. + * @param arg1_shape Shape of input 1. + * @param f Binary elementwise functions. + */ +template +void numpy_broadcast_binop(const T* arg0, + const T* arg1, + U* out, + const Shape& arg0_shape, + const Shape& arg1_shape, + Functor f) { + // We'll be using CoordinateTransformBasic to handle the broadcasting. The general procedure is as follows: + // + // (1) Left pad the shorter of the two shapes with ones. + // (2) Squeeze (remove ones from) both shapes, and record the squeezed axis indices. + // (3) Using CoordinateTransformBasic, broadcast both args to the final output shape. The "broadcasted axes" will be + // those that were squeezed in step 2. 
+ // + // Example: + // + // Input shape->Padded shape->Squeezed Shape/Squeezed Axes + // ----------- ------------ ---------------------------- + // a: [ 3, 2, 1] [ 3, 2, 1] [ 3, 2 ] {2} + // b: [ 1, 6] [ 1, 1, 6] [ 6] {0,1} + // | | | + // v v v + // Output shape + // ------------ + // [ 3, 2, 6] + using namespace internal; + + size_t const shape_rank = std::max(arg0_shape.size(), arg1_shape.size()) + 1; + + // TODO: Use compiler-specific alloca() or variable-length array + std::vector tmp(shape_rank * 2); + + size_t* strides0 = tmp.data(); + size_t* strides1 = tmp.data() + shape_rank; + + row_major_strides(arg0_shape, strides0, shape_rank); + row_major_strides(arg1_shape, strides1, shape_rank); + + size_t const padding0 = shape_rank - arg0_shape.size(); + size_t const padding1 = shape_rank - arg1_shape.size(); + + Shape output_shape(shape_rank, 0); + + size_t axis = 0; + + for (size_t i = 0; i < shape_rank; ++i) { + auto const dim0 = value_with_padding_or(arg0_shape, padding0, i, 1); + auto const dim1 = value_with_padding_or(arg1_shape, padding1, i, 1); + + output_shape[i] = std::max(dim0, dim1); + + if (dim0 != dim1) + axis = std::max(axis, i); + } + + if (axis == 0) { + no_broadcast_binop(arg0, arg1, out, strides0[0], f); + } else if (strides0[axis] == 1 && value_with_padding_or(arg0_shape, padding0, axis, 1) == 1) { + axis = calculate_fixed_axis(axis, strides0); + + internal::numpy_autobroadcast_binop<0, 1>(arg0, + arg1, + out, + arg0_shape, + arg1_shape, + strides0, + strides1, + padding0, + padding1, + output_shape, + axis, + strides1[axis], + f); + } else if (strides1[axis] == 1 && value_with_padding_or(arg1_shape, padding1, axis, 1) == 1) { + axis = calculate_fixed_axis(axis, strides1); + + internal::numpy_autobroadcast_binop<1, 0>(arg0, + arg1, + out, + arg0_shape, + arg1_shape, + strides0, + strides1, + padding0, + padding1, + output_shape, + axis, + strides0[axis], + f); + } else + internal::numpy_autobroadcast_binop<1, 1>(arg0, + arg1, + out, + 
arg0_shape, + arg1_shape, + strides0, + strides1, + padding0, + padding1, + output_shape, + axis, + strides0[axis], + f); +} + +/** + * @brief Apply elementwise function for 2 inputs and apply PDPP broadcasting. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Shape of input 0. + * @param arg1_shape Shape of input 1. + * @param axis Start dimension index for broadcast arg1 shape into arg0. + * @param f Binary elementwise functions. + */ +template +void pdpd_broadcast_binop(const T* arg0, + const T* arg1, + U* out, + const Shape& arg0_shape, + const Shape& arg1_shape, + int64_t axis, + Functor f) { + // We'll be using CoordinateTransformBasic to handle the broadcasting. No need to process arg0 and output shape will + // be the same as arg0. We need to process arg1 and the general procedure is as follows: + // + // (1) Trim trailing ones from arg1 shape. + // (2) Left and right pad arg1 to match arg0 shape. Axis is the index start to align between arg0 and arg1. + // (3) Squeeze (remove ones from) arg1 shape, and record the squeezed axis indices. + // (3) Using CoordinateTransformBasic, broadcast arg1 to the final output shape. The "broadcasted axes" will be + // those that were squeezed in step 23. 
+ // + // Example: + // + // Input shape-> Padded shape-> Squeezed Shape/Squeezed Axes + // ----------- ------------ ---------------------------- + // a: [ 3, 4, 5, 6] [ 3, 4, 5, 6] [ 3, 4, 5, 6] + // b: [ 4, 5, ] [ 1, 4, 5, 1] [ 4, 5 ] {0,3} + // | | | + // v v v + // Output shape + // ------------ + // [ 3, 4, 5, 6] + + if (axis == -1) { + axis = arg0_shape.size() - arg1_shape.size(); + } + + Shape arg1_padded_shape = arg1_shape; + // Trim trailing ones + while (arg1_padded_shape.size() > 0 && arg1_padded_shape.back() == 1) { + arg1_padded_shape.pop_back(); + } + + for (int64_t i = 0; i < axis; ++i) { + arg1_padded_shape.insert(arg1_padded_shape.begin(), 1); + } + + while (arg1_padded_shape.size() < arg0_shape.size()) { + arg1_padded_shape.insert(arg1_padded_shape.end(), 1); + } + + Shape arg1_squeezed_shape; + AxisSet arg1_squeezed_axes; + + for (size_t i = 0, size = arg0_shape.size(); i < size; i++) { + if (arg1_padded_shape[i] == 1) { + arg1_squeezed_axes.insert(i); + } else { + arg1_squeezed_shape.push_back(arg1_padded_shape[i]); + } + } + + const CoordinateTransformBasic output_transform{arg0_shape}; + + for (const Coordinate& output_coord : output_transform) { + const auto arg1_coord = util::reduce(output_coord, arg1_squeezed_axes); + const auto out_index = coordinate_index(output_coord, arg0_shape); + const auto arg0_index = coordinate_index(output_coord, arg0_shape); + const auto arg1_index = coordinate_index(arg1_coord, arg1_squeezed_shape); + out[out_index] = f(arg0[arg0_index], arg1[arg1_index]); + } +} + +/** + * @brief Helper function to implement auto broadcasting elementwise binop references. + * + * @tparam T Element type of the input tensors. + * @tparam U Element type of the output tensor. + * @tparam Functor Type of the functor for the elementwise operation. Must support operator()(T,T), and operator()(T,T) + * must return a value of type U. + * + * @param arg0 Pointer to the buffer for left operand input tensor. 
+ * @param arg1 Pointer to the buffer for right operand input tensor. + * @param out Pointer to the buffer for output tensor. This must be pre-allocated by the caller, and must be large + * enough to hold a tensor of the correct shape. + * @param broadcast_spec Specification of the auto-broadcasting scheme. + * @param elementwise_functor Functor implementing the elementwise operation to be applied across the input tensors. + * Must accept two arguments of type T, and return a value of type U. + */ template void autobroadcast_binop(const T* arg0, const T* arg1, @@ -99,203 +310,36 @@ void autobroadcast_binop(const T* arg0, Functor elementwise_functor) { switch (broadcast_spec.m_type) { case op::AutoBroadcastType::NONE: - for (size_t i = 0; i < shape_size(arg0_shape); i++) { - out[i] = static_cast(elementwise_functor(arg0[i], arg1[i])); - } + no_broadcast_binop(arg0, arg1, out, shape_size(arg0_shape), elementwise_functor); break; case op::AutoBroadcastType::NUMPY: - // We'll be using CoordinateTransformBasic to handle the broadcasting. The general - // procedure is as follows: - // - // (1) Left pad the shorter of the two shapes with ones. - // (2) Squeeze (remove ones from) both shapes, and record the squeezed axis - // indices. - // (3) Using CoordinateTransformBasic, broadcast both args to the final output - // shape. The "broadcasted axes" will be those that were squeezed in step - // 2. 
- // - // Example: - // - // Input shape->Padded shape->Squeezed Shape/Squeezed Axes - // ----------- ------------ ---------------------------- - // a: [ 3, 2, 1] [ 3, 2, 1] [ 3, 2 ] {2} - // b: [ 1, 6] [ 1, 1, 6] [ 6] {0,1} - // | | | - // v v v - // Output shape - // ------------ - // [ 3, 2, 6] - { - using namespace internal; - - size_t const shape_rank = std::max(arg0_shape.size(), arg1_shape.size()) + 1; - - // TODO: Use compiler-specific alloca() or variable-length array - std::vector tmp(shape_rank * 2); - - size_t* strides0 = tmp.data(); - size_t* strides1 = tmp.data() + shape_rank; - - row_major_strides(arg0_shape, strides0, shape_rank); - row_major_strides(arg1_shape, strides1, shape_rank); - - size_t const padding0 = shape_rank - arg0_shape.size(); - size_t const padding1 = shape_rank - arg1_shape.size(); - - Shape output_shape(shape_rank, 0); - - size_t axis = 0; - - for (size_t i = 0; i < shape_rank; i++) { - auto const dim0 = value_with_padding_or(arg0_shape, padding0, i, 1); - auto const dim1 = value_with_padding_or(arg1_shape, padding1, i, 1); - - output_shape[i] = std::max(dim0, dim1); - - if (dim0 != dim1) - axis = std::max(axis, i); - } - - if (axis == 0) { - for (size_t i = 0, end = strides0[0]; i < end; ++i) - out[i] = elementwise_functor(arg0[i], arg1[i]); - } else if (strides0[axis] == 1 && value_with_padding_or(arg0_shape, padding0, axis, 1) == 1) { - axis = calculate_fixed_axis(axis, strides0); - - numpy_autobroadcast_binop<0, 1>(arg0, - arg1, - out, - arg0_shape, - arg1_shape, - strides0, - strides1, - padding0, - padding1, - output_shape, - axis, - strides1[axis], - elementwise_functor); - } else if (strides1[axis] == 1 && value_with_padding_or(arg1_shape, padding1, axis, 1) == 1) { - axis = calculate_fixed_axis(axis, strides1); - - numpy_autobroadcast_binop<1, 0>(arg0, - arg1, - out, - arg0_shape, - arg1_shape, - strides0, - strides1, - padding0, - padding1, - output_shape, - axis, - strides0[axis], - elementwise_functor); - } else - 
numpy_autobroadcast_binop<1, 1>(arg0, - arg1, - out, - arg0_shape, - arg1_shape, - strides0, - strides1, - padding0, - padding1, - output_shape, - axis, - strides0[axis], - elementwise_functor); - } + numpy_broadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, elementwise_functor); break; case op::AutoBroadcastType::PDPD: - // We'll be using CoordinateTransformBasic to handle the broadcasting. No need to - // process arg0 and output shape will be the same as arg0. We need to process - // arg1 and the general procedure is as follows: - // - // (1) Trim trailing ones from arg1 shape. - // (2) Left and right pad arg1 to match arg0 shape. Axis is the index start - // to align between arg0 and arg1. - // (3) Squeeze (remove ones from) arg1 shape, and record the squeezed axis - // indices. - // (3) Using CoordinateTransformBasic, broadcast arg1 to the final output - // shape. The "broadcasted axes" will be those that were squeezed in step - // 23. - // - // Example: - // - // Input shape-> Padded shape-> Squeezed Shape/Squeezed Axes - // ----------- ------------ ---------------------------- - // a: [ 3, 4, 5, 6] [ 3, 4, 5, 6] [ 3, 4, 5, 6] - // b: [ 4, 5, ] [ 1, 4, 5, 1] [ 4, 5 ] {0,3} - // | | | - // v v v - // Output shape - // ------------ - // [ 3, 4, 5, 6] - { - int64_t axis = broadcast_spec.m_axis; - if (axis == -1) { - axis = arg0_shape.size() - arg1_shape.size(); - } - - Shape arg1_padded_shape = arg1_shape; - // Trim trailing ones - while (arg1_padded_shape.size() > 0 && arg1_padded_shape.back() == 1) { - arg1_padded_shape.pop_back(); - } - - for (int64_t i = 0; i < axis; ++i) { - arg1_padded_shape.insert(arg1_padded_shape.begin(), 1); - } - - while (arg1_padded_shape.size() < arg0_shape.size()) { - arg1_padded_shape.insert(arg1_padded_shape.end(), 1); - } - - Shape arg1_squeezed_shape; - AxisSet arg1_squeezed_axes; - - for (size_t i = 0; i < arg0_shape.size(); i++) { - if (arg1_padded_shape[i] == 1) { - arg1_squeezed_axes.insert(i); - } else { - 
arg1_squeezed_shape.push_back(arg1_padded_shape[i]); - } - } - - const CoordinateTransformBasic output_transform{arg0_shape}; - - for (const Coordinate& output_coord : output_transform) { - const auto arg1_coord = util::reduce(output_coord, arg1_squeezed_axes); - const auto out_index = coordinate_index(output_coord, arg0_shape); - const auto arg0_index = coordinate_index(output_coord, arg0_shape); - const auto arg1_index = coordinate_index(arg1_coord, arg1_squeezed_shape); - out[out_index] = elementwise_functor(arg0[arg0_index], arg1[arg1_index]); - } - } + pdpd_broadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec.m_axis, elementwise_functor); + break; + default: + break; } } -/// \brief Helper function to implement autobroadcasting elementwise ternaryop -/// references. -/// -/// \tparam U Element type of the selector tensor. -/// \tparam T Element type of the input tensors. -/// \tparam Functor Type of the functor for the elementwise operation. Must support -/// operator()(U,T,T), and operator()(U,T,T) must return a value of type -/// T. -/// -/// \param arg0 Pointer to the buffer for selector tensor. -/// \param arg1 Pointer to the buffer for left operand input tensor. -/// \param arg2 Pointer to the buffer for right operand input tensor. -/// \param out Pointer to the buffer for output tensor. This must be pre-allocated by -/// the caller, and must be large enough to hold a tensor of the correct -/// shape. -/// \param broadcast_spec Specification of the auto-broadcasting scheme. -/// \param elementwise_functor Functor implementing the elementwise operation to be -/// applied across the input tensors. Must accept an argument -/// of -/// type U and two of type T, and return a value of type T. +/** + * + * \brief Helper function to implement auto broadcasting elementwise ternary op references. + * \tparam U Element type of the selector tensor. + * \tparam T Element type of the input tensors. 
+ * \tparam Functor Type of the functor for the elementwise operation. Must support operator()(U,T,T), and + * operator()(U,T,T) must return a value of type T. + * + * \param arg0 Pointer to the buffer for selector tensor. + * \param arg1 Pointer to the buffer for left operand input tensor. + * \param arg2 Pointer to the buffer for right operand input tensor. + * \param out Pointer to the buffer for output tensor. This must be pre-allocated by the caller, and must be large + * enough to hold a tensor of the correct shape. + * \param broadcast_spec Specification of the auto-broadcasting scheme. + * \param elementwise_functor Functor implementing the elementwise operation to be applied across the input tensors Must + * accept an argument of type U and two of type T, and return a value of type T. + */ template void autobroadcast_select(const U* arg0, const T* arg1, @@ -308,8 +352,8 @@ void autobroadcast_select(const U* arg0, Functor elementwise_functor) { switch (broadcast_spec.m_type) { case op::AutoBroadcastType::NONE: - for (size_t i = 0; i < shape_size(arg0_shape); i++) { - out[i] = elementwise_functor(arg0[i], arg1[i], arg2[i]); + for (auto last = arg0 + shape_size(arg0_shape); arg0 != last; ++arg0, ++arg1, ++arg2, ++out) { + *out = elementwise_functor(*arg0, *arg1, *arg2); } break; case op::AutoBroadcastType::NUMPY: @@ -422,7 +466,7 @@ void autobroadcast_select(const U* arg0, Shape arg2_squeezed_shape; AxisSet arg2_squeezed_axes; - for (size_t i = 0; i < arg1_shape.size(); i++) { + for (size_t i = 0, size = arg1_shape.size(); i < size; ++i) { if (arg0_padded_shape[i] == 1) { arg0_squeezed_axes.insert(i); } else { diff --git a/src/core/shape_inference/include/rms_norm_shape_inference.hpp b/src/core/shape_inference/include/rms_norm_shape_inference.hpp new file mode 100644 index 00000000000000..bc03fe37f91f34 --- /dev/null +++ b/src/core/shape_inference/include/rms_norm_shape_inference.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2018-2024 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/rms_norm.hpp" +#include "utils.hpp" + +namespace ov { +namespace op { +namespace v14 { +template > +std::vector shape_infer(const RMSNorm* op, + const std::vector& input_shapes, + const ITensorAccessor& tensor_accessor = make_tensor_accessor()) { + const auto inputs_count = input_shapes.size(); + const auto has_scale_input = inputs_count == 3; + NODE_SHAPE_INFER_CHECK(op, input_shapes, inputs_count == 2 || has_scale_input); + + const auto& data_shape = input_shapes[0]; + const auto& data_rank = data_shape.rank(); + const auto& axes_shape = input_shapes[1]; + const auto& axes_rank = axes_shape.rank(); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + ov::util::is_rank_compatible_any_of(axes_rank, {0, 1}), + "Axes input must be a scalar or 1D input. Got: ", + axes_shape); + + // Further validation requires data rank to be static + if (data_rank.is_dynamic()) { + return {data_shape}; + } + + if (axes_shape.rank().is_static()) { + const bool has_axes_compatible = axes_shape.size() == 0 || axes_shape[0].is_dynamic() || + cmp::ge(data_rank.get_length(), axes_shape.get_shape()[0]); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + has_axes_compatible, + "Number of the axes can't be higher than the rank of the data shape."); + } + + if (has_scale_input) { // Validate scale input + TRShape scale_shape = input_shapes[2]; + const bool is_scale_shape_broadcastable = + TRShape::broadcast_merge_into(scale_shape, data_shape, ov::op::AutoBroadcastType::NUMPY); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + is_scale_shape_broadcastable, + "Scale input shape must be broadcastable to the shape of the data input."); + } + + // Axes values validation + if (const auto axes_val = ov::op::get_input_const_data_as(op, 1, tensor_accessor)) { + ov::util::normalize_axes(op, data_rank.get_length(), *axes_val); + } + + return {data_shape}; +} +} // namespace v14 +} // namespace op +} // namespace ov diff --git 
a/src/core/src/op/rms_norm.cpp b/src/core/src/op/rms_norm.cpp new file mode 100644 index 00000000000000..a249e86a6a207e --- /dev/null +++ b/src/core/src/op/rms_norm.cpp @@ -0,0 +1,97 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/rms_norm.hpp" + +#include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/op.hpp" +#include "rms_norm_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v14 { + +RMSNorm::RMSNorm(const Output& data, + const Output& axes, + double epsilson, + const ov::element::Type& compute_type) + : Op({data, axes}), + m_epsilon(epsilson), + m_compute_type(compute_type) { + constructor_validate_and_infer_types(); +} + +RMSNorm::RMSNorm(const Output& data, + const Output& axes, + const Output& scale, + double epsilson, + const ov::element::Type& compute_type) + : Op({data, axes, scale}), + m_epsilon(epsilson), + m_compute_type(compute_type) { + constructor_validate_and_infer_types(); +} + +bool RMSNorm::visit_attributes(ov::AttributeVisitor& visitor) { + OV_OP_SCOPE(v14_RMSNorm_visit_attributes); + visitor.on_attribute("epsilon", m_epsilon); + visitor.on_attribute("compute_type", m_compute_type); + return true; +} + +void RMSNorm::validate_and_infer_types() { + OV_OP_SCOPE(v14_RMSNorm_validate_and_infer_types); + + const auto& data_element_type = get_input_element_type(0); + const bool is_valid_data_type = data_element_type.is_dynamic() || data_element_type.is_real(); + NODE_VALIDATION_CHECK(this, + is_valid_data_type, + "The element type of the data tensor must be a floating point type. Got: ", + data_element_type); + + const auto& axes_element_type = get_input_element_type(1); + const bool is_valid_axes_type = + data_element_type.is_dynamic() || axes_element_type == element::i32 || axes_element_type == element::i64; + NODE_VALIDATION_CHECK(this, + is_valid_axes_type, + "The element type of the axes tensor must be i32 or i64 type. 
Got: ", + axes_element_type); + + if (get_input_size() > 2) { // Validate scale input type + + // Validate input types + auto merged_et = element::dynamic; + const auto& scale_element_type = get_input_element_type(2); + const bool is_scale_type_compatible = element::Type::merge(merged_et, data_element_type, scale_element_type); + NODE_VALIDATION_CHECK(this, + is_scale_type_compatible, + "Element type of the scale input must be the same as the data input type."); + } + + const auto output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); + // Output type and shape is the same as the first input + set_output_type(0, data_element_type, output_shapes[0]); +} + +std::shared_ptr RMSNorm::clone_with_new_inputs(const ov::OutputVector& new_args) const { + OV_OP_SCOPE(v14_RMSNorm_clone_with_new_inputs); + check_new_args_count(this, new_args); + if (new_args.size() == 2) { + return std::make_shared(new_args.at(0), new_args.at(1), m_epsilon, m_compute_type); + } + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_epsilon, m_compute_type); +} + +double RMSNorm::get_epsilon() const { + return m_epsilon; +} + +const ov::element::Type& RMSNorm::get_compute_type() const { + return m_compute_type; +} + +} // namespace v14 +} // namespace op +} // namespace ov diff --git a/src/core/tests/type_prop/rms_norm.cpp b/src/core/tests/type_prop/rms_norm.cpp new file mode 100644 index 00000000000000..b24531a9c2cf23 --- /dev/null +++ b/src/core/tests/type_prop/rms_norm.cpp @@ -0,0 +1,273 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/rms_norm.hpp" + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/type_prop.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" + +namespace ov { +namespace test { + +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using 
testing::HasSubstr; + +class TypePropRMSNormTest : public TypePropOpTest { +public: + double eps = 1e-5; +}; + +TEST_F(TypePropRMSNormTest, default_ctor) { + const auto op = make_op(); + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(element::i64, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + + op->set_arguments(ov::OutputVector{data, axes, scale}); + op->validate_and_infer_types(); + + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_input_size(), 3); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8, 6})); +} + +TEST_F(TypePropRMSNormTest, no_scale_no_compute_type) { + const auto data = std::make_shared(element::f32, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + + const auto op = make_op(data, axes, eps); + EXPECT_EQ(op->get_input_size(), 2); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8, 6})); + EXPECT_EQ(op->get_epsilon(), eps); +} + +TEST_F(TypePropRMSNormTest, scale_no_compute_type) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + + const auto op = make_op(data, axes, scale, eps); + EXPECT_EQ(op->get_input_size(), 3); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8, 6})); + EXPECT_EQ(op->get_epsilon(), eps); +} + +TEST_F(TypePropRMSNormTest, scale_compute_type) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto 
scale = std::make_shared(element::f16, PartialShape{}); + const auto compute_type = element::f32; + + const auto op = make_op(data, axes, scale, eps, compute_type); + EXPECT_EQ(op->get_input_size(), 3); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8, 6})); + EXPECT_EQ(op->get_epsilon(), eps); + EXPECT_EQ(op->get_compute_type(), compute_type); +} + +TEST_F(TypePropRMSNormTest, scale_compute_type_no_scale) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto compute_type = element::f32; + + const auto op = make_op(data, axes, eps, compute_type); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8, 6})); +} + +TEST_F(TypePropRMSNormTest, dynamic_data_shape) { + const auto data = std::make_shared(element::f16, PartialShape{-1, {3, 4}, {8, -1}, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + const auto compute_type = element::f32; + + const auto op = make_op(data, axes, scale, eps, compute_type); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, {3, 4}, {8, -1}, 6})); +} + +TEST_F(TypePropRMSNormTest, dynamic_data_shape_rank) { + const auto data = std::make_shared(element::f16, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + const auto compute_type = element::f32; + + const auto op = make_op(data, axes, scale, eps, compute_type); + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), 
(PartialShape::dynamic())); +} + +TEST_F(TypePropRMSNormTest, propagate_symbols) { + auto data_shape = PartialShape{-1, {3, 4}, {8, -1}, 6}; + set_shape_symbols(data_shape); + const auto exp_symbols = get_shape_symbols(data_shape); + + const auto data = std::make_shared(element::f16, data_shape); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + const auto compute_type = element::f32; + + const auto op = make_op(data, axes, scale, eps, compute_type); + EXPECT_EQ(get_shape_symbols(op->get_output_partial_shape(0)), exp_symbols); +} + +TEST_F(TypePropRMSNormTest, incorrect_input_type) { + const auto data = std::make_shared(element::f16, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto scale = std::make_shared(element::f16, PartialShape{}); + const auto compute_type = element::f32; + { + const auto data_int = std::make_shared(element::i32, PartialShape::dynamic()); + OV_EXPECT_THROW(std::ignore = make_op(data_int, axes, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("The element type of the data tensor must be a floating point type")); + } + { + const auto axes_float = std::make_shared(element::f32, PartialShape{1}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes_float, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("The element type of the axes tensor must be i32 or i64 type")); + } + { + const auto scale_incompatible = std::make_shared(element::f32, PartialShape{1}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, scale_incompatible, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("Element type of the scale input must be the same as the data input type")); + } +} + +TEST_F(TypePropRMSNormTest, incompatible_axes_shape) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8}); + const auto scale = std::make_shared(element::f16, 
PartialShape{}); + const auto compute_type = element::f32; + { + const auto axes = std::make_shared(element::i32, PartialShape{1, 2}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("Axes input must be a scalar or 1D input. Got: [1,2]")); + } + { + const auto axes = std::make_shared(element::i32, PartialShape{4}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("Number of the axes can't be higher than the rank of the data shape")); + } +} + +TEST_F(TypePropRMSNormTest, constant_axes_val_data_dyn_rank) { + const auto data = std::make_shared(element::f16, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, Shape{}, 1); + const auto op = make_op(data, axes, eps); + + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape::dynamic())); +} + +TEST_F(TypePropRMSNormTest, constant_axes_val_data_static_rank) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8}); + const auto axes = std::make_shared(element::i32, Shape{}, 1); + const auto op = make_op(data, axes, eps); + + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8})); +} + +TEST_F(TypePropRMSNormTest, axes_val_as_shape_of) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8}); + const auto data_rank = std::make_shared(std::make_shared(data)); + const auto axes = + std::make_shared(data_rank, std::make_shared(element::i64, Shape{}, 1)); + const auto op = make_op(data, axes, eps); + + EXPECT_EQ(op->get_output_element_type(0), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{2, 3, 8})); +} + +TEST_F(TypePropRMSNormTest, incorrect_axes_val) { + const auto data = std::make_shared(element::f16, PartialShape{2, 3, 8}); + { + const auto axes = 
std::make_shared(element::i32, Shape{}, 3); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, eps), + ov::NodeValidationFailure, + HasSubstr("Parameter axis 3 out of the tensor rank range [-3, 2]")); + } + { + const auto axes = std::make_shared(element::i32, Shape{}, -4); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, eps), + ov::NodeValidationFailure, + HasSubstr("Parameter axis -4 out of the tensor rank range [-3, 2]")); + } +} + +using RMSNormTestParam = std::tuple; +class TypePropRMSNormTestP : public TypePropRMSNormTest, public testing::WithParamInterface { +protected: + void SetUp() override { + std::tie(shape_data, shape_scale) = GetParam(); + } + PartialShape shape_data, shape_scale; +}; + +INSTANTIATE_TEST_SUITE_P(type_prop_rms_scale_shape, + TypePropRMSNormTestP, + testing::Values(std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{-1}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{1}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{2}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{1, 1}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{1, 2}), + std::make_tuple(PartialShape{-1, 3, 1, 2}, PartialShape{3, 1, 2}), + std::make_tuple(PartialShape{-1, 4, 8, 6}, PartialShape{1, 4, 1, 1}), + std::make_tuple(PartialShape{2, 4, 8, 6}, PartialShape{2, 4, 8, 6}), + std::make_tuple(PartialShape{2, 4, 8, 6}, PartialShape{1, 4, 1, 1}), + std::make_tuple(PartialShape{2, 4, 8, 6}, PartialShape{1, 1, 1, 1}), + std::make_tuple(PartialShape{2, 4, 8, 6}, PartialShape::dynamic()), + std::make_tuple(PartialShape::dynamic(), PartialShape{1}), + std::make_tuple(PartialShape::dynamic(), PartialShape::dynamic())), + testing::PrintToStringParamName()); + +TEST_P(TypePropRMSNormTestP, scale_shape) { + const auto data = std::make_shared(element::f16, shape_data); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + + const auto scale = 
std::make_shared(element::f16, shape_scale); + const auto op = make_op(data, axes, scale, eps); + + EXPECT_EQ(op->get_output_partial_shape(0), shape_data); +} + +TEST_F(TypePropRMSNormTest, scale_incompatible_shape) { + const auto data = std::make_shared(element::f16, PartialShape{-1, 3, 8, 6}); + const auto axes = std::make_shared(element::i32, PartialShape{1}); + const auto compute_type = element::f32; + { + const auto scale = std::make_shared(element::f16, PartialShape{8}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("Scale input shape must be broadcastable to the shape of the data input")); + } + { + const auto scale = std::make_shared(element::f16, PartialShape{6, 1}); + OV_EXPECT_THROW(std::ignore = make_op(data, axes, scale, eps, compute_type), + ov::NodeValidationFailure, + HasSubstr("Scale input shape must be broadcastable to the shape of the data input")); + } +} + +} // namespace test +} // namespace ov diff --git a/src/core/tests/visitors/op/rms_norm.cpp b/src/core/tests/visitors/op/rms_norm.cpp new file mode 100644 index 00000000000000..ac0d191d1e6dfb --- /dev/null +++ b/src/core/tests/visitors/op/rms_norm.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/rms_norm.hpp" + +#include + +#include "visitors/visitors.hpp" + +using ov::PartialShape; +using ov::op::v0::Parameter; +using ov::test::NodeBuilder; + +TEST(attributes, rms_norm_v14_attr_comp_type_default) { + using ov::op::v14::RMSNorm; + NodeBuilder::opset().insert(); + + const auto data = std::make_shared(ov::element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(ov::element::i32, PartialShape{1}); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + NodeBuilder builder(op, {data, axes}); + auto g_op = ov::as_type_ptr(builder.create()); + + EXPECT_EQ(g_op->get_compute_type(), 
op->get_compute_type()); + EXPECT_EQ(g_op->get_output_element_type(0), op->get_output_element_type(0)); + EXPECT_EQ(g_op->get_output_partial_shape(0), op->get_output_partial_shape(0)); +} + +TEST(attributes, rms_norm_v14_attr_comp_type_custom) { + using ov::op::v14::RMSNorm; + NodeBuilder::opset().insert(); + + const auto data = std::make_shared(ov::element::f16, PartialShape{2, 3, 8, 6}); + const auto axes = std::make_shared(ov::element::i32, PartialShape{1}); + const auto eps = 1e-5f; + const auto compute_type = ov::element::f32; + + const auto op = std::make_shared(data, axes, eps, compute_type); + + NodeBuilder builder(op, {data, axes}); + auto g_op = ov::as_type_ptr(builder.create()); + + EXPECT_EQ(g_op->get_compute_type(), op->get_compute_type()); + EXPECT_EQ(g_op->get_output_element_type(0), op->get_output_element_type(0)); + EXPECT_EQ(g_op->get_output_partial_shape(0), op->get_output_partial_shape(0)); +} diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp index c27533ac522073..02c4b72d088f9c 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp @@ -52,7 +52,7 @@ class NodeContext : public frontend::NodeContext { // TODO: int due to base class uses it, but naturally it should be size_t for PT Output get_input(int index) const override { size_t index_ = static_cast(index); - FRONT_END_GENERAL_CHECK(!input_is_none(index_), "Input doesn't exist with index: ", index); + FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index_), "Input doesn't exist with index: ", index); auto input = m_decoder_inputs.at(index); if (input == 0) { // Case when input can be inlined (possible only for fx decoder) @@ -97,9 +97,7 @@ class NodeContext : public frontend::NodeContext { return m_decoder->get_input_type(index); } - bool input_is_none(size_t index) const { 
- return index >= m_inputs_is_none.size() || m_inputs_is_none.at(index); - } + bool input_is_none(size_t index) const; Any get_output_type(size_t index) const { return m_decoder->get_output_type(index); diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index 245cf9e2d27544..ff66b31ec7d6c3 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -147,6 +147,16 @@ std::shared_ptr NodeContext::convert_subgraph(size_t index) const { return model; } +bool NodeContext::input_is_none(size_t index) const { + bool res = index >= m_inputs_is_none.size() || m_inputs_is_none.at(index); + if (!res) { + // check case when input is from outside body + auto input = get_input_from_visible_context(index); + res = is_none_node(input); + } + return res; +} + namespace { std::shared_ptr get_constant_at_input(const NodeContext& ctx, size_t index, bool allow_empty = true) { FRONT_END_GENERAL_CHECK(!ctx.input_is_none(index), "Input with index: ", index, " is none."); diff --git a/src/frontends/pytorch/src/op/if.cpp b/src/frontends/pytorch/src/op/if.cpp index 2a18e7768f31f1..4f8b803a194fda 100644 --- a/src/frontends/pytorch/src/op/if.cpp +++ b/src/frontends/pytorch/src/op/if.cpp @@ -2,8 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/if.hpp" + #include "openvino/frontend/pytorch/node_context.hpp" -#include "openvino/opsets/opset10.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/result.hpp" #include "openvino/util/log.hpp" #include "translate_session.hpp" #include "utils.hpp" @@ -13,12 +16,12 @@ namespace frontend { namespace pytorch { namespace op { +using namespace ov::op; + namespace { // TODO: Ticket 106627. 
This is a WA and will work only if both branches of if will eventually go to the operation that // will have same output type for both types -void align_result_types(const NodeContext& context, - std::shared_ptr r1, - std::shared_ptr r2) { +void align_result_types(const NodeContext& context, std::shared_ptr r1, std::shared_ptr r2) { auto r1_tensor = r1->input_value(0); auto r2_tensor = r2->input_value(0); auto r1_type = r1_tensor.get_element_type(); @@ -28,19 +31,19 @@ void align_result_types(const NodeContext& context, element::Type merged_type; if (element::Type::merge(merged_type, r1_type, r2_type)) { if (r1_type != merged_type) { - auto convert1 = std::make_shared(r1_tensor, merged_type); + auto convert1 = std::make_shared(r1_tensor, merged_type); r1->set_argument(0, convert1); } if (r2_type != merged_type) { - auto convert2 = std::make_shared(r2_tensor, merged_type); + auto convert2 = std::make_shared(r2_tensor, merged_type); r2->set_argument(0, convert2); } } else { if (r1_type.bitwidth() >= r2_type.bitwidth()) { - auto convert = std::make_shared(r2_tensor, r1_type); + auto convert = std::make_shared(r2_tensor, r1_type); r2->set_argument(0, convert); } else { - auto convert = std::make_shared(r1_tensor, r2_type); + auto convert = std::make_shared(r1_tensor, r2_type); r1->set_argument(0, convert); } } @@ -48,7 +51,7 @@ void align_result_types(const NodeContext& context, } // namespace OutputVector translate_if(const NodeContext& context) { - auto if_node = std::make_shared(context.get_input(0)); + auto if_node = std::make_shared(context.get_input(0)); context.mark_node(if_node); auto decoder = context.get_decoder(); PYTORCH_OP_CONVERSION_CHECK(decoder->get_subgraph_size() == 2, "If must have 2 subgraphs."); @@ -100,8 +103,8 @@ OutputVector translate_if(const NodeContext& context) { res.push_back(if_node->set_output(then_results[i], else_results[i])); } // Each body can have mutated outputs that are not included into pytorch node outputs. 
- std::map> extra_then_body_results; - std::map> extra_else_body_results; + std::map> extra_then_body_results; + std::map> extra_else_body_results; std::set extra_output_idxs; for (size_t i = num_outs; i < then_results.size(); i++) { const auto result = then_results[i]; @@ -134,28 +137,43 @@ OutputVector translate_if(const NodeContext& context) { for (const auto& output_idx : extra_output_idxs) { if (!extra_then_body_results.count(output_idx)) { // Need to add Parameter->Result construction in then body - auto new_parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); - session->encode_tensor_name(new_parameter->output(0), output_idx); - auto new_result = std::make_shared(new_parameter); - then_body->add_parameters({new_parameter}); + std::shared_ptr new_parameter; + if (inputs_map.count(output_idx) && inputs_map[output_idx][0]) { + // parameter already exist in inputs + new_parameter = inputs_map[output_idx][0]; + } else { + new_parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); + session->encode_tensor_name(new_parameter->output(0), output_idx); + then_body->add_parameters({new_parameter}); + PYTORCH_OP_CONVERSION_CHECK(inputs_map.count(output_idx), + "Input must exist in then body: ", + output_idx); + inputs_map[output_idx][0] = new_parameter; + } + auto new_result = std::make_shared(new_parameter); then_body->add_results({new_result}); then_body->validate_nodes_and_infer_types(); - PYTORCH_OP_CONVERSION_CHECK(inputs_map.count(output_idx), "Input must exist in else body: ", output_idx); - inputs_map[output_idx][0] = new_parameter; extra_then_body_results[output_idx] = new_result; OPENVINO_DEBUG << "Modified then body: " << if_node << '\n'; } else if (!extra_else_body_results.count(output_idx)) { // Need to add Parameter->Result construction in else body - auto new_parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); - session->encode_tensor_name(new_parameter->output(0), output_idx); - auto new_result = 
std::make_shared(new_parameter); - else_body->add_parameters({new_parameter}); + std::shared_ptr new_parameter; + if (inputs_map.count(output_idx) && inputs_map[output_idx][1]) { + // parameter already exist in inputs + new_parameter = inputs_map[output_idx][1]; + } else { + new_parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); + session->encode_tensor_name(new_parameter->output(0), output_idx); + else_body->add_parameters({new_parameter}); + PYTORCH_OP_CONVERSION_CHECK(inputs_map.count(output_idx), + "Input must exist in then body: ", + output_idx); + inputs_map[output_idx][1] = new_parameter; + } + auto new_result = std::make_shared(new_parameter); else_body->add_results({new_result}); else_body->validate_nodes_and_infer_types(); - PYTORCH_OP_CONVERSION_CHECK(inputs_map.count(output_idx), "Input must exist in then body: ", output_idx); - inputs_map[output_idx][1] = new_parameter; extra_else_body_results[output_idx] = new_result; - OPENVINO_DEBUG << "Modified else body: " << if_node << '\n'; } } // Create prim::If inputs and outputs diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 6751fe5a80a50c..96484d4726f62b 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -80,6 +80,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( auto parameters = std::make_shared(); auto tensor_map = std::make_shared(); // tensor map of the current context auto mutated_tensors = std::make_shared>(); + std::vector inserted_params; if (input_model) { // When we have input model we should use its inputs order to create Parameters @@ -143,6 +144,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // set name of parameter to the index of node in the model encode_tensor_name(parameter->output(0), input); parameters->push_back(parameter); + inserted_params.push_back(input); } } auto context = NodeContext(node, 
external_tensor_map, tensor_map, parameters, mutated_tensors, this); @@ -260,6 +262,19 @@ std::shared_ptr TranslateSession::convert_pytorch_model( OPENVINO_DEBUG << "Mutated tensor with id " << tensor_id << " doesn't exist in inputs, skipping."; } } + if (!external_tensor_map.empty()) { + // for internal bodies we want to remove all extra inputs that were created, but not used + parameters->erase(std::remove_if(parameters->begin(), + parameters->end(), + [&](std::shared_ptr p) { + auto tensor_id = decode_tensor_name(p); + return p->output(0).get_target_inputs().empty() && + std::find(inserted_params.begin(), + inserted_params.end(), + tensor_id) != inserted_params.end(); + }), + parameters->end()); + } resulting_model = std::make_shared(results, *parameters); // Did a conversion in a nested scope to automatically remove any holders of nodes except those in the graph } diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index f206839f3aacbb..a8bb88e1ab601a 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -629,7 +629,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | MatrixDiagV2 | NO | | | MatrixDiagV3 | NO | | | MatrixExponential | NO | | -| MatrixInverse | NO | | +| MatrixInverse | YES | | | MatrixLogarithm | NO | | | MatrixSetDiag | NO | | | MatrixSetDiagV2 | NO | | @@ -1227,7 +1227,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | StringSplitV2NEW | YES | openvino-tokenizers required | | StringStrip | NO | | | StringToHashBucket | NO | | -| StringToHashBucketFast | NO | | +| StringToHashBucketFast | YES | openvino-tokenizers required | | StringToHashBucketStrong | NO | | | StringToNumber | NO | | | StringUpper | NO | | diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index e2abb859e3bc75..7a9e4f9acd3d63 100644 --- 
a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -302,6 +302,7 @@ const std::map get_supported_ops() { {"MatMul", CreatorFunction(translate_mat_mul_op)}, {"MatrixBandPart", CreatorFunction(translate_matrix_band_part_op)}, {"MatrixDiag", CreatorFunction(translate_matrix_diag_op)}, + {"MatrixInverse", CreatorFunction(translate_matrix_inverse_op)}, {"MaxPool", CreatorFunction(translate_max_pool_op)}, {"MaxPoolV2", CreatorFunction(translate_max_pool_op)}, {"MaxPool3D", CreatorFunction(translate_max_pool_op)}, @@ -461,7 +462,12 @@ const std::map get_supported_ops() { }; const std::vector get_supported_ops_via_tokenizers() { - return {"RaggedTensorToSparse", "RaggedTensorToTensor", "StaticRegexReplace", "StringLower", "StringSplitV2"}; + return {"RaggedTensorToSparse", + "RaggedTensorToTensor", + "StaticRegexReplace", + "StringLower", + "StringSplitV2", + "StringToHashBucketFast"}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index af59b862c89234..df62808b3cdacf 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -106,6 +106,7 @@ OP_CONVERTER(translate_lrn_op); OP_CONVERTER(translate_mat_mul_op); OP_CONVERTER(translate_matrix_diag_op); OP_CONVERTER(translate_matrix_band_part_op); +OP_CONVERTER(translate_matrix_inverse_op); OP_CONVERTER(translate_max_pool_op); OP_CONVERTER_NAMED(translate_max_pool_with_argmax); OP_CONVERTER(translate_mirror_pad_op); diff --git a/src/frontends/tensorflow_common/src/op/matrix_inverse.cpp b/src/frontends/tensorflow_common/src/op/matrix_inverse.cpp new file mode 100644 index 00000000000000..0e7d6e15948fa7 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/matrix_inverse.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/inverse.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_matrix_inverse_op(const NodeContext& node) { + default_op_checks(node, 1, {"MatrixInverse"}); + // retrieve the input tensor + auto input = node.get_input(0); + + // handle optional 'adjoint' attribute (default is false) + bool adjoint = node.get_attribute("adjoint", false); + TENSORFLOW_OP_VALIDATION( + node, + !adjoint, + "[TensorFlow Frontend] internal error: MatrixInverse is supported only for adjoint equal to false"); + + auto inverse_op = make_shared(input, adjoint); + set_node_name(node.get_name(), inverse_op); + + return {inverse_op}; +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 7e9ccc2fbdac14..4ceffafc8b5c30 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -893,6 +893,82 @@ std::set> jit_subtract_emitter::get_supported_precisi return {{element::f32, element::f32}}; } +/// SWISH /// +jit_swish_emitter::jit_swish_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); + sigmoid_emitter = std::make_unique(h, host_isa, node); +} + +jit_swish_emitter::jit_swish_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const float beta, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc), beta(beta) { + 
prepare_table(); + sigmoid_emitter = std::make_unique(h, host_isa, exec_prc); +} + +size_t jit_swish_emitter::get_inputs_count() const {return 1; } + +size_t jit_swish_emitter::get_aux_vecs_count() const { + return sigmoid_emitter->get_aux_vecs_count() + 2; +} + +size_t jit_swish_emitter::get_aux_gprs_count() const { + return sigmoid_emitter->get_aux_gprs_count() + 1; +} + +void jit_swish_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_swish_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg vmm_src(in_vec_idxs[0]); + const TReg vmm_dst(out_vec_idxs[0]); + const TReg vmm_orig_src(aux_vec_idxs[sigmoid_emitter->get_aux_vecs_count()]); + const TReg vmm_aux(aux_vec_idxs[sigmoid_emitter->get_aux_vecs_count() + 1]); + + h->mov(vmm_orig_src.b16, vmm_src.b16); + + // x*beta + h->ld1r(vmm_aux.s, table_val2("beta")); + h->fmul(vmm_aux.s, vmm_aux.s, vmm_src.s); + + // sigmoid(x*beta) + sigmoid_emitter->emit_code( + { vmm_aux.getIdx() }, + out_vec_idxs, + aux_vec_idxs, + aux_gpr_idxs); + + // x*sigmoid(x*beta) + h->fmul(vmm_dst.s, vmm_dst.s, vmm_orig_src.s); +} + +void jit_swish_emitter::register_table_entries() { + push_arg_entry_of("beta", dnnl::impl::float2int(beta), true); +} + +void jit_swish_emitter::emit_data() const { + jit_emitter::emit_data(); + sigmoid_emitter->emit_data(); +} + +std::set> jit_swish_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + /// TANH /// jit_tanh_emitter::jit_tanh_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, 
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index a30ba3e432ee36..af5dbdf4c0a910 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -361,6 +361,39 @@ class jit_subtract_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_swish_emitter : public jit_emitter { +public: + jit_swish_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const float beta, + const ov::element::Type exec_prc = ov::element::f32); + + jit_swish_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + void register_table_entries() override; + + void emit_data() const override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + std::unique_ptr sigmoid_emitter; + + float beta; + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_tanh_emitter : public jit_emitter { public: jit_tanh_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 178a461181c60f..7476062b13e4d5 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -32,6 +32,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseSelect, Algorithm::EltwiseSigmoid, Algorithm::EltwiseSubtract, + Algorithm::EltwiseSwish, Algorithm::EltwiseTanh); if (!is_supported) { return false; diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index e23a5b13f7f9a0..86884efaa40a10 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -575,6 +575,16 @@ struct EltwiseEmitter { } }; +template<> +struct EltwiseEmitter { + void operator()(EltwiseEmitterContext& ctx) { + ctx.emitter = std::make_shared(ctx.host, + ctx.host_isa, + ctx.opData.alpha, + ctx.exec_prc); + } +}; + template<> struct EltwiseEmitter { void operator()(EltwiseEmitterContext& ctx) { @@ -623,6 +633,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, ov::intel_cpu::aarch64::jit_sigmoid_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseSwish, ov::intel_cpu::aarch64::jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, ov::intel_cpu::aarch64::jit_tanh_emitter)); if (!ctx.emitter) @@ -782,6 +793,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, jit_sigmoid_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseSwish, jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, jit_tanh_emitter)); if (precisions.empty()) OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); diff --git 
a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp index 6723c9a71bc355..fea311915cbad3 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp @@ -178,7 +178,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator { // 14 | aux // 15 | aux // 16 | aux - // 17 | src + // 17 | aux // 18 | src // 19 | src // 20 | src @@ -194,18 +194,18 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator { if (idx > MAX_ELTWISE_INPUTS) { OPENVINO_THROW("source vector register " + std::to_string(idx) + " is not supported"); } - return TReg(17 + idx); + return TReg(18 + idx); } inline SReg get_scl_reg(const uint32_t idx) { if (idx > MAX_ELTWISE_INPUTS) { OPENVINO_THROW("source scalar register " + std::to_string(idx) + " is not supported"); } - return SReg(17 + idx); + return SReg(18 + idx); } inline TReg get_aux_vmm(const uint32_t idx) { - if (idx > 6) { + if (idx > 7) { OPENVINO_THROW("aux vector register " + std::to_string(idx) + " is not supported"); } return TReg(10 + idx); diff --git a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp index 8f05876ce219b7..b3588f8bffbd47 100644 --- a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp @@ -90,6 +90,7 @@ #include "reshape_shape_inference.hpp" #include "reverse_sequence_shape_inference.hpp" #include "reverse_shape_inference.hpp" +#include "rms_norm_shape_inference.hpp" #include "rnn_cell_shape_inference.hpp" #include "rnn_sequence_shape_inference.hpp" #include "roi_align_shape_inference.hpp" @@ -399,6 +400,7 @@ using IStaticShapeInferFactory = template <> const IStaticShapeInferFactory::TRegistry 
IStaticShapeInferFactory::registry{ // opset14 + _OV_OP_SHAPE_INFER_MASK_REG(op::v14::RMSNorm, ShapeInferTA, util::bit::mask(1)), _OV_OP_SHAPE_INFER_MASK_REG(opset14::Inverse, ShapeInferTA, util::bit::mask()), // opset13 _OV_OP_SHAPE_INFER_MASK_REG(opset13::Multinomial, ShapeInferTA, util::bit::mask(1)), diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index 1e8f3b5d54e485..797fc117b46201 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -157,6 +157,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType (activation_type == utils::ActivationTypes::Exp) || (activation_type == utils::ActivationTypes::Relu) || (activation_type == utils::ActivationTypes::Sigmoid) || + (activation_type == utils::ActivationTypes::Swish) || (activation_type == utils::ActivationTypes::Tanh))) { return "jit"; } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rms_norm_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rms_norm_shape_inference_test.cpp new file mode 100644 index 00000000000000..cb3f346ec98c6f --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rms_norm_shape_inference_test.cpp @@ -0,0 +1,137 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using testing::HasSubstr; + +TEST(StaticShapeInferenceTest, RMSNormStaticShapeInferenceTestDefaultCtor) { + const auto op = std::make_shared(); + const auto data = std::make_shared(element::f16, 
PartialShape::dynamic()); + const auto axes = std::make_shared(element::i64, PartialShape::dynamic()); + const auto scale = std::make_shared(element::f16, PartialShape::dynamic()); + + op->set_arguments(ov::OutputVector{data, axes, scale}); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1}, StaticShape{1}}; + int32_t axis_val = -1; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + const auto static_output_shapes = shape_inference(op.get(), static_input_shapes, const_data); + EXPECT_EQ(static_output_shapes[0], StaticShape({2, 3, 8, 6})); +} + +TEST(StaticShapeInferenceTest, RMSNormStaticShapeInferenceTest2ins) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1}}; + int32_t axis_val = -1; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + const auto static_output_shapes = shape_inference(op.get(), static_input_shapes, const_data); + EXPECT_EQ(static_output_shapes[0], StaticShape({2, 3, 8, 6})); +} + +TEST(StaticShapeInferenceTest, RMSNormStaticShapeInferenceTest3ins) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto scale = std::make_shared(element::f32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, scale, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1}, StaticShape{1}}; + int32_t axis_val = -1; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + const auto static_output_shapes = shape_inference(op.get(), static_input_shapes, const_data); + 
EXPECT_EQ(static_output_shapes[0], StaticShape({2, 3, 8, 6})); +} + +TEST(StaticShapeInferenceTest, RMSNormIncorrectAxisValParam) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1}}; + int32_t axis_val = 5; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + + OV_EXPECT_THROW(shape_inference(op.get(), static_input_shapes, const_data), + NodeValidationFailure, + HasSubstr("Parameter axis 5 out of the tensor rank range [-4, 3]")); +} + +TEST(StaticShapeInferenceTest, RMSNormIncorrectAxisValConst) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, Shape{}, 5); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{}}; + + OV_EXPECT_THROW(shape_inference(op.get(), static_input_shapes), + NodeValidationFailure, + HasSubstr("Parameter axis 5 out of the tensor rank range [-4, 3]")); +} + +TEST(StaticShapeInferenceTest, RMSNormIncorrectAxisShapeDim) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{5}}; + int32_t axis_val = 5; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + + OV_EXPECT_THROW(shape_inference(op.get(), static_input_shapes, const_data), + NodeValidationFailure, + HasSubstr("Number of the axes can't be higher than the rank of the data shape")); +} + +TEST(StaticShapeInferenceTest, 
RMSNormIncorrectAxisShapeRank) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1, 5}}; + int32_t axis_val = 5; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + + OV_EXPECT_THROW(shape_inference(op.get(), static_input_shapes, const_data), + NodeValidationFailure, + HasSubstr("Axes input must be a scalar or 1D input. Got: {1,5}")); +} + +TEST(StaticShapeInferenceTest, RMSNormIncorrectScaleShape) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic()); + const auto axes = std::make_shared(element::i32, PartialShape::dynamic()); + const auto scale = std::make_shared(element::f32, PartialShape::dynamic()); + const auto eps = 1e-5f; + + const auto op = std::make_shared(data, axes, scale, eps); + + std::vector static_input_shapes = {StaticShape{2, 3, 8, 6}, StaticShape{1}, StaticShape{6, 1}}; + int32_t axis_val = -1; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{1}, &axis_val}}}; + + OV_EXPECT_THROW(shape_inference(op.get(), static_input_shapes, const_data), + NodeValidationFailure, + HasSubstr("Scale input shape must be broadcastable to the shape of the data input")); +} diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index 80c19bf2f37486..688ab6a72c0c22 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -95,16 +95,17 @@ inline ov::Shape get_tensor_shape(const ov::PartialShape& pshape) { return res; } -inline ov::Shape predict_shape(const std::string& name, const ov::Shape current_shape, ov::element::Type element_type, 
cldnn::ShapePredictor& shape_predictor) { - auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, element_type.bitwidth(), false); +inline ov::Shape predict_shape(const std::string& name, const cldnn::layout layout, cldnn::ShapePredictor& shape_predictor) { + auto prealloc_info = shape_predictor.predict_preallocation_shape(name, layout, false); const auto& preallocation_shape = prealloc_info.second; auto can_preallocate_buffer = prealloc_info.first && - shape_predictor.can_preallocate(cldnn::ceil_div(ov::shape_size(preallocation_shape) * element_type.bitwidth(), 8)); + shape_predictor.can_preallocate(cldnn::ceil_div(ov::shape_size(preallocation_shape) * + ov::element::Type(layout.data_type).bitwidth(), 8)); if (can_preallocate_buffer) { return preallocation_shape; } - return current_shape; + return layout.get_shape(); } /// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES. diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 0931e9310145c4..5cede62fd17e69 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -80,8 +80,7 @@ struct PerfCounter { class ProgramBuilder final { public: - ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool createTopologyOnly = false, bool partialBuild = false, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool partialBuild = false, std::shared_ptr task_executor = nullptr, std::shared_ptr compilation_context = nullptr, bool innerProgram = false); @@ -174,10 +173,9 @@ class ProgramBuilder final { void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr build(const std::vector>& ops, - bool 
createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, bool partialBuild = false, bool innerProgram = false); - void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op); + void CreateSingleLayerPrimitive(const std::shared_ptr& op); }; void CreateCustomOp(ProgramBuilder& p, const std::shared_ptr& node, CustomLayerPtr customLayer); @@ -189,8 +187,6 @@ void CreateElementwiseOp(ProgramBuilder& p, std::vector coefficients = {}, bool pythondiv = true); -bool IsNodeOnConstPath(const std::shared_ptr& node); - void validate_inputs_count(const std::shared_ptr& op, std::vector possible_inputs_count); inline bool ends_with(const std::string& value, const std::string& suffix) { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp index 01ae5e1a5b62b0..320e0b40f964fa 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp @@ -44,14 +44,12 @@ struct ShapePredictor { /// ov::intel_gpu::buffers_preallocation_ratio property, it increases buffer size by /// `_buffers_preallocation_ratio` value unconditionally. /// \param id Primitive id. -/// \param current_shape Primitive's shape on current iteration. -/// \param dt_size Primitive's data_type size. +/// \param layout Primitive's layout on current iteration. /// \param can_reuse_buffer Specifies if current memory buffer is enough to store data. /// \return The result of shape size prediction as std::pair, where the first element /// says if shape is successfully predicted and can be preallocated, and the second element is ov::Shape itself. 
std::pair predict_preallocation_shape(const std::string& id, - const ov::Shape& current_shape, - size_t dt_bitwidth, + const cldnn::layout& layout, bool can_reuse_buffer, int32_t next_iters_prealloc_count = -1); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp index da847d5d2504bc..f7f173ca5282e5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp @@ -37,6 +37,8 @@ void mark_runtime_skippable_nodes::run(program& p) { || impl_params->get_input_layout(0).get_partial_shape()[axis] == impl_params->get_input_layout(1).get_partial_shape()[0]) { // May be skipepd node.can_be_optimized(true); + // Set runtime skippable only when the node is set as can_be_optimized finally. + node.set_runtime_skippable(true); GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node.id() << " can_be_optimized" << std::endl; } }); @@ -56,6 +58,8 @@ void mark_runtime_skippable_nodes::run(program& p) { if (node.have_user_with_type() && node.get_users().size() == 1) return; node.can_be_optimized(true); + // Set runtime skippable only when the node is set as can_be_optimized finally. + node.set_runtime_skippable(true); GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node.id() << " can_be_optimized" << std::endl; } }); @@ -94,6 +98,8 @@ void mark_runtime_skippable_nodes::run(program& p) { if (!end.empty() && !is_valid) return; node.can_be_optimized(true); + // Set runtime skippable only when the node is set as can_be_optimized finally. 
+ node.set_runtime_skippable(true); GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node.id() << " can_be_optimized" << std::endl; }); program_helpers::do_for_types(*node, [](broadcast_node& node){ @@ -132,6 +138,8 @@ void mark_runtime_skippable_nodes::run(program& p) { } node.can_be_optimized(true); + // Set runtime skippable only when the node is set as can_be_optimized finally. + node.set_runtime_skippable(true); GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node.id() << " can_be_optimized" << std::endl; } }); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp index 622ce9c05061c6..99a2b3c6f857f9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp @@ -242,7 +242,10 @@ void prepare_padding::run(program& p) { } auto& input = node.get_dependency(0); - if (node.get_preferred_impl_type() == impl_types::ocl && input.is_type()) { + // WA to add reorder between MVN and Conv because Conv need input data with padding but MVN opt kernel with default format does not support padding. + // TODO: MVN opt kernel should support padding. + if (node.get_preferred_impl_type() == impl_types::ocl && input.is_type() + && format::is_default_format(input.get_output_layout().format)) { // check the allowed format to avoid perf drop by unnecessary reorder addition. 
auto new_reorder = std::make_shared(node.id() + "_padding_reorder_for_" + input.id(), input.id(), input.get_output_layout()); auto& new_reorder_node = p.get_or_create(new_reorder); p.add_intermediate(new_reorder_node, node, input); diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index d133d92e77b72a..6161d04ed26ab1 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -301,6 +301,10 @@ struct program_node { bool can_be_optimized() const { return optimized; } void can_be_optimized(bool opt) { optimized = opt; } + // check/set if the node is runtime skippable + bool is_runtime_skippable() const { return runtime_skippable; } + void set_runtime_skippable(bool skippable) { runtime_skippable = skippable; } + // check/set if the node's buffer can be shared during the memory pool optimization bool can_share_buffer() const { return share_buffer; } void can_share_buffer(bool share) { share_buffer = share; } @@ -484,6 +488,7 @@ struct program_node { bool constant = false; bool data_flow = false; bool in_shape_of_subgraph = false; + bool runtime_skippable = false; std::set dependant_shape_of_nodes; diff --git a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h index 8e91957c5192dd..9226510cb34520 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h @@ -27,6 +27,7 @@ struct typed_program_node : public typed_program_node_base { public: typed_program_node(const std::shared_ptr prim, program& prog) : parent(prim, prog) { support_padding_all(true); + set_runtime_skippable(true); } program_node& mean_nv12() const { return get_dependency(2); } diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index 
fda3975d2e691a..166a1cb5d3b734 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -20,6 +20,7 @@ struct typed_program_node : public typed_program_node_base { using parent = typed_program_node_base; typed_program_node(const std::shared_ptr prim, program& prog) : parent(prim, prog) { support_padding_all(true); + set_runtime_skippable(true); } public: diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 57256345afdc43..6dda7d9147f238 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -103,11 +103,9 @@ bool is_user_cpu(const program_node* user) { return true; } } - // TODO : refactor these as runtime_skippable_nodes - // If the user is dynamic && runtime skippable gather or strided slice, we still need to its parents' completion + // If the user is dynamic and runtime skippable node, we still need to its parents' completion // event even though the user's program_node is can_be_optimized - if (!user->is_dynamic() || (!user->is_type() && !user->is_type() && - !user->is_type() && !user->is_type())) + if (!user->is_dynamic() || (!user->is_runtime_skippable())) return false; } bool is_cpu = user->get_selected_impl() ? 
user->get_selected_impl()->is_cpu() @@ -496,7 +494,7 @@ event::ptr primitive_inst::realloc_if_needed() { _outputs[0] = variable.get_memory(); // To record shape predictor - auto prealloc_info = sp.predict_preallocation_shape(id(), _impl_params->output_layouts[0].get_shape(), dt_size, true); + auto prealloc_info = sp.predict_preallocation_shape(id(), _impl_params->output_layouts[0], true); return ev; } else if (_outputs[0] && variable.get_memory() && get_network().get_engine().is_the_same_buffer(*_outputs[0], *variable.get_memory())) { GPU_DEBUG_TRACE_DETAIL << id() << " : realloc_if_needed: Reset output mem" << std::endl; @@ -541,8 +539,7 @@ event::ptr primitive_inst::realloc_if_needed() { } // Clear out memory if if was previously reused, but now primitive can't be optimized - if (_node->is_type() || _node->is_type() || _node->is_type() || _node->is_type() || - _node->is_type() || _node->is_type()) { + if (_node->is_runtime_skippable()) { if (can_be_optimized()) { _max_output_layout_count = _deps[0].first->_max_output_layout_count; GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO("can_be_optimized"); @@ -586,7 +583,7 @@ event::ptr primitive_inst::realloc_if_needed() { // If debug config is set, repsect the config most tmp_prealloc_count = -1; } - prealloc_info = sp.predict_preallocation_shape(id(), current_shape, dt_size, can_reuse_buffer, tmp_prealloc_count); + prealloc_info = sp.predict_preallocation_shape(id(), updated_layout, can_reuse_buffer, tmp_prealloc_count); if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * dt_size)) { auto new_layout = updated_layout; diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl index 8a5d0c255438c5..0c662ff1f84c84 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl +++ 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl @@ -110,7 +110,6 @@ KERNEL(convolution_f16)( _result.se = mad( _rowA, sub_group_broadcast( colB, 14 ), _result.se ); \ _result.sf = mad( _rowA, sub_group_broadcast( colB, 15 ), _result.sf ); \ } - typedef CAT( half, FILTER_SIZE_X ) half_t; // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1. // Inner loop loads and FMADs one row (FILTER_SIZE_X) of each input patch // and FILTER_SIZE_X/2 rows of interleaved filter. @@ -158,6 +157,7 @@ KERNEL(convolution_f16)( #elif !defined(INPUT_BUFFER_WIDTH_PADDED) && !defined(INPUT_BUFFER_HEIGHT_PADDED) // TODO: Fixed vload issue in this path. #pragma error + typedef CAT( half, FILTER_SIZE_X ) half_t; half_t blockA00; half* pblockA00 = (half*)(&blockA00); #if (PADDING_SIZE_X == 1) && (INPPUT_PADDING_Y == 1) && (FILTER_SIZE_X == 3) && (FILTER_SIZE_Y == 3) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/softmax_gpu_bf.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/softmax_gpu_bf.cl index c2ff40c796de98..b70fe7a5173acf 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/softmax_gpu_bf.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/softmax_gpu_bf.cl @@ -7,6 +7,22 @@ #include "include/batch_headers/sub_group_block_read.cl" #include "include/batch_headers/sub_group_block_write.cl" +#if IS_DYNAMIC + +#define CALC_POWER(n) ({uint pos = 0; uint i = n; do { i >>= 1; ++pos; } while (i); --pos;}) + +#define BLOCK_READ(ptr, offset) DT_INPUT_BLOCK_READ(ptr, offset) +#define BLOCK_WRITE(ptr, offset, val) DT_OUTPUT_BLOCK_WRITE(ptr, offset, val) +#define BLOCK_TYPE INPUT0_TYPE + +#define OPT_BLOCK_SIZE 8 + +#define BLOCK_READ_OPT(ptr, offset) CAT(DT_INPUT_BLOCK_READ, OPT_BLOCK_SIZE)(ptr, offset) +#define BLOCK_WRITE_OPT(ptr, offset, val) CAT(DT_OUTPUT_BLOCK_WRITE, OPT_BLOCK_SIZE)(ptr, offset, val) +#define BLOCK_TYPE_OPT MAKE_VECTOR_TYPE(INPUT0_TYPE, OPT_BLOCK_SIZE) + +#else + #if 
SUBGROUP_BLOCK_SIZE == 1 #define BLOCK_READ(ptr, offset) DT_INPUT_BLOCK_READ(ptr, offset) #define BLOCK_WRITE(ptr, offset, val) DT_OUTPUT_BLOCK_WRITE(ptr, offset, val) @@ -17,8 +33,6 @@ #define BLOCK_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, SUBGROUP_BLOCK_SIZE) #endif -#if IS_DYNAMIC -#define CALC_POWER(n) ({uint pos = 0; uint i = n; do { i >>= 1; ++pos; } while (i); --pos;}) #endif REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE) @@ -64,53 +78,76 @@ KERNEL (softmax_gpu_continuous_bfyx)( const uint leftover_idx = data_set_offset + aligned_offset + workers_per_data_set * items_num + in_data_set_idx; INPUT0_TYPE my_chunk[STACK_SIZE]; - INPUT0_TYPE my_maximum = -UNIT_VAL_MAX; INPUT0_TYPE my_sum = UNIT_VAL_ZERO; __local INPUT0_TYPE lg_storage[SLM_SIZE]; // Read inputs and Get maximum value from data set uint input_idx=0; +#if IS_DYNAMIC + if (workers_per_data_set > SUB_GROUP_SIZE) + { + const uint num_iters = items_num - (items_num % OPT_BLOCK_SIZE); + for (; input_idx < num_iters; input_idx += OPT_BLOCK_SIZE) + { + BLOCK_TYPE_OPT vec_tmp = BLOCK_READ_OPT(input, aligned_data_offset + input_idx * get_sub_group_size()); + unroll_for (int j = 0; j < OPT_BLOCK_SIZE; j++) + { + my_chunk[input_idx+j] = vec_tmp[j]; + } + } + + for (; input_idx < items_num; input_idx++) + { + BLOCK_TYPE vec_tmp = BLOCK_READ(input, aligned_data_offset + input_idx * get_sub_group_size()); + my_chunk[input_idx] = vec_tmp; + } + } +#else if (workers_per_data_set > SUB_GROUP_SIZE) { for (; input_idx SUB_GROUP_SIZE) + { + const uint num_iters = items_num - (items_num % OPT_BLOCK_SIZE); + for (; output_idx < num_iters; output_idx += OPT_BLOCK_SIZE) + { + BLOCK_TYPE_OPT vec_tmp; + unroll_for (int j = 0; j < OPT_BLOCK_SIZE; j++) + { + ACTIVATION_TYPE dequantized = my_chunk[output_idx + j] / my_sum; + FUSED_OPS_MAIN; + vec_tmp[j] = FUSED_OPS_RESULT_MAIN; + } + BLOCK_WRITE_OPT(output, aligned_data_offset + output_idx * get_sub_group_size(), vec_tmp); + } + + for (; output_idx SUB_GROUP_SIZE) { for (; output_idx < items_num 
- (items_num % SUBGROUP_BLOCK_SIZE); output_idx+=SUBGROUP_BLOCK_SIZE) @@ -180,6 +242,7 @@ KERNEL (softmax_gpu_continuous_bfyx)( BLOCK_WRITE(output, aligned_data_offset + output_idx * get_sub_group_size(), vec_tmp); } } +#endif for (; output_idx < items_num; output_idx++) { ACTIVATION_TYPE dequantized = my_chunk[output_idx] / my_sum; @@ -200,6 +263,26 @@ KERNEL (softmax_gpu_continuous_bfyx)( FUSED_OPS_LEFTOVERS; output[leftover_idx] = FUSED_OPS_RESULT_LEFTOVERS; } +#else +#if IS_DYNAMIC + if (workers_per_data_set > SUB_GROUP_SIZE) + { + const uint num_iters = items_num - (items_num % OPT_BLOCK_SIZE); + for (; output_idx < num_iters; output_idx += OPT_BLOCK_SIZE) + { + BLOCK_TYPE_OPT vec_tmp; + unroll_for (int j = 0; j < OPT_BLOCK_SIZE; j++) + vec_tmp[j] = ACTIVATION(my_chunk[output_idx + j] / my_sum, ACTIVATION_PARAMS); + BLOCK_WRITE_OPT(output, aligned_data_offset + output_idx * get_sub_group_size(), vec_tmp); + } + + for (; output_idx < items_num; output_idx++) + { + BLOCK_TYPE vec_tmp; + vec_tmp = ACTIVATION(my_chunk[output_idx] / my_sum, ACTIVATION_PARAMS); + BLOCK_WRITE(output, aligned_data_offset + output_idx * get_sub_group_size(), vec_tmp); + } + } #else if (workers_per_data_set > SUB_GROUP_SIZE) { @@ -215,7 +298,7 @@ KERNEL (softmax_gpu_continuous_bfyx)( BLOCK_WRITE(output, aligned_data_offset + output_idx * get_sub_group_size(), vec_tmp); } } - +#endif for (; output_idx < items_num; output_idx++) { output[aligned_data_offset + get_sub_group_local_id() + output_idx * get_sub_group_size()] = ACTIVATION(my_chunk[output_idx] / my_sum, ACTIVATION_PARAMS); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp index ffaf57c4f6afd2..adba7b625a4455 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp +++ 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp @@ -83,14 +83,20 @@ JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& pa } } + return mem_consts; +} + +JitConstants ConvolutionKernelBase::GetJitConstantsWithLoopUnroll(const convolution_params& params, const DispatchData& dispatchData) const { + JitConstants mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData); + std::vector unrollLoopParams{params.filterSize.x, - params.filterSize.y, - (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX, - (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY, - (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ, - (uint32_t)dispatchData.gemmStyle.subBlockDimM, - (uint32_t)dispatchData.gemmStyle.subBlockDimK, - (uint32_t)dispatchData.gemmStyle.subBlockDimN}; + params.filterSize.y, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDX, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDY, + (uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ, + (uint32_t)dispatchData.gemmStyle.subBlockDimM, + (uint32_t)dispatchData.gemmStyle.subBlockDimK, + (uint32_t)dispatchData.gemmStyle.subBlockDimN}; auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end()); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h index a2615d8d70dcc2..f8d1b3bf0de956 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h @@ -36,10 +36,8 @@ class ConvolutionKernelBase : public WeightBiasKernelBase { size_t globalWorkSizeDZ; }; - union { - CLDNNStyle cldnnStyle; - GEMMStyle gemmStyle; - }; + CLDNNStyle cldnnStyle; + GEMMStyle gemmStyle; }; std::string GetAutoTuneOptions(int autoTuneIndex) const; @@ -53,6 +51,7 @@ class 
ConvolutionKernelBase : public WeightBiasKernelBase { virtual bool NeedPaddedInput() const { return false; } bool Validate(const Params& p) const override; using WeightBiasKernelBase::GetJitConstants; + JitConstants GetJitConstantsWithLoopUnroll(const convolution_params& params, const DispatchData& dispatchData) const; virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const; virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& dispatchData) const; virtual DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp index 2233851eaace58..e9d250a3320654 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp @@ -33,7 +33,7 @@ DeviceFeaturesKey ConvolutionKernel_bfyx_Direct_10_10_12::get_required_device_fe JitConstants ConvolutionKernel_bfyx_Direct_10_10_12::GetJitConstants(const convolution_params& cp, const DispatchData& dispatchData) const { - JitConstants jit = Parent::GetJitConstants(cp, dispatchData); + JitConstants jit = Parent::GetJitConstantsWithLoopUnroll(cp, dispatchData); jit.AddConstants({ MakeJitConstant("ALIGNED_OFM", RoundUp(cp.outputs[0].Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN) * cp.groups), diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp index 07eeab7b42d086..b18975d4e4de9d 100644 --- 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp @@ -44,7 +44,7 @@ std::string ConvolutionKernel_bfyx_GEMMLike::GetKernelName(const convolution_par JitConstants ConvolutionKernel_bfyx_GEMMLike::GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const { - JitConstants jit = Parent::GetJitConstants(params, dispatchData); + JitConstants jit = Parent::GetJitConstantsWithLoopUnroll(params, dispatchData); jit.AddConstants({ MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.outputs[0].Feature().v / params.groups, dispatchData.gemmStyle.subBlockDimN)), @@ -110,8 +110,9 @@ bool ConvolutionKernel_bfyx_GEMMLike::Validate(const Params& p) const { return false; } - // To prevent big sized filter which causes lots of CL build time. - const size_t acceptable_filter_x_size = 64; // This acceptable size was decided by heuristics + // Limit filter_x_size to 32 because convolution ref kernel is faster than GEMMLike kernel when filter size is bigger. + // 32 is chosen from filter size of customer model. 
May need more measurement to pick optimal value + const size_t acceptable_filter_x_size = 32; if (params.filterSize.x > acceptable_filter_x_size) { return false; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp index 78dc5064565b76..3cb7bd8354ee55 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp @@ -226,7 +226,7 @@ JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolut const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size); size_t leftovers = of_threads_per_batch - of_maps_per_group; - auto jit = Parent::GetJitConstants(params, dispatchData); + auto jit = Parent::GetJitConstantsWithLoopUnroll(params, dispatchData); if (!params.fused_ops.empty()) { auto input_dt = GetUnitType(params); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp index 089f1799b1ec7b..da1328ca77083c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -109,7 +109,8 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem bool output_ndim_transposed = (params.output_order.size() > 0 && (params.output_order.back() != (static_cast(params.output_order.size()) - 1))); if ((params.transpose_input0 == 0 /*X_LAST*/) && (params.transpose_input1 == 0 /*X_LAST*/ || params.transpose_input1 == 1 /*Y_LAST*/) && (!params.indirect_input0 && !params.inputs[0].has_dynamic_pad()) - && (!output_ndim_transposed || params.fused_ops.empty()) 
{ + && (!output_ndim_transposed || params.fused_ops.empty()) + && !params.engineInfo.supports_immad) { // - Not supports transposed input0 / transposed input1 for OTHER mode yet // - If output X dim (= N) is transposed, cannot read eltwise as aligned data tuning_data.tile_n_size = 32; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_update_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_update_kernel_ref.cpp index 59c70777f3d599..fb6ef268fc14db 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_update_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_update_kernel_ref.cpp @@ -245,7 +245,16 @@ JitConstants ScatterUpdateKernelRef::GetJitConstants(const scatter_update_params std::vector pitches; const auto& output = params.outputs[0]; if (output.is_dynamic()) { - pitches = GetDynamicPitches(output.GetDims(), params.inputs.size() + GetFusedPrimitiveInputsCount(params)); + size_t tensor_idx = params.inputs.size() + GetFusedPrimitiveInputsCount(params); + for (auto input : params.inputs) { + if (!input.is_dynamic()) + tensor_idx--; + } + for (auto fused_op : params.fused_ops) { + if (!fused_op.output_tensor.is_dynamic()) + tensor_idx--; + } + pitches = GetDynamicPitches(output.GetDims(), tensor_idx); } else { pitches = GetPlanarPitches(output.GetDims()); } diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index bfaea6ea279c04..52fa7f03e14da0 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -2,28 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "openvino/pass/serialize.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" -#include "openvino/util/common_util.hpp" 
#include "intel_gpu/graph/serialization/binary_buffer.hpp" -#include "intel_gpu/graph/serialization/layout_serializer.hpp" -#include "intel_gpu/graph/serialization/string_serializer.hpp" -#include "intel_gpu/graph/serialization/utils.hpp" -#include "intel_gpu/graph/serialization/vector_serializer.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/plugin/graph.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/async_infer_request.hpp" -#include -#include #include -#include -#include -#include namespace ov { namespace intel_gpu { diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 852a75f0ef4cbd..cc35d024322538 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -39,7 +39,7 @@ Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& con : m_context(context) , m_config(config) , m_stream_id(stream_id) { - auto program_builder = std::make_shared(model, get_engine(), config, false, false); + auto program_builder = std::make_shared(model, get_engine(), config, false); m_config = program_builder->get_config(); build(program_builder->get_compiled_program()); diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index ba238e111c70d1..4b7b3748d6e69d 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -31,7 +31,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, false, p.get_task_executor(), p.get_compilation_context(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, false, p.get_task_executor(), 
p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index d280be79adab2d..21314503caa055 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -297,7 +297,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr(cloned_model, engine, config, false, true); + program = std::make_shared(cloned_model, engine, config, true); std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 499744f8d0eacc..f28fee32d39df3 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -54,7 +54,7 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { } ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, - bool create_topology_only, bool partial_build, + bool partial_build, std::shared_ptr task_executor, std::shared_ptr compilation_context, bool is_inner_program) @@ -103,7 +103,7 @@ ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& auto ops = model->get_ordered_ops(); - m_program = build(ops, create_topology_only, partial_build, is_inner_program); + m_program = build(ops, partial_build, is_inner_program); } ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) @@ -133,8 +133,7 @@ void ProgramBuilder::cleanup_build() { #endif } -std::shared_ptr ProgramBuilder::build(const std::vector>& ops, - bool create_topology_only, bool partial_build, bool 
is_inner_program) { +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, bool partial_build, bool is_inner_program) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. // So, do not check allow_new_shape_infer for inner program build @@ -157,35 +156,31 @@ std::shared_ptr ProgramBuilder::build(const std::vector& op) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::is_op_supported"); - cldnn::topology topology; try { // Query mode disables checks that input primitives are created, // as is_op_supported method is called for each operation separately @@ -199,7 +194,7 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { // reliable results of QueryNetwork call. prepare_build(); allow_new_shape_infer = requires_new_shape_infer(op); - CreateSingleLayerPrimitive(topology, op); + CreateSingleLayerPrimitive(op); cleanup_build(); DisableQueryMode(); } catch (std::exception&) { @@ -211,7 +206,7 @@ bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { return true; } -void ProgramBuilder::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op) { +void ProgramBuilder::CreateSingleLayerPrimitive(const std::shared_ptr& op) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::CreateSingleLayerPrimitive"); GPU_DEBUG_LOG << "Process " << "op::" << op->get_type_info().version_id << "::" << op->get_type_name() << " operation " << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl; @@ -369,28 +364,6 @@ int64_t ProgramBuilder::get_result_index(const ov::Output& value return m_model->get_result_index(value); } -// TODO: Does it make sense to add such method to ov core? 
-bool IsNodeOnConstPath(const std::shared_ptr& node) { - std::set> nodes_processed = {}; - std::function&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr& node) { - if (nodes_processed.count(node)) return true; - nodes_processed.insert(node); - // If input is constant, then drop it from the processing list - if (std::dynamic_pointer_cast(node) != nullptr) - return true; - // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path - if (node->get_input_size() == 0) - return false; - for (size_t i = 0; i < node->get_input_size(); i++) { - auto input_node = node->get_input_node_shared_ptr(i); - if (!is_const_node(input_node)) - return false; - } - return true; - }; - return is_const_node(node); -} - void validate_inputs_count(const std::shared_ptr& op, std::vector valid_inputs_count) { for (auto ic : valid_inputs_count) { if (op->get_input_size() == ic) { diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index dcdfe046558853..dba2809c7b1019 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -6,8 +6,6 @@ #include "openvino/core/preprocess/input_tensor_info.hpp" #include "openvino/core/parallel.hpp" #include "openvino/core/validation_util.hpp" -#include "openvino/op/util/op_types.hpp" -#include "transformations/utils/utils.hpp" #include "intel_gpu/primitives/kv_cache.hpp" #include "intel_gpu/plugin/usm_host_tensor.hpp" @@ -359,7 +357,10 @@ void SyncInferRequest::wait() { if (need_reallocate) { std::string internal_name = m_output_names_map.at(port_idx); - auto actual_memory_shape = predict_shape(internal_name, mem_shape, output_tensor->get_element_type(), *m_shape_predictor); + auto actual_memory_shape = predict_shape(internal_name, cldnn::layout(mem_shape, + output_tensor->get_element_type(), + cldnn::format::get_default_format(mem_shape.size())), + 
*m_shape_predictor); output_tensor->set_shape(actual_memory_shape); } } @@ -480,7 +481,10 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe auto actual_memory_shape = tensor_shape; if (is_dynamic) { - actual_memory_shape = predict_shape(name, tensor_shape, element_type, *m_shape_predictor); + actual_memory_shape = predict_shape(name, cldnn::layout(tensor_shape, + element_type, + cldnn::format::get_default_format(tensor_shape.size())), + *m_shape_predictor); } return { create_device_tensor(actual_memory_shape, element_type, need_lockable_mem), TensorOwner::PLUGIN }; @@ -714,7 +718,10 @@ std::vector SyncInferRequest::prepare_input(const std::string auto device_tensor = std::dynamic_pointer_cast(device_tensor_wrapper.ptr); if (is_dynamic) { if (device_tensor->get_original_memory()->size() < user_tensor->get_byte_size()) { - auto actual_shape = predict_shape(internal_name, user_tensor->get_shape(), device_tensor_et, *m_shape_predictor); + auto actual_shape = predict_shape(internal_name, cldnn::layout(user_tensor->get_shape(), + element_type, + cldnn::format::get_default_format(user_tensor->get_shape().size())), + *m_shape_predictor); GPU_DEBUG_TRACE_DETAIL << " actual memory shape: " << actual_shape.to_string() << std::endl; auto new_tensor = create_device_tensor(actual_shape, device_tensor_et, false); new_tensor->set_shape(user_tensor->get_shape()); diff --git a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp index ebc4d20b3c1844..5ffe32f7a2dfdf 100644 --- a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp @@ -5,7 +5,6 @@ #include "intel_gpu/plugin/usm_host_tensor.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" #include "intel_gpu/plugin/remote_context.hpp" -#include "openvino/runtime/make_tensor.hpp" #include namespace ov { diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp 
b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index 19c8c20016be34..81871bbf8fa2fb 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -77,7 +77,7 @@ void VariableState::update_device_buffer() { const auto alloc_type = m_context->get_engine().use_unified_shared_memory() ? cldnn::allocation_type::usm_device : cldnn::allocation_type::cl_mem; const auto current_buf_size = m_layout.get_buffer_size().sizes(); ov::Shape current_shape(current_buf_size.begin(), current_buf_size.end()); - const auto alloc_shape = predict_shape(m_name, current_shape, m_layout.data_type, *m_shape_predictor); + const auto alloc_shape = predict_shape(m_name, cldnn::layout(current_shape, m_layout.data_type, m_layout.format), *m_shape_predictor); const auto alloc_layout = cldnn::layout(alloc_shape, m_layout.data_type, m_layout.format); m_memory = m_context->get_engine().allocate_memory(alloc_layout, alloc_type, false); actual_size = std::max(actual_size, alloc_layout.bytes_count()); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 0e701ebee5661c..4de85d27c06668 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -128,21 +128,37 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) { OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, "[GPU] Unsupported allocation type: ", type); - OPENVINO_ASSERT(layout.bytes_count() <= get_device_info().max_alloc_mem_size, + bool exceed_allocatable_mem_size = (layout.bytes_count() > get_device_info().max_alloc_mem_size); + + // When dynamic shape upper bound makes bigger buffer, then return false. 
+ if (exceed_allocatable_mem_size && layout.is_dynamic()) { + OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); + return false; + } + + OPENVINO_ASSERT(!exceed_allocatable_mem_size, "[GPU] Exceeded max size of memory object allocation: ", "requested ", layout.bytes_count(), " bytes, " "but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes.", "Please try to reduce batch size or use lower precision."); auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); + auto exceed_available_mem_size = (layout.bytes_count() + used_mem > get_max_memory_size()); + + // When dynamic shape upper bound makes bigger buffer, then return false. + if (exceed_available_mem_size && layout.is_dynamic()) { + OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); + return false; + } + #ifdef __unix__ // Prevent from being killed by Ooo Killer of Linux - OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(), + OPENVINO_ASSERT(!exceed_available_mem_size, "[GPU] Exceeded max size of memory allocation: ", "Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ", "but available memory size is ", get_max_memory_size(), " bytes"); #else - if (layout.bytes_count() + used_mem > get_max_memory_size()) { + if (exceed_available_mem_size) { GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : " << used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl; GPU_DEBUG_COUT << "Please note that performance might drop due to memory swap." 
<< std::endl; diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp index 599878ca5566b1..feb72d1879df7b 100644 --- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp +++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp @@ -57,13 +57,15 @@ bool ShapePredictor::can_preallocate(size_t desired_buffer_size) { } std::pair ShapePredictor::predict_preallocation_shape(const std::string& id, - const ov::Shape& current_shape, - size_t dt_bitwidth, + const cldnn::layout& layout, bool can_reuse_buffer, int32_t custom_next_iters_prealloc_count) { size_t next_iters_prealloc_count = custom_next_iters_prealloc_count > 0 ? static_cast(custom_next_iters_prealloc_count) : _next_iters_preallocation_count; + auto current_shape = layout.get_shape(); + auto dt_bitwidth = ov::element::Type(layout.data_type).bitwidth(); + add_shape(id, current_shape); // Save shape information and exit without pre-allocation suggestion if current @@ -124,6 +126,8 @@ std::pair ShapePredictor::predict_preallocation_shape(const std auto new_shape = current_shape + preallocation_shape; return {true, new_shape}; } else if (_buffers_preallocation_ratio > 1.0f) { + if (format::is_blocked(layout.format)) + return {false, {}}; // Apply percentage buffer preallocation auto current_shape_size = ov::shape_size(current_shape); ov::Shape new_shape_size(current_shape.size(), 1); diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp index b86ca197523e48..8333fca1168a1b 100644 --- a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp +++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp @@ -190,8 +190,6 @@ class DynamicShapeHugeRangeGPUTest : public testing::WithParamInterface emptyAdditionalConfig; diff --git 
a/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp index 122780afb1d163..871168f02911b6 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp @@ -19,6 +19,7 @@ typedef std::tuple< ov::element::Type, // Input precision ov::element::Type, // Output precision InputShape, // Input shape + bool, // ReduceSum replacement test std::string // Device name > convLayerTestParamsSet; @@ -30,8 +31,9 @@ class ConvolutionLayerGPUTest : public testing::WithParamInterface kernel, stride, dilation; @@ -47,7 +49,10 @@ class ConvolutionLayerGPUTest : public testing::WithParamInterface{inputShape.second[0].at(-2), inputShape.second[0].at(-1)}) << "_"; + else + result << "K" << ov::test::utils::vec2str(kernel) << "_"; result << "S" << ov::test::utils::vec2str(stride) << "_"; result << "PB" << ov::test::utils::vec2str(padBegin) << "_"; result << "PE" << ov::test::utils::vec2str(padEnd) << "_"; @@ -57,6 +62,7 @@ class ConvolutionLayerGPUTest : public testing::WithParamInterface {}; +TEST_P(shape_predictor_tests_b_fs_yx_fsv16, prediction) { + auto p = GetParam(); + auto& in_shapes = p.in_shapes; + auto& expected_predicted_shape = p.expected_predicted_shape; + auto& engine = get_test_engine(); + + ShapePredictor sp(&engine, p.buffers_preallocation_ratio); + std::pair result; + + for (auto& shape : in_shapes) + result = sp.predict_preallocation_shape("dummy_name", cldnn::layout(shape, + ov::element::f32, + cldnn::format::b_fs_yx_fsv16), + p.can_reuse_buffer); + + ASSERT_TRUE(result.first == !expected_predicted_shape.empty()); + ASSERT_EQ(result.second, expected_predicted_shape); +} + +INSTANTIATE_TEST_SUITE_P(smoke, shape_predictor_tests_b_fs_yx_fsv16, + testing::ValuesIn(std::vector{ + // Preallocation for next N iterations tests + {{{1,1}, {1,1}, {1,1}}, {}, 1.0f, false}, + 
{{{1,1}, {1,21}, {1,31}}, {}, 1.0f, false}, + {{{1,3}, {1,2}, {1,1}}, {}, 1.0f, false}, + {{{1,1}, {1,2}, {1,3}}, {1,13}, 1.0f, false}, + {{{1,1}, {1,2}, {1,3}}, {1,13}, 1.1f, false}, + {{{1,1,1}, {1,2,2}, {1,3,3}}, {1,13,13}, 1.0f, false}, + {{{1,1,1}, {1,2,2}, {1,3,3}}, {1,13,13}, 1.1f, false}, + {{{1,1,1}, {1,3,2}, {1,7,3}}, {}, 1.0f, false}, + {{{1,1,1}, {1,1,3}, {1,1,5}}, {1,1,25}, 1.0f, false}, + {{{1,1,1}, {1,1,3}, {1,1,5}}, {1,1,25}, 1.1f, false}, + {{{1,1,1}, {1,1,3}, {1,1,5}}, {1,1,25}, 1.0f, false}, + {{{1,1,1}, {1,1,3}, {1,1,5}}, {1,1,25}, 1.1f, false}, + {{{1,1}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1}}, {}, 1.0f, false}, + {{{1,10}, {1,1}, {1,2}, {1,3}}, {1,13}, 1.0f, false}, + {{{1,10}, {1,1}, {1,2}, {1,3}}, {1,13}, 1.1f, false}, + {{{1,3,480,720}, {3,3,480,720}, {5,3,480,720}}, {}, 1.0f, false}, + {{{1,1}, {1,1}, {1,1}}, {}, 1.0f, true}, + {{{1,1}, {1,2}, {1,3}}, {}, 1.0f, true}, + + // Percentage preallocation tests + {{{1,1}, {1,1}, {1,1}}, {}, 1.1f, false}, + {{{1,1}, {1,128}, {1,256}}, {}, 1.1f, false}, + {{{1,3,128}, {1,3,112}, {1,3,418}, {1,3,512}}, {}, 1.1f, false}, + {{{1,1}, {1,1}, {1,1}}, {}, 1.1f, true}, + {{{1,1}, {1,128}, {1,256}}, {}, 1.1f, true}, + {{{1,3,128}, {1,3,112}, {1,3,418}, {1,3,512}}, {}, 1.1f, true}, + })); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 9593e34e15c9ad..9b766aebd12565 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -1642,7 +1642,7 @@ TEST(convolution_f32_fw_gpu, convolution_big_size_weights) { auto& engine = get_test_engine(); const std::vector filter_size_data = { - 65, 65, + 33, 33, }; const std::vector impl_kernel_data = { @@ -1715,6 +1715,70 @@ TEST(convolution_f32_fw_gpu, convolution_big_size_weights) { } +TEST(convolution_f16_fw_gpu, convolution_big_size_weights) { + auto& engine = 
get_test_engine(); + + const std::vector filter_size_data = { + 65, 33, + 65, 32, + }; + + const std::vector impl_kernel_data = { + "convolution_gpu_ref__f16", + "convolution_gpu_bfyx_gemm_like__f16", + }; + + for (size_t m = 0 ; m < filter_size_data.size() / 2; m++) { + const int in_y = filter_size_data[m * 2]; + const int in_x = filter_size_data[m * 2 + 1]; + + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, in_x, in_y } }); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, in_x, in_y } }); + + tests::random_generator rg(GET_SUITE_NAME); + VVVVF input_rnd = rg.generate_random_4d(1, 1, in_y, in_x, -10, 10); + VF input_rnd_vec = flatten_4d(format::bfyx, input_rnd); + VVVVF filter_rnd = rg.generate_random_4d(1, 1, in_y, in_x, -10, 10); + VF filter_rnd_vec = flatten_4d(format::bfyx, filter_rnd); + + set_values(input, input_rnd_vec); + set_values(weights, filter_rnd_vec); + + topology topology( + input_layout("input", input->get_layout()), + data("weights", weights), + convolution( "conv", input_info("input"), "weights", no_bias, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + + auto impl_info = network.get_implementation_info("conv"); + ASSERT_EQ(impl_info, impl_kernel_data[m]); + + network.set_input_data("input", input); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "conv"); + + auto output_memory = outputs.at("conv").get_memory(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + int y_size = output_layout.spatial(1); + int x_size = output_layout.spatial(0); + int f_size = output_layout.feature(); + int b_size = output_layout.batch(); + + ASSERT_EQ(y_size, 1); + ASSERT_EQ(x_size, 1); + ASSERT_EQ(f_size, 1); 
+ ASSERT_EQ(b_size, 1); + } +} + TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout auto& engine = get_test_engine(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/scatter_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/scatter_update_gpu_test.cpp index 90e2cba631cb95..e1d3945d596114 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/scatter_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/scatter_update_gpu_test.cpp @@ -1683,6 +1683,88 @@ TEST(scatter_update_gpu_fp32, dynamic) { } } +TEST(scatter_update_gpu_fp32, mixed_input_with_dynamic_static) { + // Dictionary : 1x2x5x2 + // Indexes : 2x1x2x1 + // Updates : 1x2x2x1x2x2 + // Axis : 2 + // Output : 1x2x5x2 + // Input values in fp32 + + auto& engine = get_test_engine(); + + auto input1_layout = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx }; + auto input2_layout = layout{ ov::PartialShape{2, 1, 2, 1}, data_types::f32, format::bfyx }; + auto input3_layout = layout{ ov::PartialShape::dynamic(6), data_types::f32, format::bfyx }; + + auto input1 = engine.allocate_memory({{1, 2, 5, 2}, data_types::f32, format::bfyx}); // Dictionary + auto input2 = engine.allocate_memory({{2, 1, 2, 1}, data_types::f32, format::bfyx}); // Indices + auto input3 = engine.allocate_memory({{1, 2, 2, 1, 2, 2}, data_types::f32, format::bfwzyx}); // Updates + auto axis = 2; + + set_values(input1, { + 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, + 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f + }); + + set_values(input2, { + 2.f, 0.f, + 3.f, 4.f + }); + + set_values(input3, { + 20.f, 30.f, + 40.f, 50.f, + 60.f, 70.f, + 80.f, 90.f, + 100.f, 110.f, + 120.f, 130.f, + 140.f, 150.f, + 160.f, 170.f + }); + + topology topology; + topology.add(input_layout("InputDictionary", input1_layout)); + 
topology.add(input_layout("InputText", input2_layout)); + topology.add(input_layout("InputUpdates", input3_layout)); + + topology.add(reorder("DictionaryReordered", input_info("InputDictionary"), format::bfyx, data_types::f32)); + topology.add(reorder("TextReordered", input_info("InputText"), format::bfyx, data_types::f32)); + topology.add(scatter_update("scatter_update", + input_info("DictionaryReordered"), + input_info("TextReordered"), + input_info("InputUpdates"), + axis) + ); + topology.add(reorder("out", input_info("scatter_update"), format::bfyx, data_types::f32)); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + + network.set_input_data("InputDictionary", input1); + network.set_input_data("InputText", input2); + network.set_input_data("InputUpdates", input3); + + auto inst = network.get_primitive("scatter_update"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network.execute(); + auto output = outputs.at("out").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + std::vector expected_results = { + 40.f, 50.f, 2.f, 3.f, 20.f, 30.f, 60.f, 70.f, 80.f, 90.f, + 120.f, 130.f, 12.f, 13.f, 100.f, 110.f, 140.f, 150.f, 160.f, 170.f + }; + + for (size_t i = 0; i < expected_results.size(); ++i) { + ASSERT_EQ(expected_results[i], output_ptr[i]); + } +} + TEST(scatter_update_cpu_impl_fp32, dynamic) { // Dictionary : 1x2x5x2 // Indexes : 2x1x2x1 diff --git a/src/plugins/template/tests/functional/op_reference/base_reference_test.cpp b/src/plugins/template/tests/functional/op_reference/base_reference_test.cpp index 72c8634260f66b..6445ad97c464a0 100644 --- a/src/plugins/template/tests/functional/op_reference/base_reference_test.cpp +++ b/src/plugins/template/tests/functional/op_reference/base_reference_test.cpp @@ -103,10 +103,6 @@ void CommonReferenceTest::ValidateBlobs(const ov::Tensor& 
refBlob, const auto& element_type = refBlob.get_element_type(); if (!legacy_compare) { - double abs_threshold_{abs_threshold}; - if (abs_threshold_ < 0.) - abs_threshold_ = std::numeric_limits::max(); - switch (element_type) { case ov::element::boolean: case ov::element::bf16: @@ -121,11 +117,7 @@ void CommonReferenceTest::ValidateBlobs(const ov::Tensor& refBlob, case ov::element::u16: case ov::element::u32: case ov::element::u64: - ov::test::utils::compare(refBlob, - outBlob, - // actual_comparision_size, - abs_threshold_, - threshold); + ov::test::utils::compare(refBlob, outBlob, abs_threshold, threshold); return; } } diff --git a/src/plugins/template/tests/functional/op_reference/bucketize.cpp b/src/plugins/template/tests/functional/op_reference/bucketize.cpp index 487e20fa286b32..17fef88542783b 100644 --- a/src/plugins/template/tests/functional/op_reference/bucketize.cpp +++ b/src/plugins/template/tests/functional/op_reference/bucketize.cpp @@ -14,29 +14,29 @@ using namespace ov; struct BucketizeParams { template BucketizeParams(const element::Type& input_type, - const PartialShape& input_pshape, + const Shape& input_shape, const std::vector& input, const element::Type& bucket_type, - const PartialShape& bucket_pshape, + const Shape& bucket_shape, const std::vector& buckets, bool with_right_bound, const element::Type& output_type, const std::vector& expected_output) : input_type(input_type), - input_pshape(input_pshape), - input(CreateTensor(input_type, input)), + input_shape(input_shape), + input(CreateTensor(input_shape, input_type, input)), bucket_type(bucket_type), - bucket_pshape(bucket_pshape), - buckets(CreateTensor(bucket_type, buckets)), + bucket_shape(bucket_shape), + buckets(CreateTensor(bucket_shape, bucket_type, buckets)), with_right_bound(with_right_bound), output_type(output_type), - expected_output(CreateTensor(output_type, expected_output)) {} + expected_output(CreateTensor(input_shape, output_type, expected_output)) {} element::Type 
input_type; - PartialShape input_pshape; + Shape input_shape; ov::Tensor input; element::Type bucket_type; - PartialShape bucket_pshape; + Shape bucket_shape; ov::Tensor buckets; bool with_right_bound; element::Type output_type; @@ -46,12 +46,11 @@ struct BucketizeParams { class ReferenceBucketizeLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.input_type, - params.input_pshape, + params.input_shape, params.bucket_type, - params.bucket_pshape, + params.bucket_shape, params.with_right_bound, params.output_type); inputData = {params.input, params.buckets}; @@ -59,12 +58,12 @@ class ReferenceBucketizeLayerTest : public testing::TestWithParam& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "input_type=" << param.input_type << "_"; - result << "input_pshape=" << param.input_pshape << "_"; + result << "input_shape=" << param.input_shape << "_"; result << "bucket_type=" << param.bucket_type << "_"; - result << "bucket_pshape=" << param.bucket_pshape << "_"; + result << "bucket_shape=" << param.bucket_shape << "_"; result << "with_right_bound=" << param.with_right_bound << "_"; result << "output_type=" << param.output_type; return result.str(); @@ -72,13 +71,13 @@ class ReferenceBucketizeLayerTest : public testing::TestWithParam CreateFunction(const element::Type& input_type, - const PartialShape& input_pshape, + const Shape& input_shape, const element::Type& bucket_type, - const PartialShape& bucket_pshape, + const Shape& bucket_shape, const bool with_right_bound, const element::Type& output_type) { - auto data = std::make_shared(input_type, input_pshape); - auto buckets = std::make_shared(bucket_type, bucket_pshape); + auto data = std::make_shared(input_type, input_shape); + auto buckets = std::make_shared(bucket_type, bucket_shape); return 
std::make_shared( std::make_shared(data, buckets, output_type, with_right_bound), ParameterVector{data, buckets}); @@ -94,20 +93,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_With_Hardcoded_Refs, ::testing::Values( // fp32, int32, with_right_bound BucketizeParams(element::f32, - PartialShape{10, 1}, + Shape{10, 1}, std::vector{8.f, 1.f, 2.f, 1.1f, 8.f, 10.f, 1.f, 10.2f, 0.f, 20.f}, element::i32, - PartialShape{4}, + Shape{4}, std::vector{1, 4, 10, 20}, true, element::i32, std::vector{2, 0, 1, 1, 2, 2, 0, 3, 0, 3}), // fp32, int32, with_right_bound BucketizeParams(element::i32, - PartialShape{1, 1, 10}, + Shape{1, 1, 10}, std::vector{8, 1, 2, 1, 8, 5, 1, 5, 0, 20}, element::i32, - PartialShape{4}, + Shape{4}, std::vector{1, 4, 10, 20}, false, element::i32, diff --git a/src/plugins/template/tests/functional/op_reference/conversion.hpp b/src/plugins/template/tests/functional/op_reference/conversion.hpp index afe549b471ae79..a50200b391d122 100644 --- a/src/plugins/template/tests/functional/op_reference/conversion.hpp +++ b/src/plugins/template/tests/functional/op_reference/conversion.hpp @@ -27,15 +27,13 @@ struct ConvertParams { const ov::element::Type& iType, const ov::element::Type& oType, const std::vector& iValues, - const std::vector& oValues, - size_t iSize = 0, - size_t oSize = 0) + const std::vector& oValues) : conversionType(convType), pshape(shape), inType(iType), outType(oType), - inputData(CreateTensor(iType, iValues, iSize)), - refData(CreateTensor(oType, oValues, oSize)) {} + inputData(CreateTensor(shape.get_shape(), iType, iValues)), + refData(CreateTensor(shape.get_shape(), oType, oValues)) {} ConversionTypes conversionType; ov::PartialShape pshape; ov::element::Type inType; diff --git a/src/plugins/template/tests/functional/op_reference/convert.cpp b/src/plugins/template/tests/functional/op_reference/convert.cpp index c92c6e5340c2c1..69026e88ff1c27 100644 --- a/src/plugins/template/tests/functional/op_reference/convert.cpp +++ 
b/src/plugins/template/tests/functional/op_reference/convert.cpp @@ -254,15 +254,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::f32, std::vector{0xA0}, - std::vector{1.0f, 0.0f, 1.0f, 0.0f}, - 4), + std::vector{1.0f, 0.0f, 1.0f, 0.0f}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, std::vector{0xBF, 0xA0}, - std::vector{15.0f, 11.0f, 0.0f, 10.0f}, - 4), + std::vector{15.0f, 11.0f, 0.0f, 10.0f}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{2, 2}, ov::element::u8, @@ -292,8 +290,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::f32, std::vector{0xEF, 0x2F}, - std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, - 4), + std::vector{-1.0f, -2.0f, -1.0f, 2.0f}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{2, 2}, ov::element::i8, @@ -402,128 +399,98 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x01, 0x01}, - 4, - 4), + std::vector{0x01, 0x01}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::i4, std::vector{0x12, 0x03}, - std::vector{0x12, 0x03}, - 4, - 4), + std::vector{0x12, 0x03}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, 
ov::element::i4, ov::element::i4, std::vector{0xFE, 0x03}, - std::vector{0xFE, 0x03}, - 4, - 4), + std::vector{0xFE, 0x03}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), // destination i8 ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u1, ov::element::i8, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::i8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -553,8 +520,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, 
ov::element::i8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -603,15 +569,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i16, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::i16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -641,8 +605,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -691,15 +654,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i32, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::i32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -729,8 +690,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -779,15 +739,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i64, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::i64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, 
ov::element::u8, @@ -817,8 +775,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -868,113 +825,85 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u1, std::vector{0xA0}, - std::vector{0xA0}, - 8, - 8), + std::vector{0xA0}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u4, ov::element::u1, std::vector{0x01, 0x10, 0x00, 0x00}, - std::vector{0x90}, - 8, - 8), + std::vector{0x90}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u8, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u32, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::u64, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::i4, ov::element::u1, std::vector{0x01, 0x10, 0x00, 0x00}, - std::vector{0x90}, - 8, - 8), + std::vector{0x90}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::i8, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::i16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::i32, 
ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::i64, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::f16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::bf16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{8}, ov::element::f32, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), // destination u4 ConvertParams(ConversionTypes::CONVERT, @@ -982,113 +911,85 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x01, 0x01}, - 4, - 4), + std::vector{0x01, 0x01}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::u4, std::vector{0x12, 0x03}, - std::vector{0x12, 0x03}, - 4, - 4), + std::vector{0x12, 0x03}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + 
std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i4, ov::element::u4, std::vector{0xFE, 0x03}, - std::vector{0xFE, 0x03}, - 4, - 4), + std::vector{0xFE, 0x03}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), // destination u8 ConvertParams(ConversionTypes::CONVERT, @@ -1096,15 +997,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u8, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::u8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), 
ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -1134,8 +1033,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -1185,15 +1083,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u16, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::u16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -1223,8 +1119,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -1274,15 +1169,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u32, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, ov::element::u32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -1312,8 +1205,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, @@ -1362,15 +1254,13 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u64, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u4, 
ov::element::u64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::u8, @@ -1400,8 +1290,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT, ov::PartialShape{4}, ov::element::i8, diff --git a/src/plugins/template/tests/functional/op_reference/convert_like.cpp b/src/plugins/template/tests/functional/op_reference/convert_like.cpp index 60462c51e9ecf1..43d50daecafa79 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_like.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_like.cpp @@ -26,22 +26,19 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u2, ov::element::boolean, std::vector{0b10010011}, - std::vector{1, 1, 0, 1}, - 4), + std::vector{1, 1, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::boolean, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 1, 0, 1, 0, 0, 0, 1}, - 8), + std::vector{0, 1, 0, 1, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::boolean, std::vector{0x21, 0x03, 0x00}, - std::vector{1, 1, 0, 1}, - 4), + std::vector{1, 1, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 3}, ov::element::u8, @@ -84,22 +81,19 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u2, ov::element::bf16, std::vector{0b10010011}, - std::vector{2.0f, 1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::bf16, std::vector{0x21, 0x03, 0x00}, - std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, - 8), + std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, 
ov::element::bf16, std::vector{0x21, 0x03, 0x00}, - std::vector{2.0f, 1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams( ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 1, 3, 5}, @@ -122,22 +116,19 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u2, ov::element::f16, std::vector{0b10010011}, - std::vector{2.0f, 1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::f16, std::vector{0x21, 0x03, 0x00}, - std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, - 8), + std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::f16, std::vector{0x21, 0x03, 0x00}, - std::vector{2.0f, 1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams( ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 1, 3, 5}, @@ -160,36 +151,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::f32, std::vector{0xA0}, - std::vector{1.0f, 0.0f, 1.0f, 0.0f}, - 4), + std::vector{1.0f, 0.0f, 1.0f, 0.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::f32, std::vector{0b10010011}, - std::vector{2.0f, 1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::f32, std::vector{0x21, 0x03, 0x00}, - std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, - 8), + std::vector{0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, std::vector{0xBF, 0xA0}, - std::vector{15.0f, 11.0f, 0.0f, 10.0f}, - 4), + std::vector{15.0f, 11.0f, 0.0f, 10.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::f32, std::vector{0x21, 0x03, 0x00}, - std::vector{2.0f, 
1.0f, 0.0f, 3.0f}, - 4), + std::vector{2.0f, 1.0f, 0.0f, 3.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 2}, ov::element::u8, @@ -219,8 +205,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::f32, std::vector{0xEF, 0x2F}, - std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, - 4), + std::vector{-1.0f, -2.0f, -1.0f, 2.0f}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 2}, ov::element::i8, @@ -328,173 +313,134 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x01, 0x01}, - 4, - 4), + std::vector{0x01, 0x01}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::i4, std::vector{0b10010011}, - std::vector{0x12, 0x30}, - 4, - 4), + std::vector{0x12, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::i4, std::vector{0x21, 0x03, 0x00}, - std::vector{0x20, 0x10, 0x00, 0x30}, - 8, - 8), + std::vector{0x20, 0x10, 0x00, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::i4, std::vector{0x12, 0x03}, - std::vector{0x12, 0x03}, - 4, - 4), + std::vector{0x12, 0x03}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::i4, std::vector{0x21, 0x03, 0x00}, - std::vector{0x12, 0x30}, - 4, - 4), + std::vector{0x12, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), 
ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i4, ov::element::i4, std::vector{0xFE, 0x03}, - std::vector{0xFE, 0x03}, - 4, - 4), + std::vector{0xFE, 0x03}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), // destination i8 ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u1, ov::element::i8, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), 
ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::i8, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::i8, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::i8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::i8, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -524,8 +470,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -574,36 +519,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i16, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::i16, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::i16, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::i16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::i16, 
std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -633,8 +573,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -683,36 +622,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i32, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::i32, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::i32, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::i32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::i32, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -742,8 +676,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -792,36 +725,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i64, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, 
ov::element::i64, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::i64, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::i64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::i64, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -851,8 +779,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::i64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -902,136 +829,103 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u1, std::vector{0xA0}, - std::vector{0xA0}, - 8, - 8), + std::vector{0xA0}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u2, ov::element::u1, std::vector{0b10010011, 0x00}, - std::vector{0b01010000}, - 8, - 8), + std::vector{0b01010000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u1, std::vector{0x21, 0x03, 0x00}, - std::vector{0b00010001}, - 8), + std::vector{0b00010001}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u4, ov::element::u1, std::vector{0x01, 0x10, 0x00, 0x00}, - std::vector{0x90}, - 8, - 8), + std::vector{0x90}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u6, ov::element::u1, std::vector{0x21, 0x03, 0x00, 0x00, 0x00, 0x00}, - std::vector{0b01010000}, - 8, - 8), + std::vector{0b01010000}), 
ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u8, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u32, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u64, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::i4, ov::element::u1, std::vector{0x01, 0x10, 0x00, 0x00}, - std::vector{0x90}, - 8, - 8), + std::vector{0x90}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::i8, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::i16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::i32, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::i64, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::f16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, 
ov::element::bf16, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::f32, ov::element::u1, std::vector{1, 0, 1, 0, 0, 0, 0, 1}, - std::vector{0xA1}, - 8, - 8), + std::vector{0xA1}), // destination u4 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1039,137 +933,103 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x01, 0x01}, - 4, - 4), + std::vector{0x01, 0x01}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::u4, std::vector{0b10010011}, - std::vector{0x12, 0x30}, - 4, - 4), + std::vector{0x12, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u4, std::vector{0x21, 0x03, 0x00}, - std::vector{0x20, 0x10, 0x00, 0x30}, - 8, - 8), + std::vector{0x20, 0x10, 0x00, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::u4, std::vector{0x12, 0x03}, - std::vector{0x12, 0x03}, - 4, - 4), + std::vector{0x12, 0x03}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::u4, std::vector{0x21, 0x03, 0x00}, - std::vector{0x12, 0x30}, - 4, - 4), + std::vector{0x12, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, 
ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x21, 0x30}, - 4, - 4), + std::vector{0x21, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i4, ov::element::u4, std::vector{0xFE, 0x03}, - std::vector{0xFE, 0x03}, - 4, - 4), + std::vector{0xFE, 0x03}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xEF, 0x30}, - 4, - 4), + std::vector{0xEF, 0x30}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xEF, 0x32}, - 4, - 4), + std::vector{0xEF, 0x32}), // destination u8 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1177,36 +1037,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u8, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, 
ov::element::u2, ov::element::u8, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u8, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::u8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::u8, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -1236,8 +1091,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u8, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -1287,36 +1141,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u16, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::u16, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u16, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::u16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::u16, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + 
std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -1346,8 +1195,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u16, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -1397,36 +1245,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u32, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::u32, std::vector{0b10010011}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u32, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::u32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::u32, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -1456,8 +1299,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u32, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -1506,36 +1348,31 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u64, std::vector{0x81}, - std::vector{1, 0, 0, 0, 0, 0, 0, 1}, - 8), + std::vector{1, 0, 0, 0, 0, 0, 0, 1}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u2, ov::element::u64, std::vector{0b10010011}, - 
std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{8}, ov::element::u3, ov::element::u64, std::vector{0x21, 0x03, 0x00}, - std::vector{0, 2, 0, 1, 0, 0, 0, 3}, - 8), + std::vector{0, 2, 0, 1, 0, 0, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u4, ov::element::u64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u6, ov::element::u64, std::vector{0x21, 0x03, 0x00}, - std::vector{2, 1, 0, 3}, - 4), + std::vector{2, 1, 0, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::u8, @@ -1565,8 +1402,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i4, ov::element::u64, std::vector{0x12, 0x34}, - std::vector{2, 1, 4, 3}, - 4), + std::vector{2, 1, 4, 3}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{4}, ov::element::i8, @@ -1615,460 +1451,346 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::nf4, std::vector{-0.6961928009986877f, 0.7229568362236023f, 1.0f, -0.5250730514526367f}, - std::vector{0xE1, 0x2F}, - 4, - 4), + std::vector{0xE1, 0x2F}), // destination u2 ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::boolean, ov::element::u2, std::vector{1, 0, 1, 1, 0, 1, 0, 0}, - std::vector{0b01000101, 0b00010000}, - 8, - 8), + std::vector{0b01000101, 0b00010000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u1, ov::element::u2, std::vector{0b11001011}, - std::vector{0x50, 0x45}, - 8, - 8), + std::vector{0x50, 0x45}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u3, ov::element::u2, std::vector{0b00100111, 0b00011010, 0b00001000}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, 
ov::element::u4, ov::element::u2, std::vector{0x21, 0x01, 0x31, 0xff}, - std::vector{0b01100100, 0b01111111}, - 8, - 8), + std::vector{0b01100100, 0b01111111}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u6, ov::element::u2, std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}, - std::vector{0b00100111, 0b00010000}, - 8, - 8), + std::vector{0b00100111, 0b00010000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u8, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u16, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u32, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u64, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i4, ov::element::u2, std::vector{0x21, 0x01, 0x31, 0xff}, - std::vector{0b01100100, 0b01111111}, - 8, - 8), + std::vector{0b01100100, 0b01111111}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i8, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i16, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + 
std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i32, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i64, ov::element::u2, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::f8e4m3, ov::element::u2, std::vector{0.1f, 2.0f, 1.1f, 3.0f, 4.0f, 1.0f, 2.2f, 2.8f}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f16, ov::element::u2, std::vector{0.1f, 2.0f, 1.1f, 3.0f, 4.0f, 1.0f, 2.2f, 2.8f}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f32, ov::element::u2, std::vector{0.1f, 2.0f, 1.1f, 3.0f, 4.0f, 1.0f, 2.2f, 2.8f}, - std::vector{0b00100111, 0b00011010}, - 8, - 8), + std::vector{0b00100111, 0b00011010}), // destination u3 ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::boolean, ov::element::u3, std::vector{1, 0, 1, 1, 0, 1, 0, 0}, - std::vector{0b01000101, 0b00010000, 0x00}, - 8, - 8), + std::vector{0b01000101, 0b00010000, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u1, ov::element::u3, std::vector{0b11001011}, - std::vector{0b01010000, 0b01000101, 0x00}, - 8, - 8), + std::vector{0b01010000, 0b01000101, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u2, ov::element::u3, std::vector{0b00100111, 0b00011010}, - std::vector{0b00100111, 0b00011010, 0x00}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 
0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u4, ov::element::u3, std::vector{0x21, 0x01, 0x31, 0xff}, - std::vector{0b01100100, 0b01111111, 0b00000011}, - 8, - 8), + std::vector{0b01100100, 0b01111111, 0b00000011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u6, ov::element::u3, std::vector{0x02, 0x13, 0x00, 0x41, 0x26, 0x00}, - std::vector{0b00100111, 0b00011010, 0b00001001}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 0b00001001}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u8, ov::element::u3, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010, 0b00001000}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 0b00001000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u16, ov::element::u3, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010, 0b00001000}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 0b00001000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u32, ov::element::u3, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010, 0b00001000}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 0b00001000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u64, ov::element::u3, std::vector{0, 2, 1, 3, 4, 1, 2, 2}, - std::vector{0b00100111, 0b00011010, 0b00001000}, - 8, - 8), + std::vector{0b00100111, 0b00011010, 0b00001000}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i8, ov::element::u3, std::vector{0, 2, 1, -3, 4, -1, 8, 7}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i16, ov::element::u3, std::vector{0, 2, 1, -3, 4, -1, 8, 7}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 
8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i32, ov::element::u3, std::vector{0, 2, 1, -3, 4, -1, 8, 7}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i64, ov::element::u3, std::vector{0, 2, 1, -3, 4, -1, 8, 7}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f8e4m3, ov::element::u3, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 4.0f, -1.0f, 8.0f, 7.2f}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f8e5m2, ov::element::u3, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 4.0f, -1.0f, 8.0f, 7.2f}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f16, ov::element::u3, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 4.0f, -1.0f, 8.0f, 7.2f}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::bf16, ov::element::u3, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 4.0f, -1.0f, 8.0f, 7.2f}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f32, ov::element::u3, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 4.0f, -1.0f, 8.0f, 7.2f}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), 
ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f64, ov::element::u3, std::vector{0.0, 2.1, 1.7, -3.1, 4.0, -1.0, 8.0, 7.2}, - std::vector{0b00100101, 0b00110011, 0b00011101}, - 8, - 8), + std::vector{0b00100101, 0b00110011, 0b00011101}), // destination u6 ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::boolean, ov::element::u6, std::vector{1, 0, 1, 1, 0, 1, 0, 0}, - std::vector{0x10, 0x11, 0x00, 0x01, 0x00, 0x00}, - 8, - 8), + std::vector{0x10, 0x11, 0x00, 0x01, 0x00, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u1, ov::element::u6, std::vector{0b11001011}, - std::vector{0x11, 0x00, 0x00, 0x10, 0x11, 0x00}, - 8, - 8), + std::vector{0x11, 0x00, 0x00, 0x10, 0x11, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u2, ov::element::u6, std::vector{0b00100111, 0b00010000}, - std::vector{0x02, 0x13, 0x00, 0x01, 0x00, 0x00}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x01, 0x00, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{1, 8}, ov::element::u3, ov::element::u6, std::vector{0b00100111, 0b00011010, 0b00001001}, - std::vector{0x02, 0x13, 0x00, 0x41, 0x26, 0x00}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x41, 0x26, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u4, ov::element::u6, std::vector{0x21, 0x01, 0x31, 0xff}, - std::vector{0x12, 0x10, 0x00, 0x13, 0xff, 0x00}, - 8, - 8), + std::vector{0x12, 0x10, 0x00, 0x13, 0xff, 0x00}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u8, ov::element::u6, std::vector{0, 2, 1, 3, 24, 1, 32, 52}, - std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u16, ov::element::u6, std::vector{0, 2, 1, 3, 24, 1, 32, 52}, - std::vector{0x02, 
0x13, 0x00, 0x81, 0x04, 0b01001011}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u32, ov::element::u6, std::vector{0, 2, 1, 3, 24, 1, 32, 52}, - std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::u64, ov::element::u6, std::vector{0, 2, 1, 3, 24, 1, 32, 52}, - std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}, - 8, - 8), + std::vector{0x02, 0x13, 0x00, 0x81, 0x04, 0b01001011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i4, ov::element::u6, std::vector{0x21, 0x01, 0x31, 0xff}, - std::vector{0x12, 0x10, 0x00, 0x13, 0xff, 0b00001111}, - 8, - 8), + std::vector{0x12, 0x10, 0x00, 0x13, 0xff, 0b00001111}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i8, ov::element::u6, std::vector{0, 2, 1, -3, 24, -1, 32, 52}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i16, ov::element::u6, std::vector{0, 2, 1, -3, 24, -1, 32, 52}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i32, ov::element::u6, std::vector{0, 2, 1, -3, 24, -1, 32, 52}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::i64, ov::element::u6, std::vector{0, 2, 1, -3, 24, -1, 32, 52}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + 
std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f8e4m3, ov::element::u6, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 24.0f, -1.0f, 32.0f, 52.0f}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f8e5m2, ov::element::u6, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 24.0f, -1.0f, 32.0f, 56.0f}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x08, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x08, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f16, ov::element::u6, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 24.0f, -1.0f, 32.0f, 52.0f}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::bf16, ov::element::u6, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 24.0f, -1.0f, 32.0f, 52.0f}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f32, ov::element::u6, std::vector{0.0f, 2.1f, 1.7f, -3.1f, 24.0f, -1.0f, 32.0f, 52.0f}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}), ConvertParams(ConversionTypes::CONVERT_LIKE, ov::PartialShape{2, 4}, ov::element::f64, ov::element::u6, std::vector{0.0, 2.1, 1.7, -3.1, 24.0, -1.0, 32.0, 52.0}, - std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011}, - 8, - 8)), + std::vector{0x02, 0x1d, 0b00000011, 0x8f, 0x04, 0b01111011})), ReferenceConversionLayerTest::getTestCaseName); } // namespace } // namespace 
ConversionOpsRefTestDefinitions diff --git a/src/plugins/template/tests/functional/op_reference/cum_sum.cpp b/src/plugins/template/tests/functional/op_reference/cum_sum.cpp index dbb6b9e1dad0ef..13c23ebf0bf3ab 100644 --- a/src/plugins/template/tests/functional/op_reference/cum_sum.cpp +++ b/src/plugins/template/tests/functional/op_reference/cum_sum.cpp @@ -15,7 +15,7 @@ namespace { struct CumSumParams { // Custom axis input and attributes template - CumSumParams(const PartialShape& shape, + CumSumParams(const Shape& shape, const element::Type& iType, const std::vector& iValues, const std::vector& oValues, @@ -23,7 +23,7 @@ struct CumSumParams { const bool reverse, const element::Type& axisType, AT axisVal, - const PartialShape& axisShape) + const Shape& axisShape) : execlusive(execlusive), reverse(reverse), axisValue(axisVal), @@ -33,13 +33,13 @@ struct CumSumParams { inType(iType), outType(iType), axisData(CreateTensor(axisType, std::vector{axisVal})), - inputData(CreateTensor(iType, iValues)), - refData(CreateTensor(iType, oValues)), + inputData(CreateTensor(shape, iType, iValues)), + refData(CreateTensor(shape, iType, oValues)), testDefaults(false) {} // Default axis input and attributes template - CumSumParams(const PartialShape& shape, + CumSumParams(const Shape& shape, const element::Type& iType, const std::vector& iValues, const std::vector& oValues) @@ -47,16 +47,16 @@ struct CumSumParams { axisType(element::i32), inType(iType), outType(iType), - inputData(CreateTensor(iType, iValues)), - refData(CreateTensor(iType, oValues)), + inputData(CreateTensor(shape, iType, iValues)), + refData(CreateTensor(shape, iType, oValues)), testDefaults(true) {} bool execlusive = false; bool reverse = false; int64_t axisValue = 0; - PartialShape axisShape; - PartialShape inShape; + Shape axisShape; + Shape inShape; element::Type axisType; element::Type inType; element::Type outType; @@ -102,9 +102,9 @@ class ReferenceCumSumLayerTest : public testing::TestWithParam, pu } 
private: - static std::shared_ptr CreateFunction(const PartialShape& data_shape, + static std::shared_ptr CreateFunction(const Shape& data_shape, const element::Type& data_type, - const PartialShape& axis_shape, + const Shape& axis_shape, const element::Type& axis_type, const bool execlusive, const bool reverse) { @@ -114,7 +114,7 @@ class ReferenceCumSumLayerTest : public testing::TestWithParam, pu return std::make_shared(NodeVector{cum_sum}, ParameterVector{data_param, axis_param}); } - static std::shared_ptr CreateFunction(const PartialShape& data_shape, const element::Type& data_type) { + static std::shared_ptr CreateFunction(const Shape& data_shape, const element::Type& data_type) { const auto data_param = std::make_shared(data_type, data_shape); const auto cum_sum = std::make_shared(data_param); return std::make_shared(NodeVector{cum_sum}, ParameterVector{data_param}); @@ -130,14 +130,14 @@ std::vector generateCumSumParams(const element::Type& type) { using T = typename element_type_traits::value_type; std::vector opParams{ // Default axis input and attributes - CumSumParams(PartialShape{1}, type, std::vector{3}, std::vector{3}), - CumSumParams(PartialShape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{1, 3, 6, 10, 15, 21}), - CumSumParams(PartialShape{2, 4}, + CumSumParams(Shape{1}, type, std::vector{3}, std::vector{3}), + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{1, 3, 6, 10, 15, 21}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{0, 1, 2, 3, 4, 6, 8, 10}), // Custom axis input and attributes - CumSumParams(PartialShape{6}, + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{1, 3, 6, 10, 15, 21}, @@ -145,8 +145,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(0), - PartialShape{}), // axis i32 - CumSumParams(PartialShape{6}, + Shape{}), // axis i32 + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, 
std::vector{1, 3, 6, 10, 15, 21}, @@ -154,8 +154,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i64, int64_t(0), - PartialShape{}), // axis i64 - CumSumParams(PartialShape{6}, + Shape{}), // axis i64 + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{21, 20, 18, 15, 11, 6}, @@ -163,8 +163,8 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i64, int64_t(0), - PartialShape{}), - CumSumParams(PartialShape{6}, + Shape{}), + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{0, 1, 3, 6, 10, 15}, @@ -172,8 +172,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i64, int64_t(0), - PartialShape{}), - CumSumParams(PartialShape{6}, + Shape{}), + CumSumParams(Shape{6}, type, std::vector{1, 2, 3, 4, 5, 6}, std::vector{20, 18, 15, 11, 6, 0}, @@ -181,9 +181,9 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i64, int64_t(0), - PartialShape{}), + Shape{}), - CumSumParams(PartialShape{2, 4}, + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{0, 1, 2, 3, 4, 6, 8, 10}, @@ -191,8 +191,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(0), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{4, 6, 8, 10, 4, 5, 6, 7}, @@ -200,8 +200,8 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i32, int32_t(0), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{0, 0, 0, 0, 0, 1, 2, 3}, @@ -209,8 +209,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(0), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, 
std::vector{4, 5, 6, 7, 0, 0, 0, 0}, @@ -218,8 +218,8 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i32, int32_t(0), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{0, 1, 3, 6, 4, 9, 15, 22}, @@ -227,8 +227,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(1), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{0, 0, 1, 3, 0, 4, 9, 15}, @@ -236,8 +236,8 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(1), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{6, 6, 5, 3, 22, 18, 13, 7}, @@ -245,8 +245,8 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i32, int32_t(1), - PartialShape{}), - CumSumParams(PartialShape{2, 4}, + Shape{}), + CumSumParams(Shape{2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7}, std::vector{6, 5, 3, 0, 18, 13, 7, 0}, @@ -254,10 +254,10 @@ std::vector generateCumSumParams(const element::Type& type) { true, element::i32, int32_t(1), - PartialShape{}), + Shape{}), CumSumParams( - PartialShape{3, 2, 4}, + Shape{3, 2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 27, 30, 33, 36, 39, 42, 45}, @@ -265,9 +265,9 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(0), - PartialShape{}), + Shape{}), CumSumParams( - PartialShape{3, 2, 4}, + Shape{3, 2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, std::vector{0, 1, 2, 3, 4, 6, 8, 10, 8, 9, 10, 11, 20, 22, 24, 26, 16, 17, 18, 19, 
36, 38, 40, 42}, @@ -275,9 +275,9 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(1), - PartialShape{}), + Shape{}), CumSumParams( - PartialShape{3, 2, 4}, + Shape{3, 2, 4}, type, std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, std::vector{0, 1, 3, 6, 4, 9, 15, 22, 8, 17, 27, 38, 12, 25, 39, 54, 16, 33, 51, 70, 20, 41, 63, 86}, @@ -285,7 +285,7 @@ std::vector generateCumSumParams(const element::Type& type) { false, element::i32, int32_t(2), - PartialShape{}), + Shape{}), }; return opParams; } diff --git a/src/plugins/template/tests/functional/op_reference/detection_output.cpp b/src/plugins/template/tests/functional/op_reference/detection_output.cpp index 571efbafb1993c..f76710714831fc 100644 --- a/src/plugins/template/tests/functional/op_reference/detection_output.cpp +++ b/src/plugins/template/tests/functional/op_reference/detection_output.cpp @@ -12,6 +12,19 @@ using namespace reference_tests; using namespace ov; namespace { +size_t get_k(const size_t num_images, + const size_t num_prior_boxes, + const size_t num_classes, + const int top_k, + const std::vector& keep_top_k) { + if (keep_top_k[0] > 0) + return num_images * keep_top_k[0]; + else if (keep_top_k[0] == -1 && top_k > 0) + return num_images * top_k * num_classes; + else + return num_images * num_prior_boxes * num_classes; +} + struct DetectionOutputParams { template DetectionOutputParams(const int num_classes, @@ -43,7 +56,6 @@ struct DetectionOutputParams { locData(CreateTensor(iType, locValues)), confData(CreateTensor(iType, confValues)), priorBoxesData(CreateTensor(iType, priorBoxesValues)), - refData(CreateTensor(iType, oValues)), testcaseName(test_name) { attrs.num_classes = num_classes; attrs_v8.background_label_id = attrs.background_label_id = background_label_id; @@ -70,6 +82,10 @@ struct DetectionOutputParams { priorBoxesShape = ov::Shape{is_priors_patch_size_1 ? 
1UL : num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; + + const auto k = get_k(num_images, num_prior_boxes, num_classes, top_k, keep_top_k); + const auto output_shape = Shape{1, 1, k, 7}; + refData = CreateTensor(output_shape, iType, oValues); } template @@ -104,7 +120,6 @@ struct DetectionOutputParams { locData(CreateTensor(iType, locValues)), confData(CreateTensor(iType, confValues)), priorBoxesData(CreateTensor(iType, priorBoxesValues)), - refData(CreateTensor(iType, oValues)), auxLocData(CreateTensor(iType, auxLocValues)), auxConfData(CreateTensor(iType, auxConfValues)), testcaseName(test_name) { @@ -135,6 +150,10 @@ struct DetectionOutputParams { num_prior_boxes * prior_box_size}; auxLocShape = locShape; auxConfShape = confShape; + + const auto k = get_k(num_images, num_prior_boxes, num_classes, top_k, keep_top_k); + const auto output_shape = Shape{1, 1, k, 7}; + refData = CreateTensor(output_shape, iType, oValues); } ov::op::v0::DetectionOutput::Attributes attrs; @@ -158,8 +177,7 @@ class ReferenceDetectionOutputLayerTest : public testing::TestWithParam& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "locShape=" << param.locShape << "_"; result << "confShape=" << param.confShape << "_"; @@ -206,8 +224,7 @@ class ReferenceDetectionOutputV8LayerTest : public testing::TestWithParam& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "locShape=" << param.locShape << "_"; result << "confShape=" << param.confShape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/elu.cpp b/src/plugins/template/tests/functional/op_reference/elu.cpp index aebdf47f685e06..7322a3dddb1508 100644 --- a/src/plugins/template/tests/functional/op_reference/elu.cpp +++ b/src/plugins/template/tests/functional/op_reference/elu.cpp @@ -23,8 +23,8 @@ struct EluParams { pshape(shape), inType(iType), outType(iType), - 
inputData(CreateTensor(iType, iValues)), - refData(CreateTensor(iType, oValues)) {} + inputData(CreateTensor(shape.get_shape(), iType, iValues)), + refData(CreateTensor(shape.get_shape(), iType, oValues)) {} double alpha = 0; @@ -38,14 +38,13 @@ struct EluParams { class ReferenceEluLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.pshape, params.inType, params.outType, params.alpha); inputData = {params.inputData}; refOutData = {params.refData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "shape=" << param.pshape << "_"; result << "iType=" << param.inType << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/experimental_detectron_detection_output.cpp b/src/plugins/template/tests/functional/op_reference/experimental_detectron_detection_output.cpp index 29bac1c4af004e..07daa8373f1368 100644 --- a/src/plugins/template/tests/functional/op_reference/experimental_detectron_detection_output.cpp +++ b/src/plugins/template/tests/functional/op_reference/experimental_detectron_detection_output.cpp @@ -34,14 +34,16 @@ struct ExperimentalDOParams { deltasData(CreateTensor(iType, deltasValues)), scoresData(CreateTensor(iType, scoresValues)), imageSizeInfoData(CreateTensor(iType, imageSizeInfoValues)), - refBoxesData(CreateTensor(iType, refBoxesValues)), - refClassesData(CreateTensor(ov::element::i32, refClassesValues)), - refScoresData(CreateTensor(iType, refScoresValues)), testcaseName(testcaseName) { roisShape = Shape{num_rois, 4}; deltasShape = Shape{num_rois, static_cast(attrs.num_classes * 4)}; scoresShape = Shape{num_rois, static_cast(attrs.num_classes)}; imageSizeInfoShape = Shape{1, 3}; + + const auto max_d = attrs.max_detections_per_image; + refBoxesData = 
CreateTensor(Shape{max_d, 4}, iType, refBoxesValues); + refClassesData = CreateTensor(Shape{max_d}, ov::element::i32, refClassesValues); + refScoresData = CreateTensor(Shape{max_d}, iType, refScoresValues); } Attrs attrs; @@ -65,14 +67,13 @@ class ReferenceExperimentalDOLayerTest : public testing::TestWithParam& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "roisShape=" << param.roisShape << "_"; result << "deltasShape=" << param.deltasShape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/experimental_detectron_generate_proposals.cpp b/src/plugins/template/tests/functional/op_reference/experimental_detectron_generate_proposals.cpp index 3b8fc5ba1f8c64..aa4d50a0c0358e 100644 --- a/src/plugins/template/tests/functional/op_reference/experimental_detectron_generate_proposals.cpp +++ b/src/plugins/template/tests/functional/op_reference/experimental_detectron_generate_proposals.cpp @@ -35,13 +35,15 @@ struct ExperimentalGPParams { anchorsData(CreateTensor(iType, anchorsValues)), deltasData(CreateTensor(iType, deltasValues)), scoresData(CreateTensor(iType, scoresValues)), - refRoisData(CreateTensor(iType, refRoisValues)), - refScoresData(CreateTensor(iType, refScoresValues)), testcaseName(testcaseName) { imageSizeInfoShape = Shape{3}; anchorsShape = Shape{height * width * number_of_channels, 4}; deltasShape = Shape{number_of_channels * 4, height, width}; scoresShape = Shape{number_of_channels, height, width}; + + const auto post_nms = static_cast(attrs.post_nms_count); + refRoisData = CreateTensor(Shape{post_nms, 4}, iType, refRoisValues); + refScoresData = CreateTensor(Shape{post_nms}, iType, refScoresValues); } Attrs attrs; @@ -64,14 +66,13 @@ class ReferenceExperimentalGPLayerTest : public testing::TestWithParam& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "imageSizeInfoShape=" << param.imageSizeInfoShape << "_"; result << 
"anchorsShape=" << param.anchorsShape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/floor_mod.cpp b/src/plugins/template/tests/functional/op_reference/floor_mod.cpp index 33b06550000d8b..641d5c284d0042 100644 --- a/src/plugins/template/tests/functional/op_reference/floor_mod.cpp +++ b/src/plugins/template/tests/functional/op_reference/floor_mod.cpp @@ -17,6 +17,7 @@ struct FloorModParams { template FloorModParams(const PartialShape& iShape1, const PartialShape& iShape2, + const Shape& oShape, const element::Type& iType, const std::vector& iValues1, const std::vector& iValues2, @@ -27,7 +28,7 @@ struct FloorModParams { outType(iType), inputData1(CreateTensor(iType, iValues1)), inputData2(CreateTensor(iType, iValues2)), - refData(CreateTensor(iType, oValues)) {} + refData(CreateTensor(oShape, iType, oValues)) {} PartialShape pshape1; PartialShape pshape2; @@ -41,15 +42,14 @@ struct FloorModParams { class ReferenceFloorModLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.pshape1, params.pshape2, params.inType, params.outType); inputData = {params.inputData1, params.inputData2}; refOutData = {params.refData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "iShape1=" << param.pshape1 << "_"; result << "iShape2=" << param.pshape2 << "_"; @@ -81,6 +81,7 @@ std::vector generateParamsForFloorMod() { std::vector params{FloorModParams(ov::PartialShape{4}, ov::PartialShape{4}, + ov::Shape{4}, IN_ET, std::vector{7, -7, 7, -7}, std::vector{3, 3, -3, -3}, @@ -95,6 +96,7 @@ std::vector generateParamsForFloorModBroadcast() { std::vector params{ FloorModParams(ov::PartialShape{2, 1, 2}, ov::PartialShape{2, 1}, + ov::Shape{2, 2, 2}, IN_ET, std::vector{1, 2, 3, 4}, 
std::vector{2, 3}, @@ -110,6 +112,7 @@ std::vector generateParamsForFloorModScalar() { std::vector params{ FloorModParams(ov::PartialShape{1}, ov::PartialShape{1}, + ov::Shape{1}, IN_ET, std::vector{2}, std::vector{4}, @@ -122,7 +125,7 @@ template std::vector generateParamsForFloorModNonIntegerDivisor() { using T = typename element_type_traits::value_type; // clang-format off - return {FloorModParams(ov::PartialShape{8}, ov::PartialShape{8}, IN_ET, + return {FloorModParams(ov::PartialShape{8}, ov::PartialShape{8}, ov::Shape{8}, IN_ET, std::vector{-3.2, -3.1, -3.0, 5.0, 5.1, 5.2, -1.6, 1.6}, std::vector{-3.1, -3.1, -3.1, -5.1, -5.1, -5.1, 1.7, 1.7}, std::vector{-0.1, -0.0, -3.0, -0.1, -0.0, -5.0, 0.1, 1.6})}; diff --git a/src/plugins/template/tests/functional/op_reference/generate_proposals.cpp b/src/plugins/template/tests/functional/op_reference/generate_proposals.cpp index 61e32387b8bf7a..52aa2a98b71785 100644 --- a/src/plugins/template/tests/functional/op_reference/generate_proposals.cpp +++ b/src/plugins/template/tests/functional/op_reference/generate_proposals.cpp @@ -39,14 +39,16 @@ struct GPParams { anchorsData(CreateTensor(iType, anchorsValues)), deltasData(CreateTensor(iType, deltasValues)), scoresData(CreateTensor(iType, scoresValues)), - refRoisData(CreateTensor(iType, refRoisValues)), - refScoresData(CreateTensor(iType, refScoresValues)), - refRoiNumData(CreateTensor(roiNumType, refRoiNumValues)), testcaseName(testcaseName) { imageSizeInfoShape = Shape{batch, 3}; anchorsShape = Shape{height, width, number_of_channels, 4}; deltasShape = Shape{batch, number_of_channels * 4, height, width}; scoresShape = Shape{batch, number_of_channels, height, width}; + + const auto number_of_rois = refScoresValues.size(); + refRoisData = CreateTensor(Shape{number_of_rois, 4}, iType, refRoisValues); + refScoresData = CreateTensor(Shape{number_of_rois}, iType, refScoresValues); + refRoiNumData = CreateTensor(Shape{batch}, roiNumType, refRoiNumValues); } Attrs attrs; @@ 
-70,14 +72,13 @@ struct GPParams { class ReferenceGPLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params); inputData = {params.imageSizeInfoData, params.anchorsData, params.deltasData, params.scoresData}; refOutData = {params.refRoisData, params.refScoresData, params.refRoiNumData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "imageSizeInfoShape=" << param.imageSizeInfoShape << "_"; result << "anchorsShape=" << param.anchorsShape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/grn.cpp b/src/plugins/template/tests/functional/op_reference/grn.cpp index f5a31eb2c97a1c..19e91b6485d522 100644 --- a/src/plugins/template/tests/functional/op_reference/grn.cpp +++ b/src/plugins/template/tests/functional/op_reference/grn.cpp @@ -23,8 +23,8 @@ struct GrnParams { pshape(shape), inType(iType), outType(iType), - inputData(CreateTensor(iType, iValues)), - refData(CreateTensor(iType, oValues)) {} + inputData(CreateTensor(pshape.get_shape(), iType, iValues)), + refData(CreateTensor(pshape.get_shape(), iType, oValues)) {} float bias; PartialShape pshape; element::Type inType; @@ -36,14 +36,13 @@ struct GrnParams { class ReferenceGrnLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.bias, params.pshape, params.inType); inputData = {params.inputData}; refOutData = {params.refData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "bias=" << param.bias << "_"; result << "shape=" 
<< param.pshape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/maximum.cpp b/src/plugins/template/tests/functional/op_reference/maximum.cpp index 2646e7496b1927..1b783bc5cae3b2 100644 --- a/src/plugins/template/tests/functional/op_reference/maximum.cpp +++ b/src/plugins/template/tests/functional/op_reference/maximum.cpp @@ -25,9 +25,9 @@ struct MaximumParams { pshape2(iShape2), inType(iType), outType(iType), - inputData1(CreateTensor(iType, iValues1)), - inputData2(CreateTensor(iType, iValues2)), - refData(CreateTensor(iType, oValues)) {} + inputData1(CreateTensor(iShape1.get_shape(), iType, iValues1)), + inputData2(CreateTensor(iShape2.get_shape(), iType, iValues2)), + refData(CreateTensor(iShape1.get_shape(), iType, oValues)) {} PartialShape pshape1; PartialShape pshape2; @@ -41,15 +41,14 @@ struct MaximumParams { class ReferenceMaximumLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.pshape1, params.pshape2, params.inType, params.outType); inputData = {params.inputData1, params.inputData2}; refOutData = {params.refData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "iShape1=" << param.pshape1 << "_"; result << "iShape2=" << param.pshape2 << "_"; @@ -188,12 +187,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Maximum_Int32_With_Hardcoded_Refs, ::testing::ValuesIn(generateCombinedParamsForMaximumInt32()), ReferenceMaximumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Maximume_Int64_With_Hardcoded_Refs, +INSTANTIATE_TEST_SUITE_P(smoke_Maximum_Int64_With_Hardcoded_Refs, ReferenceMaximumLayerTest, ::testing::ValuesIn(generateCombinedParamsForMaximumInt64()), ReferenceMaximumLayerTest::getTestCaseName); 
-INSTANTIATE_TEST_SUITE_P(smoke_Maximume_UnsignedInt_With_Hardcoded_Refs, +INSTANTIATE_TEST_SUITE_P(smoke_Maximum_UnsignedInt_With_Hardcoded_Refs, ReferenceMaximumLayerTest, ::testing::ValuesIn(generateCombinedParamsForMaximumUnsignedInt()), ReferenceMaximumLayerTest::getTestCaseName); diff --git a/src/plugins/template/tests/functional/op_reference/nonzero.cpp b/src/plugins/template/tests/functional/op_reference/nonzero.cpp index 0f2cafcd820e00..30b3e036676a8f 100644 --- a/src/plugins/template/tests/functional/op_reference/nonzero.cpp +++ b/src/plugins/template/tests/functional/op_reference/nonzero.cpp @@ -27,8 +27,11 @@ struct NonZeroParams { inType(inType), refType(refType), inputData(CreateTensor(inType, inputData)), - refData(CreateTensor(refType, refData)), - testcaseName(test_name) {} + testcaseName(test_name) { + const auto input_rank = inputShape.get_shape().size(); + const auto non_zero_num = refData.size() / input_rank; + this->refData = CreateTensor(Shape{input_rank, non_zero_num}, refType, refData); + } PartialShape dynamicShape; PartialShape inputShape; @@ -42,15 +45,14 @@ struct NonZeroParams { class ReferenceNonZeroLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.dynamicShape, params.inType, params.refType); inputData = {params.inputData}; refOutData = {params.refData}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "dShape=" << param.dynamicShape << "_"; result << "iShape=" << param.inputShape << "_"; diff --git a/src/plugins/template/tests/functional/op_reference/reshape.cpp b/src/plugins/template/tests/functional/op_reference/reshape.cpp index 861de521d2706e..68a39c1c9229ec 100644 --- a/src/plugins/template/tests/functional/op_reference/reshape.cpp +++ 
b/src/plugins/template/tests/functional/op_reference/reshape.cpp @@ -28,10 +28,8 @@ struct ReshapeParams { m_input_type = input_type; m_expected_type = expected_type; m_zero_flag = zero_flag; - m_input_value = input_shape.size() > 0 ? CreateTensor(input_shape, input_type, input_value) - : CreateTensor(input_type, input_value); - m_expected_value = expected_shape.size() > 0 ? CreateTensor(expected_shape, expected_type, expected_value) - : CreateTensor(expected_type, expected_value); + m_input_value = CreateTensor(input_shape, input_type, input_value); + m_expected_value = CreateTensor(expected_shape, expected_type, expected_value); } template @@ -105,8 +103,7 @@ struct ReshapeShuffleParams { class ReferenceReshapeLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - const auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params.m_input_type, params.m_expected_type, params.m_input_shape, @@ -117,7 +114,7 @@ class ReferenceReshapeLayerTest : public testing::TestWithParam, } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - const auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "input_shape=" << param.m_input_shape << "; "; @@ -147,8 +144,7 @@ class ReferenceReshapeShuffleLayerTest : public testing::TestWithParam& obj) { - const auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "input_shape=" << param.m_input_shape1 << "; "; diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 9400c2ea681de6..230b5defec7196 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -9,7 +9,6 @@ import numpy as np from common.constants import test_device, test_precision from 
openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder -from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder from openvino.frontend import FrontEndManager from openvino.runtime import Core, Type, PartialShape diff --git a/tests/layer_tests/pytorch_tests/test_convnd.py b/tests/layer_tests/pytorch_tests/test_convnd.py index baedbb2184acbe..f261e2d78dfab0 100644 --- a/tests/layer_tests/pytorch_tests/test_convnd.py +++ b/tests/layer_tests/pytorch_tests/test_convnd.py @@ -216,7 +216,6 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit - @pytest.mark.xfail(reason="ticket 123727") def test_conv2d(self, ie_device, precision, ir_version): self._test(*self.create_model(), - ie_device, precision, ir_version, freeze_model=True, dynamic_shapes=False) + ie_device, precision, ir_version, freeze_model=True) diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_add.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_add.py index 1ddc307eea3bf7..15b73d422bdfb4 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_add.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_add.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasAdd(CommonTF2LayerTest): @@ -27,62 +25,6 @@ def create_keras_add_net(self, input_names, input_shapes, input_type, ir_version # create reference IR net ref_net = None - op_name = "Add" - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = 
input_shapes[0][1:-1] - if len(input_names) == 2: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op': {'kind': 'op', 'type': op_name}, - 'op_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op', {'in': 0}), - ('input2_data', 'op', {'in': 1}), - ('op', 'op_data'), - ('op_data', 'result') - ]) - elif len(input_names) == 3: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op1': {'kind': 'op', 'type': op_name}, - 'op1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input3': {'kind': 'op', 'type': 'Parameter'}, - 'input3_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op2': {'kind': 'op', 'type': op_name}, - 'op2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op1', {'in': 0}), - ('input2_data', 'op1', {'in': 1}), - ('op1', 'op1_data'), - ('input3', 'input3_data'), - ('op1_data', 'op2', {'in': 0}), - ('input3_data', 'op2', {'in': 1}), - ('op2', 'op2_data'), - ('op2_data', 'result') - ]) - else: - AssertionError("Not supported case with input number greater 2") - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_elu.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_elu.py index 
2bc44c08dcba45..306b5c11111391 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_elu.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_elu.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasELU(CommonTF2LayerTest): @@ -26,79 +24,6 @@ def create_keras_elu_net(self, input_names, input_shapes, input_type, alpha, ir_ # create reference IR net ref_net = None - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - if alpha == 1.0: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'elu': {'kind': 'op', 'type': 'Elu'}, - 'elu_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input1_data', 'elu', {'in': 0}), - ('elu', 'elu_data'), - ('elu_data', 'result')]) - else: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - - 'alpha_input_data': {'kind': 'data', 'shape': [1], 'value': [0.0]}, - 'alpha': {'kind': 'op', 'type': 'Const'}, - 'alpha_data': {'kind': 'data'}, - - 'const_input_data': {'kind': 'data', 'shape': [1], 'value': [alpha]}, - 'const': {'kind': 'op', 'type': 'Const'}, - 'const_data': {'kind': 'data'}, - - 'greater': {'kind': 'op', 'type': 'Greater'}, - 'greater_data': {'shape': 
converted_input_shape, 'kind': 'data'}, - - 'elu': {'kind': 'op', 'type': 'Elu'}, - 'elu_data': {'shape': converted_input_shape, 'kind': 'data'}, - - '1select': {'kind': 'op', 'type': 'Select'}, - 'select_data': {'shape': converted_input_shape, 'kind': 'data'}, - - '2multiply': {'kind': 'op', 'type': 'Multiply'}, - 'multiply_data': {'shape': converted_input_shape, 'kind': 'data'}, - - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('alpha_input_data', 'alpha'), - ('alpha', 'alpha_data'), - ('const_input_data', 'const'), - ('const', 'const_data'), - - ('input1_data', 'greater', {'in': 0}), - ('alpha_data', 'greater', {'in': 1}), - ('greater', 'greater_data'), - - ('input1_data', 'elu', {'in': 0}), - ('elu', 'elu_data'), - - ('const_data', '2multiply', {'in': 0}), - ('elu_data', '2multiply', {'in': 1}), - ('2multiply', 'multiply_data'), - - ('greater_data', '1select', {'in': 0}), - ('elu_data', '1select', {'in': 1}), - ('multiply_data', '1select', {'in': 2}), - ('1select', 'select_data'), - - ('select_data', 'result')]) - return tf2_net, ref_net test_data_float32_precommit = [dict(input_names=["x1"], input_shapes=[[5, 4, 8, 3, 2]], diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_maximum.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_maximum.py index 1dbb54eb2f8e17..f4046c9e4277fc 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_maximum.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_maximum.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasMaximum(CommonTF2LayerTest): @@ -27,62 +25,6 @@ def 
create_keras_maximum_net(self, input_names, input_shapes, input_type, ir_ver # create reference IR net ref_net = None - op_name = "Maximum" - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - if len(input_names) == 2: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op': {'kind': 'op', 'type': op_name}, - 'op_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op', {'in': 0}), - ('input2_data', 'op', {'in': 1}), - ('op', 'op_data'), - ('op_data', 'result') - ]) - elif len(input_names) == 3: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op1': {'kind': 'op', 'type': op_name}, - 'op1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input3': {'kind': 'op', 'type': 'Parameter'}, - 'input3_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op2': {'kind': 'op', 'type': op_name}, - 'op2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op1', {'in': 0}), - ('input2_data', 'op1', {'in': 1}), - ('op1', 'op1_data'), - ('input3', 'input3_data'), - ('op1_data', 
'op2', {'in': 0}), - ('input3_data', 'op2', {'in': 1}), - ('op2', 'op2_data'), - ('op2_data', 'result') - ]) - else: - AssertionError("Not supported case with input number greater 2") - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_minimum.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_minimum.py index 17bd87cbc07ea0..ffd6ab01ec4aee 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_minimum.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_minimum.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasMinimum(CommonTF2LayerTest): @@ -27,62 +25,6 @@ def create_keras_minimum_net(self, input_names, input_shapes, input_type, ir_ver # create reference IR net ref_net = None - op_name = "Minimum" - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - if len(input_names) == 2: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op': {'kind': 'op', 'type': op_name}, - 'op_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op', {'in': 0}), - 
('input2_data', 'op', {'in': 1}), - ('op', 'op_data'), - ('op_data', 'result') - ]) - elif len(input_names) == 3: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op1': {'kind': 'op', 'type': op_name}, - 'op1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input3': {'kind': 'op', 'type': 'Parameter'}, - 'input3_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op2': {'kind': 'op', 'type': op_name}, - 'op2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op1', {'in': 0}), - ('input2_data', 'op1', {'in': 1}), - ('op1', 'op1_data'), - ('input3', 'input3_data'), - ('op1_data', 'op2', {'in': 0}), - ('input3_data', 'op2', {'in': 1}), - ('op2', 'op2_data'), - ('op2_data', 'result') - ]) - else: - AssertionError("Not supported case with input number greater 2") - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_multiply.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_multiply.py index c906ed4c7cf0b6..e3b62352978b9e 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_multiply.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_multiply.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasMultiply(CommonTF2LayerTest): @@ -27,62 +25,6 @@ def 
create_keras_multiply_net(self, input_names, input_shapes, input_type, ir_ve # create reference IR net ref_net = None - op_name = "Multiply" - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - if len(input_names) == 2: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op': {'kind': 'op', 'type': op_name}, - 'op_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op', {'in': 0}), - ('input2_data', 'op', {'in': 1}), - ('op', 'op_data'), - ('op_data', 'result') - ]) - elif len(input_names) == 3: - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op1': {'kind': 'op', 'type': op_name}, - 'op1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input3': {'kind': 'op', 'type': 'Parameter'}, - 'input3_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op2': {'kind': 'op', 'type': op_name}, - 'op2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op1', {'in': 0}), - ('input2_data', 'op1', {'in': 1}), - ('op1', 'op1_data'), - ('input3', 'input3_data'), - ('op1_data', 
'op2', {'in': 0}), - ('input3_data', 'op2', {'in': 1}), - ('op2', 'op2_data'), - ('op2_data', 'result') - ]) - else: - AssertionError("Not supported case with input number greater 2") - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_relu.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_relu.py index 09d62264875faf..9df3eb5b5daf20 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_relu.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_relu.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasRelu(CommonTF2LayerTest): @@ -26,26 +24,6 @@ def create_keras_relu_net(self, input_names, input_shapes, input_type, ir_versio # create reference IR net ref_net = None - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'relu': {'kind': 'op', 'type': 'ReLU'}, - 'relu_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input1_data', 'relu', {'in': 0}), - ('relu', 'relu_data'), - ('relu_data', 'result')]) - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softmax.py 
b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softmax.py index c889a69246489a..486df7dafafbd3 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softmax.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softmax.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasSoftmax(CommonTF2LayerTest): @@ -26,26 +24,6 @@ def create_keras_softmax_net(self, input_names, input_shapes, input_type, ir_ver # create reference IR net ref_net = None - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'softmax': {'kind': 'op', 'type': 'SoftMax'}, - 'softmax_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input1_data', 'softmax', {'in': 0}), - ('softmax', 'softmax_data'), - ('softmax_data', 'result')]) - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softplus.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softplus.py index 9ad45afb9e22d4..11b2493355376b 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softplus.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_softplus.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 
Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasSoftplus(CommonTF2LayerTest): @@ -26,26 +24,6 @@ def create_keras_softplus_net(self, input_names, input_shapes, input_type, ir_ve # create reference IR net ref_net = None - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'softplus': {'kind': 'op', 'type': 'SoftPlus'}, - 'softplus_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input1_data', 'softplus', {'in': 0}), - ('softplus', 'softplus_data'), - ('softplus_data', 'result')]) - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_subtract.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_subtract.py index 70ea1ad9151f84..7c9f13868c0d3f 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_subtract.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_subtract.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import 
build_graph class TestKerasSubtract(CommonTF2LayerTest): @@ -27,32 +25,6 @@ def create_keras_subtract_net(self, input_names, input_shapes, input_type, ir_ve # create reference IR net ref_net = None - op_name = "Subtract" - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'input2': {'kind': 'op', 'type': 'Parameter'}, - 'input2_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'op': {'kind': 'op', 'type': op_name}, - 'op_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input2', 'input2_data'), - ('input1_data', 'op', {'in': 0}), - ('input2_data', 'op', {'in': 1}), - ('op', 'op_data'), - ('op_data', 'result') - ]) - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_swish.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_swish.py index 6981e0a89887e9..e168bf2d655fb7 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_swish.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_swish.py @@ -1,11 +1,9 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import pytest import tensorflow as tf -from common.layer_test_class import check_ir_version from common.tf2_layer_test_class import CommonTF2LayerTest -from unit_tests.utils.graph import build_graph class TestKerasSWish(CommonTF2LayerTest): @@ -26,26 +24,6 @@ def create_keras_swish_net(self, input_names, input_shapes, 
input_type, ir_versi # create reference IR net ref_net = None - if check_ir_version(10, None, ir_version): - # convert NHWC to NCHW layout if tensor rank greater 3 - converted_input_shape = input_shapes[0].copy() - if len(converted_input_shape) > 3: - converted_input_shape[1] = input_shapes[0][-1] - converted_input_shape[2:] = input_shapes[0][1:-1] - nodes_attributes = { - 'input1': {'kind': 'op', 'type': 'Parameter'}, - 'input1_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'swish': {'kind': 'op', 'type': 'Swish'}, - 'swish_data': {'shape': converted_input_shape, 'kind': 'data'}, - 'result': {'kind': 'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input1', 'input1_data'), - ('input1_data', 'swish', {'in': 0}), - ('swish', 'swish_data'), - ('swish_data', 'result')]) - return tf2_net, ref_net test_data_float32_precommit = [ diff --git a/tests/layer_tests/tensorflow_tests/test_tf_MatrixInverse.py b/tests/layer_tests/tensorflow_tests/test_tf_MatrixInverse.py new file mode 100644 index 00000000000000..c18de12d0ec7e7 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_MatrixInverse.py @@ -0,0 +1,66 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + +class TestMatrixInverse(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'input:0' in inputs_info + inputs_data = {} + inputs_data['input:0'] = self._generate_invertible_matrices(self.input_shape) + + return inputs_data + + def _generate_invertible_matrices(self, input_shape): + if input_shape == [2, 2]: + return np.array([[1, 2], + [3, 1] + ], dtype=np.float32) + elif input_shape == [3, 3]: + return np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 1] + ], dtype=np.float32) + elif input_shape == [4, 4]: + return np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 2, 1], + [13, 14, 2, 1] + ], 
dtype=np.float32) + elif input_shape == [2, 4, 4]: + return np.array([[[10, 2, 3, 4], + [5, 10, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]], + [[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 6, 12], + [13, 14, 15, 10]] + ], dtype=np.float32) + + def create_matrix_inverse_net(self, input_shape, adjoint): + self.input_shape = input_shape + tf.compat.v1.reset_default_graph() + with tf.compat.v1.Session() as sess: + input_tensor = tf.compat.v1.placeholder(np.float32, input_shape, 'input') + tf.raw_ops.MatrixInverse(input=input_tensor, adjoint=adjoint) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + @pytest.mark.parametrize("input_shape", [[2, 2], [3, 3], [2, 4, 4]]) + @pytest.mark.parametrize("adjoint", [None, False, True]) + @pytest.mark.precommit + @pytest.mark.nightly + def test_matrix_inverse_basic(self, input_shape, adjoint, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'GPU': + pytest.skip("GPU does not support Inverse operation") + if adjoint: + pytest.skip("TF FE does not support MatrixInverse with adjoint equal to True") + self._test(*self.create_matrix_inverse_net(input_shape=input_shape, adjoint=adjoint), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py index 2dfbe97acc0483..82f85f204ff458 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py @@ -4,11 +4,8 @@ import platform import pytest -from common.layer_test_class import check_ir_version from common.tf_layer_test_class import CommonTFLayerTest -from unit_tests.utils.graph import build_graph - class TestPooling(CommonTFLayerTest): def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, method, @@ -39,10 +36,10 @@ def 
create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me if method == 'max': tf.raw_ops.MaxPool(input=input, ksize=kernel, strides=stride, padding=padding, - name='Operation') + name='Operation') elif method == 'avg': tf.raw_ops.AvgPool(value=input, ksize=kernel, strides=stride, padding=padding, - name='Operation') + name='Operation') # 5D tensors elif len(in_shape) == 5: @@ -54,10 +51,10 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me if method == 'max': tf.raw_ops.MaxPool3D(input=input, ksize=kernel, strides=stride, padding=padding, - name='Operation') + name='Operation') elif method == 'avg': tf.raw_ops.AvgPool3D(input=input, ksize=kernel, strides=stride, padding=padding, - name='Operation') + name='Operation') tf.compat.v1.global_variables_initializer() tf_net = sess.graph_def @@ -72,10 +69,10 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me in_shape=[1, 3, 224, 224], out_shape=[1, 3, 224, 224], method=method), pytest.param( - dict(kernel_size=[2, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'SAME'], - in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], - method=method), - marks=pytest.mark.precommit), + dict(kernel_size=[2, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], + method=method), + marks=pytest.mark.precommit), dict(kernel_size=[2, 4], strides=[2, 4], pads=[[0, 0], [0, 0], 'SAME'], in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 56], method=method), @@ -98,10 +95,10 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 75], method=method), pytest.param( - dict(kernel_size=[111, 111], strides=[111, 111], - pads=[[54, 54], [55, 55], 'SAME'], - in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 3], method=method), - marks=pytest.mark.precommit), + dict(kernel_size=[111, 111], strides=[111, 111], + pads=[[54, 54], [55, 55], 'SAME'], 
+ in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 3], method=method), + marks=pytest.mark.precommit), dict(kernel_size=[111, 113], strides=[111, 113], pads=[[54, 1], [55, 1], 'SAME'], in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 2], method=method), @@ -119,9 +116,9 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me dict(kernel_size=[2, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'VALID'], in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], method=method), pytest.param( - dict(kernel_size=[2, 4], strides=[2, 4], pads=[[0, 0], [0, 0], 'VALID'], - in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 56], method=method), - marks=pytest.mark.precommit), + dict(kernel_size=[2, 4], strides=[2, 4], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 56], method=method), + marks=pytest.mark.precommit), dict(kernel_size=[4, 2], strides=[4, 2], pads=[[0, 0], [0, 0], 'VALID'], in_shape=[1, 3, 224, 224], out_shape=[1, 3, 56, 112], method=method), dict(kernel_size=[2, 3], strides=[2, 3], pads=[[0, 0], [0, 0], 'VALID'], @@ -161,9 +158,9 @@ def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_l [dict(kernel_size=[1, 1, 1], strides=[1, 1, 1], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 224, 224, 224], method=method), pytest.param( - dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], - in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), - marks=pytest.mark.precommit), + dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + marks=pytest.mark.precommit), dict(kernel_size=[2, 2, 4], strides=[2, 2, 4], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 56], method=method), dict(kernel_size=[4, 2, 2], strides=[4, 2, 2], pads=[[0, 0, 0], [0, 
0, 0], 'SAME'], @@ -195,9 +192,9 @@ def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_l [dict(kernel_size=[1, 1, 1], strides=[1, 1, 1], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 224, 224, 224], method=method), pytest.param( - dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], - in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), - marks=pytest.mark.precommit), + dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + marks=pytest.mark.precommit), dict(kernel_size=[2, 2, 4], strides=[2, 2, 4], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 56], method=method), dict(kernel_size=[4, 2, 2], strides=[4, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py b/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py index 005cd886d4f1e8..42bfbc9ccea6d8 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py @@ -2,11 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from common.layer_test_class import check_ir_version from common.tf_layer_test_class import CommonTFLayerTest -from unit_tests.utils.graph import build_graph - class TestReLU6(CommonTFLayerTest): def create_relu6_net(self, shape, ir_version, use_legacy_frontend): @@ -25,22 +22,6 @@ def create_relu6_net(self, shape, ir_version, use_legacy_frontend): ref_net = None - if check_ir_version(10, None, ir_version) and not use_legacy_frontend: - nodes_attributes = { - 'input': {'kind': 'op', 'type': 'Parameter'}, - 'input_data': {'shape': shape, 'kind': 'data'}, - 'ReLU6': {'kind': 'op', 'type': 'Clamp', "max": 6, "min": 0}, - 'ReLU6_data': {'shape': shape, 'kind': 'data'}, - 'result': {'kind': 
'op', 'type': 'Result'} - } - - ref_net = build_graph(nodes_attributes, - [('input', 'input_data'), - ('input_data', 'ReLU6'), - ('ReLU6', 'ReLU6_data'), - ('ReLU6_data', 'result') - ]) - return tf_net, ref_net test_data_precommit = [dict(shape=[1, 3, 50, 100, 224])] diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py b/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py index 4088b471bf435f..0e25cf0c05fcf1 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py @@ -5,8 +5,6 @@ import pytest from common.tf_layer_test_class import CommonTFLayerTest -from unit_tests.utils.graph import build_graph - class TestResamplePattern(CommonTFLayerTest): def _prepare_input(self, inputs_dict): @@ -39,31 +37,7 @@ def create_resample_net(self, shape, factor, use_legacy_frontend): tf.compat.v1.global_variables_initializer() tf_net = sess.graph_def - # - # Create reference IR net - # Please, specify 'type': 'Input' for input node - # Moreover, do not forget to validate ALL layer attributes!!! 
- # - ref_net = None - if not use_legacy_frontend: - new_shape = shape.copy() - new_shape[2] *= factor - new_shape[3] *= factor - nodes_attributes = { - 'input': {'kind': 'op', 'type': 'Input'}, - 'input_data': {'shape': shape, 'kind': 'data'}, - 'resample': {'kind': 'op', 'type': 'caffe.ResampleParameter.NEAREST', - "factor": factor, - "height": 0, "width": 0, "antialias": 0}, - 'resample_data': {'shape': new_shape, 'kind': 'data'}, - } - - ref_net = build_graph(nodes_attributes, - [('input', 'input_data'), - ('input_data', 'resample'), - ('resample', 'resample_data') - ]) return tf_net, ref_net diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Round.py b/tests/layer_tests/tensorflow_tests/test_tf_Round.py new file mode 100644 index 00000000000000..dde79166a16f3a --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Round.py @@ -0,0 +1,31 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + +class TestRound(CommonTFLayerTest): + def create_tf_round_net(self, input_shape, input_type): + tf.compat.v1.reset_default_graph() + with tf.compat.v1.Session() as sess: + input = tf.compat.v1.placeholder(input_type, input_shape, 'input') + tf.raw_ops.Round(x=input) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + return tf_net, ref_net + + @pytest.mark.parametrize("input_shape", [[6], [2, 5, 3], [10, 5, 1, 5]]) + @pytest.mark.parametrize("input_type", [tf.float32, tf.int32, tf.int64, tf.float64]) + @pytest.mark.precommit + @pytest.mark.nightly + def test_round_basic(self, input_shape, input_type, ie_device, precision, + ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'GPU' and input_type in [tf.int32, tf.int64]: + pytest.skip("GPU error: Requested activation is not supported for integer type") + self._test(*self.create_tf_round_net(input_shape, input_type), + 
ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py b/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py new file mode 100644 index 00000000000000..08812fe7b46228 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py @@ -0,0 +1,60 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import platform + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest +from common.utils.tf_utils import run_in_jenkins + +rng = np.random.default_rng() + + +class TestStringToHashBucketFast(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'input:0' in inputs_info + input_shape = inputs_info['input:0'] + inputs_data = {} + sample_data = rng.choice(self.strings_dictionary, input_shape) + inputs_data['input:0'] = sample_data + return inputs_data + + def create_string_to_hash_bucket_fast_net(self, input_shape, strings_dictionary, num_buckets): + self.strings_dictionary = strings_dictionary + + tf.compat.v1.reset_default_graph() + with tf.compat.v1.Session() as sess: + input = tf.compat.v1.placeholder(tf.string, input_shape, 'input') + tf.raw_ops.StringToHashBucketFast(input=input, num_buckets=num_buckets) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + return tf_net, ref_net + + @pytest.mark.parametrize("input_shape", [[], [2], [3, 4], [1, 3, 2]]) + @pytest.mark.parametrize("num_buckets", [1, 4, 7, 11]) + @pytest.mark.parametrize("strings_dictionary", + [['UPPER CASE SENTENCE', 'lower case sentence', ' UppEr LoweR CAse SENtence', ' '], + ['Первое Предложение', 'второе предложение', ' ', ' ТРЕТЬЕ ПРЕДЛОЖЕНИЕ '], + ['第一句話在這裡', '第二句話在這裡', '第三句話在這裡'], + ['', ' ', '12345 ']]) + @pytest.mark.precommit + @pytest.mark.nightly + 
@pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', + 'aarch64', + 'arm64', 'ARM64'], + reason='Ticket - 126314, 132699') + def test_string_to_hash_bucket_fast(self, input_shape, num_buckets, strings_dictionary, ie_device, precision, + ir_version, temp_dir, + use_legacy_frontend): + if ie_device == 'GPU' or run_in_jenkins(): + pytest.skip("operation extension is not supported on GPU") + self._test(*self.create_string_to_hash_bucket_fast_net(input_shape=input_shape, num_buckets=num_buckets, + strings_dictionary=strings_dictionary), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) diff --git a/tests/model_hub_tests/pytorch/torch_utils.py b/tests/model_hub_tests/pytorch/torch_utils.py index 2baa62f5cbdb9a..09826b058c7855 100644 --- a/tests/model_hub_tests/pytorch/torch_utils.py +++ b/tests/model_hub_tests/pytorch/torch_utils.py @@ -80,7 +80,7 @@ def convert_model_impl(self, model_obj): gm = graph.module() print(gm.code) - decoder = TorchFXPythonDecoder(gm, gm) + decoder = TorchFXPythonDecoder(gm) decoder._input_signature = list(self.example.keys()) ov_model = convert_model(decoder, verbose=True) else: diff --git a/thirdparty/telemetry b/thirdparty/telemetry new file mode 160000 index 00000000000000..58e16c257a512e --- /dev/null +++ b/thirdparty/telemetry @@ -0,0 +1 @@ +Subproject commit 58e16c257a512ec7f451c9fccf9ff455065b285b diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index c1696c9898ec23..cb56fe77f84e21 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -48,7 +48,7 @@ def extract_module_extensions(args): if version.parse(torch.__version__) >= version.parse("2.2"): model = model.run_decompositions() gm = model.module() - decoder = 
TorchFXPythonDecoder(gm, gm) + decoder = TorchFXPythonDecoder(gm) else: decoder = TorchScriptPythonDecoder( model,