diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml index 6ead25c2f2e310..0d9138bc643d2a 100644 --- a/.github/actions/setup_python/action.yml +++ b/.github/actions/setup_python/action.yml @@ -23,7 +23,19 @@ runs: using: 'composite' steps: - - if: ${{ runner.os == 'Linux' && inputs.self-hosted-runner == 'true' }} + - name: Check if Python is already installed (Linux) + if: ${{ runner.os == 'Linux' }} + shell: bash + id: check_python + run: | + PYTHON_INSTALLED=$(python${{ inputs.version }} -V) || true + if [[ $PYTHON_INSTALLED ]]; then + echo "installed=true" >> $GITHUB_OUTPUT + else + echo "installed=false" >> $GITHUB_OUTPUT + fi + + - if: ${{ runner.os == 'Linux' && inputs.self-hosted-runner == 'true' && steps.check_python.outputs.installed == 'false' }} name: Install 'actions/setup-python@v4' dependencies shell: bash run: apt-get update && apt-get install -y ca-certificates software-properties-common gpg-agent tzdata @@ -31,18 +43,18 @@ runs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input TZ: "Europe/London" # to prevent tzdata from waiting user input - - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' }} + - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' && steps.check_python.outputs.installed == 'false' }} name: Setup sudo and python3 shell: bash run: apt-get update && apt-get install -y sudo python3 # Needed for the deadsnakes action - - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' }} + - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' && steps.check_python.outputs.installed == 'false' }} name: Setup Python ${{ inputs.version }} uses: akashchi/deadsnakes-action@92417281055a5878a0450f240a5b95883eb2d7e2 with: python-version: ${{ inputs.version }} - - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64') }} + - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 
'ARM64' && steps.check_python.outputs.installed == 'false' ) }} name: Setup Python ${{ inputs.version }} uses: actions/setup-python@v5 with: diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 452490c748b579..1143a81836cd5a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-24689 \ No newline at end of file +pr-24742 \ No newline at end of file diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile new file mode 100644 index 00000000000000..c192227085e901 --- /dev/null +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile @@ -0,0 +1,89 @@ +FROM openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + wget \ + git \ + ca-certificates \ + gpg-agent \ + tzdata \ + # Pythons + python3.8-dev \ + python3.8-venv \ + python3.8-distutils \ + python3.11-dev \ + python3.11-venv \ + python3.11-distutils \ + # For Java API + default-jdk \ + # Compiler \ + gcc-10 \ + g++-10 \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && \ + /install_build_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Set gcc-10 as a default compiler +RUN 
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30 + +# Install sccache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +# Install CUDA +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ + mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" +RUN apt update && apt install -y \ + libcudnn8=8.9.4.*-1+cuda11.8 \ + libcudnn8-dev=8.9.4.*-1+cuda11.8 \ + libcudnn8-samples=8.9.4.*-1+cuda11.8 \ + cuda-runtime-11-8 \ + cuda-11-8 \ + libcutensor1=1.6.1.5-1 \ + libcutensor-dev=1.6.1.5-1 \ + cuda-drivers=520.61.05-1 && \ + rm -rf /var/lib/apt/lists/* + +# Setup pip +ENV PIP_VERSION="24.0" +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.8 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + python3.11 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + rm -f get-pip.py + +# Use Python 3.11 as default instead of Python 3.8 +# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build +RUN python3.11 -m venv venv +ENV PATH="/venv/bin:$SCCACHE_HOME:$PATH" + +ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile 
b/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile index b872db9dc05bfb..68d80858dac63e 100644 --- a/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile +++ b/.github/dockerfiles/ov_test/ubuntu_20_04_arm64/Dockerfile @@ -48,3 +48,4 @@ RUN python3.11 -m venv venv ENV PATH="/venv/bin:$PATH" ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} +ENV PIP_INSTALL_PATH=/venv/lib/python3.11/site-packages diff --git a/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile new file mode 100644 index 00000000000000..0a151be1e6876d --- /dev/null +++ b/.github/dockerfiles/ov_test/ubuntu_20_04_x64/Dockerfile @@ -0,0 +1,52 @@ +FROM openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + ca-certificates \ + gpg-agent \ + tzdata \ + # Python + python3.11-dev \ + python3.11-venv \ + python3.11-distutils \ + libhdf5-dev \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && \ + /install_build_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Setup pip +ENV PIP_VERSION="24.0" +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.8 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + python3.11 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + 
rm -f get-pip.py + +# Use Python 3.11 as default instead of Python 3.8 +# Using venv here 'cause other methods to switch the default Python on Ubuntu 20 break both system and wheels build +RUN python3.11 -m venv venv +ENV PATH="/venv/bin:$SCCACHE_HOME:$PATH" + +ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} +ENV PIP_INSTALL_PATH=/venv/lib/python3.11/site-packages diff --git a/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile b/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile new file mode 100644 index 00000000000000..1566c2305d04b7 --- /dev/null +++ b/.github/dockerfiles/ov_test/ubuntu_22_04_x64/Dockerfile @@ -0,0 +1,52 @@ +FROM openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + ca-certificates \ + gpg-agent \ + tzdata \ + # Python + python3.11-dev \ + python3.11-venv \ + python3.11-distutils \ + libhdf5-dev \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && \ + /install_build_dependencies.sh && \ + rm -rf /var/lib/apt/lists/* + +# Setup pip +ENV PIP_VERSION="24.0" +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + python3.11 get-pip.py --no-cache-dir pip==${PIP_VERSION} && \ + rm -f get-pip.py + +# Use Python 3.11 as 
default +# Using venv here 'cause other methods to switch the default Python on Ubuntu 22 break both system and wheels build +RUN python3.11 -m venv venv +ENV PATH="/venv/bin:$SCCACHE_HOME:$PATH" + +ENV PIP_CACHE_DIR=/mount/caches/pip/linux/${PIP_VERSION} +ENV PIP_INSTALL_PATH=/venv/lib/python3.11/site-packages diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json index 26f29ee31c08a7..8ae2a203ba7332 100644 --- a/.github/scripts/workflow_rerun/errors_to_look_for.json +++ b/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -50,5 +50,9 @@ { "error_text": "The requested URL returned error: 500", "ticket": 139384 + }, + { + "error_text": "Unable to fetch some archives", + "ticket": 130965 } ] \ No newline at end of file diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index f8a82031af621b..bfa0514950db83 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -30,10 +30,6 @@ jobs: PARALLEL_TEST_SCRIPT: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py PARALLEL_TEST_CACHE: ${{ github.workspace }}/install/tests/test_cache.lst steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -64,10 +60,6 @@ jobs: tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR popd - - name: Install OpenVINO dependencies (Linux) - if: runner.os == 'Linux' - run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -c=gpu -y - - name: Fetch setup_python action uses: actions/checkout@v4 with: diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 2b2dd9e7572493..5ed1d17a27ca92 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ 
b/.github/workflows/job_cxx_unit_tests.yml @@ -32,10 +32,6 @@ jobs: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -63,10 +59,6 @@ jobs: tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR popd - - name: Install OpenVINO dependencies (Linux) - if: runner.os == 'Linux' - run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -c=gpu -y - # # Tests # diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 8da73268e007cc..f919e2c0478662 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -35,10 +35,6 @@ jobs: ONNX_MODEL_ZOO_SHA: "5faef4c33eba0395177850e1e31c4a6a9e634c82" if: ${{ github.event_name != 'merge_group' }} steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -70,27 +66,14 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - - name: Fetch setup_python action and model_zoo_preprocess script + - name: Fetch model_zoo_preprocess script uses: actions/checkout@v4 with: sparse-checkout: | - .github/actions/setup_python/action.yml src/frontends/onnx/tests/tests_python/model_zoo_preprocess.sh sparse-checkout-cone-mode: false path: 'openvino' - - name: Install dependencies - run: | - # install git (required to build pip deps from the sources) - apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates git-lfs - - - name: Setup Python 3.11 - uses: ./openvino/.github/actions/setup_python - with: - version: '3.11' - should-setup-pip-paths: 'false' - self-hosted-runner: ${{ 
contains(inputs.runner, 'aks') }} - - name: Update Models run: bash ${OPENVINO_REPO}/src/frontends/onnx/tests/tests_python/model_zoo_preprocess.sh -d ${MODELS_SHARE_PATH} -o -s "${{ env.ONNX_MODEL_ZOO_SHA }}" diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 2fe59dbdcf5d8c..f6d0c4ab255384 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -37,9 +37,6 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - name: Download OpenVINO package uses: actions/download-artifact@v4 @@ -70,10 +67,6 @@ jobs: tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR popd - - name: Install OpenVINO dependencies (Linux) - if: runner.os == 'Linux' - run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y -c=gpu - - name: Fetch setup_python action uses: actions/checkout@v4 with: diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 16aa00873bc53b..5d339c8af0acec 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -140,11 +140,22 @@ jobs: USE_SYSTEM_CACHE: False OP_REPORT_FILE: ${{ env.INSTALL_TEST_DIR }}/TEST-torch_unsupported_ops.log + - name: PagedAttention Test + if: always() + run: | + export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/test_pa_transformation.py -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short + env: + TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}} + TEST_DEVICE: CPU + USE_SYSTEM_CACHE: False + OP_REPORT_FILE: ${{ env.INSTALL_TEST_DIR 
}}/TEST-torch_unsupported_ops.log + - name: Reformat unsupported ops file if: '!cancelled()' run: | python3 ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/scripts/process_op_report.py ${INSTALL_TEST_DIR}/TEST-torch_unsupported_ops.log - + - name: Available storage after tests run: | echo "Available storage:" diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 534ce33409c58c..3ca9719e75ebfe 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -31,10 +31,6 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests BUILD_DIR: ${{ github.workspace }}/build steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -63,10 +59,6 @@ jobs: tar -xzf openvino_tests.tar.gz -C $INSTALL_DIR popd - - name: Install OpenVINO dependencies (Linux) - if: runner.os == 'Linux' - run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y - - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' run: brew install coreutils @@ -84,7 +76,6 @@ jobs: with: version: '3.11' should-setup-pip-paths: 'false' - self-hosted-runner: ${{ runner.os == 'Linux' }} - name: Build cpp samples - GCC run: $INSTALL_DIR/samples/cpp/build_samples.sh -i $INSTALL_DIR -b $BUILD_DIR/cpp_samples @@ -94,7 +85,7 @@ jobs: - name: Build cpp samples - Clang if: runner.os == 'Linux' run: | - apt-get install -y clang + apt-get update && apt-get install -y clang $INSTALL_DIR/samples/cpp/build_samples.sh -i $INSTALL_DIR -b $BUILD_DIR/cpp_samples_clang env: CMAKE_COMPILE_WARNING_AS_ERROR: 'ON' diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index bb2efa0eec6049..01ed55d91d570f 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ 
b/.github/workflows/job_tensorflow_layer_tests.yml @@ -41,10 +41,6 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Set apt retries - if: runner.os == 'Linux' - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -92,10 +88,6 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} popd - - name: Install OpenVINO dependencies (Linux) - if: runner.os == 'Linux' - run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y -c=gpu - - name: Fetch setup_python action uses: actions/checkout@v4 with: diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 2dd36814d9c600..4635fabefbd13c 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -34,19 +34,6 @@ jobs: MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests NUMBER_OF_REPLICAS: 2 steps: - - name: Check sudo - if: ${{ runner.os == 'Linux' }} - run: if [ "$(id -u)" -eq 0 ]; then apt update && apt --assume-yes install sudo; fi - - - name: Set apt retries - if: runner.os == 'Linux' - run: | - if [ "$(id -u)" -eq 0 ]; then - echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - else - sudo sh -c "echo 'Acquire::Retries \"10\";' >> /etc/apt/apt.conf.d/80-retries" - fi - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -95,12 +82,6 @@ jobs: sparse-checkout-cone-mode: false path: 'openvino' - - name: Install dependencies - if: ${{ runner.os == 'Linux' }} - run: | - # install git (required to build pip deps from the sources) - sudo apt-get install --assume-yes --no-install-recommends g++ git ca-certificates wget - - name: Setup Python 3.11 uses: 
./openvino/.github/actions/setup_python with: diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 5198ee5db996ae..721e4772b039ba 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -56,7 +56,6 @@ jobs: install_build_dependencies.sh - name: Setup Python ${{ env.PYTHON_VERSION }} - if: ${{ runner.os != 'Linux' }} # We do not need to install Python on Linux as we use Docker with it installed uses: ./.github/actions/setup_python with: version: ${{ env.PYTHON_VERSION }} diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 008c68854caf27..0a7a1b0375dd28 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -69,6 +69,8 @@ jobs: with: images: | ov_build/ubuntu_20_04_x64 + ov_build/ubuntu_20_04_x64_nvidia + ov_test/ubuntu_20_04_x64 registry: 'openvinogithubactions.azurecr.io' dockerfiles_root_dir: '.github/dockerfiles' changed_components: ${{ needs.smart_ci.outputs.changed_components }} @@ -308,22 +310,22 @@ jobs: image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' Samples: - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] if: fromJSON(needs.smart_ci.outputs.affected_components).samples uses: ./.github/workflows/job_samples_tests.yml with: runner: 'aks-linux-4-cores-16gb' - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64 }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} JS_API: name: OpenVINO JS API - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API uses: ./.github/workflows/job_openvino_js.yml with: runner: 'aks-linux-4-cores-16gb' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04"}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}"}' 
Conformance: needs: [ Build, Smart_CI ] @@ -470,79 +472,79 @@ jobs: name: ONNX Models Tests if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test || fromJSON(needs.smart_ci.outputs.affected_components).ONNX_FE.test - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_onnx_models_tests.yml with: runner: 'aks-linux-16-cores-32gb' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' CXX_Unit_Tests: name: C++ unit tests - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: runner: 'aks-linux-4-cores-16gb' - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64 }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} Python_Unit_Tests: name: Python unit tests - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_python_unit_tests.yml with: runner: 'aks-linux-4-cores-16gb' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} TensorFlow_Layer_Tests: name: TensorFlow Layer Tests - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: runner: 'aks-linux-4-cores-16gb' shell: bash - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ 
fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: runner: 'aks-linux-8-cores-32gb' - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_x64 }} TensorFlow_Models_Tests_Precommit: name: TensorFlow Models tests if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test || fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: runner: 'aks-linux-8-cores-16gb' model_scope: 'precommit' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' TensorFlow_Models_Tests_Nightly_TF_HUB: name: TensorFlow TF Hub Models tests if: ${{ github.event_name == 'schedule' }} - needs: [ Build, Smart_CI, Openvino_tokenizers ] + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: runner: 'aks-linux-8-cores-32gb' model_scope: 'nightly_tf_hub' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' TensorFlow_Models_Tests_Nightly_HF: name: TensorFlow Hugging Face Models tests if: ${{ github.event_name == 'schedule' }} - needs: [ 
Build, Smart_CI, Openvino_tokenizers ] + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: runner: 'aks-linux-8-cores-32gb' model_scope: 'nightly_hf' - container: '{"image": "openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04", "volumes": ["/mount:/mount"]}' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }}", "volumes": ["/mount:/mount"]}' # TODO: Switch back to self-hosted runners # container: @@ -560,14 +562,14 @@ jobs: NVIDIA_Plugin: name: NVIDIA plugin - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] timeout-minutes: 15 defaults: run: shell: bash runs-on: aks-linux-16-cores-32gb container: - image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64_nvidia }} volumes: - /mount:/mount options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING @@ -591,20 +593,6 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).NVIDIA steps: - - name: Set apt retries - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - - name: Fetch install_build_dependencies.sh - uses: actions/checkout@v4 - with: - sparse-checkout: | - install_build_dependencies.sh - sparse-checkout-cone-mode: false - path: ${{ env.OPENVINO_REPO }} - - - name: Install Prerequisites - run: apt update && apt install -y git ca-certificates - - name: Download OpenVINO package uses: actions/download-artifact@v4 with: @@ -634,38 +622,6 @@ jobs: path: ${{ env.OPENVINO_CONTRIB_REPO }} ref: 'master' - # - # Dependencies - # - - - name: Install build dependencies - run: | - ${OPENVINO_REPO}/install_build_dependencies.sh - apt -y --no-install-recommends install software-properties-common curl - - - name: Install sccache - uses: mozilla-actions/sccache-action@v0.0.4 - with: - version: "v0.7.5" - - - name: Install CUDA - run: | - 
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 - - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - apt update - apt install -y \ - libcudnn8=8.9.4.*-1+cuda11.8 \ - libcudnn8-dev=8.9.4.*-1+cuda11.8 \ - libcudnn8-samples=8.9.4.*-1+cuda11.8 \ - cuda-runtime-11-8 \ - cuda-11-8 \ - libcutensor1=1.6.1.5-1 \ - libcutensor-dev=1.6.1.5-1 \ - cuda-drivers=520.61.05-1 - # # Build # diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index bebf06fdb1d230..97e8c533b43ca5 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -66,6 +66,7 @@ jobs: with: images: | ov_build/ubuntu_22_04_x64_cc + ov_test/ubuntu_22_04_x64 registry: 'openvinogithubactions.azurecr.io' dockerfiles_root_dir: '.github/dockerfiles' changed_components: ${{ needs.smart_ci.outputs.changed_components }} @@ -333,11 +334,11 @@ jobs: CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test - needs: [ Build, Smart_CI ] + needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: runner: 'aks-linux-8-cores-32gb' - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }} Overall_Status: name: ci/gha_overall_status_linux_cc diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 66d814c863acf5..d4a517de57eef2 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -401,7 +401,7 @@ jobs: 
--gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml - name: AUTO unit tests - if: ${{ 'false' }} # Ticket: 134423 + if: always() run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_unit_tests --gtest_print_time=1 \ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst index 7096284df6cd49..058b93f3b9dd75 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst @@ -155,7 +155,7 @@ Step 2: Install OpenVINO Runtime Using the APT Package Manager .. code-block:: sh - sudo apt install openvino-2024.0.0 + sudo apt install openvino-2024.2.0 .. note:: @@ -228,7 +228,7 @@ To uninstall OpenVINO Runtime via APT, run the following command based on your n .. code-block:: sh - sudo apt autoremove openvino-2024.0.0 + sudo apt autoremove openvino-2024.2.0 What's Next? diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst index b66e5f993bf8b4..38e5871af20ff3 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst @@ -52,7 +52,7 @@ Installing OpenVINO Runtime with Conan Package Manager .. code-block:: sh [requires] - openvino/2024.1.0 + openvino/2024.2.0 [generators] CMakeDeps CMakeToolchain diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index 123a52586621b4..d5461348e35112 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -64,7 +64,7 @@ Installing OpenVINO Runtime with Anaconda Package Manager .. 
code-block:: sh - conda install -c conda-forge openvino=2024.1.0 + conda install -c conda-forge openvino=2024.2.0 Congratulations! You've just Installed OpenVINO! For some use cases you may still need to install additional components. Check the description below, as well as the @@ -115,7 +115,7 @@ with the proper OpenVINO version number: .. code-block:: sh - conda remove openvino=2024.1.0 + conda remove openvino=2024.2.0 What's Next? ############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst index 2d404187b16cd1..a5559e937e50d6 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst @@ -128,7 +128,7 @@ Install OpenVINO Runtime .. code-block:: sh - sudo yum install openvino-2024.0.0 + sudo yum install openvino-2024.2.0 @@ -199,7 +199,7 @@ To uninstall OpenVINO Runtime via YUM, run the following command based on your n .. code-block:: sh - sudo yum autoremove openvino-2024.0.0 + sudo yum autoremove openvino-2024.2.0 diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst index ca61d35844222e..20166da049fee7 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst @@ -143,7 +143,7 @@ To uninstall OpenVINO Runtime via ZYPPER, run the following command based on you .. 
code-block:: sh - sudo zypper remove *openvino-2024.0.0* + sudo zypper remove *openvino-2024.2.0* diff --git a/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst index 43c703a47bed36..eb710813f0e579 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/bert-benchmark.rst @@ -9,7 +9,7 @@ Bert Benchmark Python Sample This sample demonstrates how to estimate performance of a Bert model using Asynchronous -Inference Request API. Unlike `demos `__ this sample does not have +Inference Request API. Unlike `demos `__ this sample does not have configurable command line arguments. Feel free to modify sample's source code to try out different options. diff --git a/docs/articles_en/learn-openvino/openvino-samples/get-started-demos.rst b/docs/articles_en/learn-openvino/openvino-samples/get-started-demos.rst index a0137b0ee25d8f..c5c48a5319a266 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/get-started-demos.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/get-started-demos.rst @@ -264,7 +264,7 @@ You need a model that is specific for your inference task. You can get it from o Convert the Model -------------------- -If Your model requires conversion, check the `article `__ for information how to do it. +If Your model requires conversion, check the `article `__ for information how to do it. .. 
_download-media: diff --git a/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst b/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst index 888e4fd142c3c3..9c48454afc0802 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/hello-nv12-input-classification.rst @@ -211,6 +211,6 @@ Additional Resources - :doc:`Get Started with Samples ` - :doc:`Using OpenVINO Samples <../openvino-samples>` - :doc:`Convert a Model <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api>` -- `API Reference `__ +- `API Reference `__ - `Hello NV12 Input Classification C++ Sample on Github `__ - `Hello NV12 Input Classification C Sample on Github `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst index d706c2fb22f197..d001e5c018e93d 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/sync-benchmark.rst @@ -11,7 +11,7 @@ Sync Benchmark Sample This sample demonstrates how to estimate performance of a model using Synchronous Inference Request API. It makes sense to use synchronous inference only in latency oriented scenarios. Models with static input shapes are supported. Unlike -`demos `__ this sample does not have other configurable command-line +`demos `__ this sample does not have other configurable command-line arguments. Feel free to modify sample's source code to try out different options. 
Before using the sample, refer to the following requirements: diff --git a/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst b/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst index 9e11cdfc6e9be0..6fbdaf8dd999ea 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/throughput-benchmark.rst @@ -9,7 +9,7 @@ Throughput Benchmark Sample This sample demonstrates how to estimate performance of a model using Asynchronous -Inference Request API in throughput mode. Unlike `demos `__ this sample +Inference Request API in throughput mode. Unlike `demos `__ this sample does not have other configurable command-line arguments. Feel free to modify sample's source code to try out different options. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst index d7520f57315ab0..4f37cc350924fd 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/general-optimizations.rst @@ -62,7 +62,7 @@ Below are example-codes for the regular and async-based approaches to compare: The technique can be generalized to any available parallel slack. For example, you can do inference and simultaneously encode the resulting or previous frames or run further inference, like emotion detection on top of the face detection results. -Refer to the `Object Detection C++ Demo `__ , `Object Detection Python Demo `__ (latency-oriented Async API showcase) and :doc:`Benchmark App Sample <../../../learn-openvino/openvino-samples/benchmark-tool>` for complete examples of the Async API in action. 
+Refer to the `Object Detection C++ Demo `__ , `Object Detection Python Demo `__ (latency-oriented Async API showcase) and :doc:`Benchmark App Sample <../../../learn-openvino/openvino-samples/benchmark-tool>` for complete examples of the Async API in action. .. note:: diff --git a/docs/dev/pypi_publish/pypi-openvino-dev.md b/docs/dev/pypi_publish/pypi-openvino-dev.md index 54f943515015d2..24f9c35b1e4831 100644 --- a/docs/dev/pypi_publish/pypi-openvino-dev.md +++ b/docs/dev/pypi_publish/pypi-openvino-dev.md @@ -119,10 +119,10 @@ For example, to install and configure the components for working with TensorFlow **In addition, the openvino-dev package installs the following components by default:** -| Component | Console Script | Description | +| Component | Console Script | Description | |------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Legacy Model conversion API](https://docs.openvino.ai/nightly/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | `mo` |**Model conversion API** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | | -| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/nightly/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](@ref omz_models_group_public) and [Intel](@ref omz_models_group_intel)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with model conversion API. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using model conversion API.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. | +| [Legacy Model conversion API](https://docs.openvino.ai/2024/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api.html) | `mo` |**Model conversion API** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | | +| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/2024/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](@ref omz_models_group_public) and [Intel](@ref omz_models_group_intel)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with model conversion API. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using model conversion API.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. | ## Troubleshooting diff --git a/docs/home.rst b/docs/home.rst index 08b2e8d62cc340..b4f63f81d7da09 100644 --- a/docs/home.rst +++ b/docs/home.rst @@ -1,5 +1,5 @@ ============================ -OpenVINO 2024.1 +OpenVINO 2024.2 ============================ .. meta:: diff --git a/samples/cpp/benchmark/sync_benchmark/README.md b/samples/cpp/benchmark/sync_benchmark/README.md index b9c884f93fcfef..b1eb079216064d 100644 --- a/samples/cpp/benchmark/sync_benchmark/README.md +++ b/samples/cpp/benchmark/sync_benchmark/README.md @@ -8,14 +8,12 @@ For more detailed information on how this sample works, check the dedicated [art | Options | Values | | -------------------------------| -------------------------------------------------------------------------------------------------------------------------| -| Validated Models | [alexnet](https://docs.openvino.ai/nightly/omz_models_model_alexnet.html), | -| | [googlenet-v1](https://docs.openvino.ai/nightly/omz_models_model_googlenet_v1.html), | -| | [yolo-v3-tf](https://docs.openvino.ai/nightly/omz_models_model_yolo_v3_tf.html), | -| | [face-detection-0200](https://docs.openvino.ai/nightly/omz_models_model_face_detection_0200.html) | +| Validated Models | [yolo-v3-tf](https://docs.openvino.ai/2024/omz_models_model_yolo_v3_tf.html), | +| | [face-detection-0200](https://docs.openvino.ai/2024/omz_models_model_face_detection_0200.html) | | Model Format | OpenVINO™ toolkit Intermediate Representation | | | (\*.xml + \*.bin), ONNX (\*.onnx) | -| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) | -| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/sync-benchmark.html) | +| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) 
| +| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/sync-benchmark.html) | The following C++ API is used in the application: diff --git a/samples/cpp/benchmark/throughput_benchmark/README.md b/samples/cpp/benchmark/throughput_benchmark/README.md index ca022b64cc1e7b..43633498321c1e 100644 --- a/samples/cpp/benchmark/throughput_benchmark/README.md +++ b/samples/cpp/benchmark/throughput_benchmark/README.md @@ -10,14 +10,12 @@ For more detailed information on how this sample works, check the dedicated [art | Options | Values | | ----------------------------| -------------------------------------------------------------------------------------------------------------------------------| -| Validated Models | [alexnet](https://docs.openvino.ai/nightly/omz_models_model_alexnet.html), | -| | [googlenet-v1](https://docs.openvino.ai/nightly/omz_models_model_googlenet_v1.html), | -| | [yolo-v3-tf](https://docs.openvino.ai/nightly/omz_models_model_yolo_v3_tf.html), | -| | [face-detection-](https://docs.openvino.ai/nightly/omz_models_model_face_detection_0200.html) | +| Validated Models | [yolo-v3-tf](https://docs.openvino.ai/2024/omz_models_model_yolo_v3_tf.html), | +| | [face-detection-](https://docs.openvino.ai/2024/omz_models_model_face_detection_0200.html) | | Model Format | OpenVINO™ toolkit Intermediate Representation | | | (\*.xml + \*.bin), ONNX (\*.onnx) | -| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) | -| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/throughput-benchmark.html) | +| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) | +| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/throughput-benchmark.html) | The following C++ API is used in the application: diff 
--git a/samples/cpp/hello_reshape_ssd/README.md b/samples/cpp/hello_reshape_ssd/README.md index ba79849d3b80ba..bc346e850cf5ba 100644 --- a/samples/cpp/hello_reshape_ssd/README.md +++ b/samples/cpp/hello_reshape_ssd/README.md @@ -9,10 +9,10 @@ For more detailed information on how this sample works, check the dedicated [art | Options | Values | | ----------------------------| -----------------------------------------------------------------------------------------------------------------------------------------| -| Validated Models | [person-detection-retail-0013](https://docs.openvino.ai/nightly/omz_models_model_person_detection_retail_0013.html) | +| Validated Models | [person-detection-retail-0013](https://docs.openvino.ai/2024/omz_models_model_person_detection_retail_0013.html) | | Model Format | OpenVINO™ toolkit Intermediate Representation (\*.xml + \*.bin), ONNX (\*.onnx) | -| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) | -| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/hello-reshape-ssd.html) | +| Supported devices | [All](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html) | +| Other language realization | [Python](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/hello-reshape-ssd.html) | The following C++ API is used in the application: diff --git a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp index c6a58dbf91b9c1..590c9f2171efcc 100644 --- a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -95,6 +95,7 @@ class LP_TRANSFORMATIONS_API DataPrecision { } } + // the lowest value (example, for signed symmetric
types: -max) static float getMinValue(const element::Type precision, const size_t levels) { switch (precision) { case element::u4: @@ -134,6 +135,8 @@ class LP_TRANSFORMATIONS_API DataPrecision { break; case element::f16: return -1.0e15f; + case element::bf16: + return -3.38953139e38f; case element::f32: return std::numeric_limits::lowest(); default: @@ -172,6 +175,8 @@ class LP_TRANSFORMATIONS_API DataPrecision { return 2147483648.f; // 2147483648.f == 2147483647.f case element::f16: return 1.0e15f; + case element::bf16: + return 3.38953139e38f; case element::f32: return std::numeric_limits::max(); default: diff --git a/src/common/low_precision_transformations/src/eliminate_fake_quantize.cpp b/src/common/low_precision_transformations/src/eliminate_fake_quantize.cpp index 3010ea213d04f2..cb5d9270a43768 100644 --- a/src/common/low_precision_transformations/src/eliminate_fake_quantize.cpp +++ b/src/common/low_precision_transformations/src/eliminate_fake_quantize.cpp @@ -93,9 +93,13 @@ bool check_interval(const std::shared_ptr& fq, bool check_intervals(const std::shared_ptr& fakeQuantize) { const auto& element_type = fakeQuantize->get_output_element_type(0); const auto levels = fakeQuantize->get_levels(); + if (levels == 0) { + return false; + } const auto min_value = DataPrecision::getMinValue(element_type, levels); const auto max_value = DataPrecision::getMaxValue(element_type, levels); - const auto max_diff = (max_value - min_value) / levels; + // let's divide before to avoid overflow + const auto max_diff = max_value / levels - min_value / levels; // input intervals can be not equal with type intervals for low precision only const auto exact_comparison = !element_type.is_integral(); diff --git a/src/common/low_precision_transformations/tests/eliminate_fake_quantize_transformation.cpp b/src/common/low_precision_transformations/tests/eliminate_fake_quantize_transformation.cpp index e33739832a6d4a..d4a55513306f5f 100644 --- 
a/src/common/low_precision_transformations/tests/eliminate_fake_quantize_transformation.cpp +++ b/src/common/low_precision_transformations/tests/eliminate_fake_quantize_transformation.cpp @@ -25,14 +25,12 @@ class TransformationTestValues { public: class Actual { public: - ov::element::Type precisionBefore; ov::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData1; ov::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData2; }; class Expected { public: - ov::element::Type precisionBefore; ov::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData1; ov::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData2; ov::builder::subgraph::DequantizationOperations dequantizationOperations2; @@ -44,17 +42,28 @@ class TransformationTestValues { Expected expected; }; +typedef std::tuple< + ov::element::Type, + TransformationTestValues +> EliminateFakeQuantizeTransformationParams; + class EliminateFakeQuantizeTransformation : public LayerTransformation, - public testing::WithParamInterface { + public testing::WithParamInterface { public: void SetUp() override { - const TransformationTestValues testValues = GetParam(); + const ov::element::Type execPrecision = std::get<0>(GetParam()); + TransformationTestValues testValues = std::get<1>(GetParam()); + + if (!testValues.expected.dequantizationOperations2.multiply.empty()) { + testValues.expected.dequantizationOperations2.multiply.outPrecision = execPrecision; + } actualFunction = ov::builder::subgraph::FuseFakeQuantizeFunction::get(testValues.inputShape, - testValues.actual.precisionBefore, - testValues.actual.fakeQuantizeOnData1, - testValues.actual.fakeQuantizeOnData2, - {}); + execPrecision, + testValues.actual.fakeQuantizeOnData1, + testValues.actual.fakeQuantizeOnData2, + {}); + SimpleLowPrecisionTransformer transformer; transformer.add( testValues.params); @@ -67,20 +76,28 @@ class EliminateFakeQuantizeTransformation : public LayerTransformation, referenceFunction = 
ov::builder::subgraph::FuseFakeQuantizeFunction::get(testValues.inputShape, - testValues.expected.precisionBefore, - testValues.expected.fakeQuantizeOnData1, - testValues.expected.fakeQuantizeOnData2, - testValues.expected.dequantizationOperations2); + execPrecision, + testValues.expected.fakeQuantizeOnData1, + testValues.expected.fakeQuantizeOnData2, + testValues.expected.dequantizationOperations2); + } - static std::string getTestCaseName(testing::TestParamInfo obj) { - const TransformationTestValues testValues = obj.param; + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ov::element::Type execPrecision = std::get<0>(obj.param); + TransformationTestValues testValues = std::get<1>(obj.param); + + if (!testValues.expected.dequantizationOperations2.multiply.empty()) { + testValues.expected.dequantizationOperations2.multiply.outPrecision = execPrecision; + } std::ostringstream result; result << testValues.inputShape << "_" << testValues.params.updatePrecisions << "_" - << testValues.actual.precisionBefore << "_" << testValues.actual.fakeQuantizeOnData1 << "_" - << testValues.actual.fakeQuantizeOnData2 << "_" << testValues.expected.precisionBefore << "_" - << testValues.expected.fakeQuantizeOnData1 << "_" << testValues.expected.fakeQuantizeOnData2 << "_" + << execPrecision << "_" + << testValues.actual.fakeQuantizeOnData1 << "_" + << testValues.actual.fakeQuantizeOnData2 << "_" + << testValues.expected.fakeQuantizeOnData1 << "_" + << testValues.expected.fakeQuantizeOnData2 << "_" << testValues.expected.dequantizationOperations2; return result.str(); } @@ -100,12 +117,10 @@ const std::vector testValues = { {1, 3, 16, 16}, TestTransformationParams(true, {ov::element::u8}, {ov::element::i8}), { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}} }, { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, element::u8}, {}, { ov::element::f32, {}, {{0.01f}, ov::element::f32, {}} } @@ 
-115,12 +130,10 @@ const std::vector testValues = { {1, 3, 16, 16}, TestTransformationParams(true, {ov::element::u8}, {ov::element::i8}), { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {256ul, {}, {0.f}, {2.549f}, {0.f}, {2.55f}} }, { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, element::u8}, {}, { ov::element::f32, {}, {{0.01f}, ov::element::f32, {}} } @@ -130,27 +143,35 @@ const std::vector testValues = { {1, 3, 16, 16}, TestTransformationParams(true, {ov::element::u8}, {ov::element::i8}), { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f / 2.f}} }, { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, element::u8}, {}, { ov::element::f32, {}, {{0.005f}, ov::element::f32, {}} } } - }, + } +}; +// clang-format on + +INSTANTIATE_TEST_SUITE_P(smoke_LPT, + EliminateFakeQuantizeTransformation, + ::testing::Combine( + ::testing::ValuesIn({ov::element::f32, ov::element::bf16}), + ::testing::ValuesIn(testValues)), + EliminateFakeQuantizeTransformation::getTestCaseName); + +// clang-format off +const std::vector testValuesDiffFq = { { {1, 3, 16, 16}, TestTransformationParams(true, {ov::element::u8}, {ov::element::i8}), { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {256ul, {}, {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f}} }, { - element::f32, {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, element::u8}, {256ul, {}, {0.f}, {127.5f}, {0.f}, {255.f}, element::u8}, { ov::element::f32, {}, {{0.005f}, ov::element::f32, {}} } @@ -159,9 +180,11 @@ const std::vector testValues = { }; // clang-format on -INSTANTIATE_TEST_SUITE_P(smoke_LPT, +INSTANTIATE_TEST_SUITE_P(smoke_LPT_DiffFq, EliminateFakeQuantizeTransformation, - ::testing::ValuesIn(testValues), + ::testing::Combine( + ::testing::ValuesIn({ov::element::f32}), + ::testing::ValuesIn(testValuesDiffFq)), EliminateFakeQuantizeTransformation::getTestCaseName); } // namespace diff --git 
a/src/common/snippets/include/snippets/lowered/loop_info.hpp b/src/common/snippets/include/snippets/lowered/loop_info.hpp index ca28b27a760ac7..2bd1a4f8babc77 100644 --- a/src/common/snippets/include/snippets/lowered/loop_info.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_info.hpp @@ -34,6 +34,12 @@ class LoopInfo { */ virtual std::shared_ptr clone_with_new_expr(const ExpressionMap& expr_map) const = 0; + /** + * @brief Check if some parameters of Loop are dynamic (undefined) + * @return True if some parameters of Loop are unknown, False if all parameters are static + */ + virtual bool is_dynamic() const; + /** * @brief Returns count of input ports * @return count @@ -184,6 +190,8 @@ class UnifiedLoopInfo : public LoopInfo { int64_t ptr_increment = 0; int64_t finalization_offset = 0; int64_t data_size = 0; + + bool is_dynamic() const; }; // The structure describes full information about port // - TODO [140365] : UnifiedLoopInfo should have the map of LoopPorts and LoopDesc as class field @@ -212,6 +220,12 @@ class UnifiedLoopInfo : public LoopInfo { */ std::shared_ptr clone_with_new_expr(const ExpressionMap& expr_map) const override; + /** + * @brief Check if some parameters of Loop are dynamic (undefined) + * @return True if some parameters of Loop are unknown, False if all parameters are static + */ + bool is_dynamic() const override; + /** * @brief Returns handlers of loop specific iterations * @return m_handlers @@ -373,6 +387,12 @@ class ExpandedLoopInfo : public LoopInfo { */ std::shared_ptr clone_with_new_expr(const ExpressionMap& expr_map) const override; + /** + * @brief Check if some parameters of Loop are dynamic (undefined) + * @return True if some parameters of Loop are unknown, False if all parameters are static + */ + bool is_dynamic() const override; + /** * @brief Returns original unified LoopInfo from which this LoopInfo was created * @return const reference of m_unified_loop_info diff --git 
a/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp index 31631b9b0ec638..2289ef0246e8ff 100644 --- a/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp @@ -6,6 +6,8 @@ #include "pass.hpp" +#include "snippets/utils.hpp" + namespace ov { namespace snippets { namespace lowered { @@ -46,6 +48,10 @@ class IdentifyBuffers: public RangedPass { int64_t ptr_increment = 0; int64_t finalization_offset = 0; + inline bool is_static() const { + return !utils::is_dynamic_value(ptr_increment) && !utils::is_dynamic_value(finalization_offset); + } + friend bool operator==(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); friend bool operator!=(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); }; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_loops.hpp index 1329fa22a6b572..1c86ccbbc835a3 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_loops.hpp @@ -26,7 +26,6 @@ class InsertLoops : public RangedPass { bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; private: static void insertion(LinearIR& linear_ir, const LoopManagerPtr& loop_manager, size_t loop_id); - static bool is_loop_dynamic(const UnifiedLoopInfoPtr& loop_info); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/op/loop.hpp b/src/common/snippets/include/snippets/op/loop.hpp index 1053dba7d2ad7f..2226110555ba97 100644 --- a/src/common/snippets/include/snippets/op/loop.hpp +++ b/src/common/snippets/include/snippets/op/loop.hpp @@ -40,26 +40,13 @@ class LoopBegin : public LoopBase { LoopBegin(); void validate_and_infer_types() override; + std::shared_ptr 
clone_with_new_inputs(const OutputVector& inputs) const override; std::shared_ptr get_loop_end() const; protected: void validate_and_infer_types_except_LoopEnd(); }; -class LoopBeginStatic : public LoopBegin { -public: - OPENVINO_OP("LoopBeginStatic", "SnippetsOpset", LoopBegin); - LoopBeginStatic() = default; - std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; -}; - -class LoopBeginDynamic : public LoopBegin { -public: - OPENVINO_OP("LoopBeginDynamic", "SnippetsOpset", LoopBegin); - LoopBeginDynamic() = default; - std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; -}; - /** * @interface LoopEnd * @brief Marks the end of the Loop region and defines the loop properties. @@ -77,78 +64,50 @@ class LoopEnd : public LoopBase { public: OPENVINO_OP("LoopEnd", "SnippetsOpset", LoopBase); LoopEnd() = default; - LoopEnd(const Output& loop_begin, size_t work_amount_increment, std::vector is_incremented, + LoopEnd(const Output& loop_begin, size_t work_amount, size_t work_amount_increment, + std::vector is_incremented, std::vector ptr_increments, std::vector finalization_offsets, std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id); void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; + std::shared_ptr get_loop_begin(); const std::vector& get_is_incremented() const; + const std::vector& get_finalization_offsets() const; + const std::vector& get_ptr_increments() const; const std::vector& get_element_type_sizes() const; + size_t get_work_amount() const; size_t get_increment() const; size_t get_id() const; size_t get_input_num() const; size_t get_output_num() const; bool get_evaluate_once() const; + bool has_dynamic_params() const; void set_is_incremented(std::vector is_incremented); + void set_finalization_offsets(std::vector offsets); + void 
set_ptr_increments(std::vector new_ptr_increments); + void set_work_amount(size_t new_work_amount); void set_increment(size_t new_increment); void set_evaluate_once(bool once); void set_id(size_t id); protected: std::vector m_is_incremented = {}; + std::vector m_ptr_increments = {}; + std::vector m_finalization_offsets = {}; std::vector m_element_type_sizes = {}; + size_t m_work_amount = 0; size_t m_work_amount_increment = 0; size_t m_input_num = 0; size_t m_output_num = 0; size_t m_id = 0; // the corresponding Loop identificator in LoopManager -}; - -class LoopEndStatic : public LoopEnd { -public: - OPENVINO_OP("LoopEndStatic", "SnippetsOpset", LoopEnd); - LoopEndStatic() = default; - LoopEndStatic(const Output& loop_begin, size_t work_amount, size_t work_amount_increment, - std::vector is_incremented, std::vector ptr_increments, std::vector finalization_offsets, - std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id); - std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; - void validate_and_infer_types() override; - bool visit_attributes(AttributeVisitor& visitor) override; - - // update_ptr_increments resets non-zero increments to the new_increments. 
It's used when work_amount_increment is - // updated and we need to refresh ptr increments accordingly while respecting the broadcasting pattern - void update_ptr_increments(int64_t new_increment); - - const std::vector& get_finalization_offsets() const; - const std::vector& get_ptr_increments() const; - size_t get_work_amount() const; - bool get_evaluate_once() const; - - void set_finalization_offsets(std::vector offsets); - void set_ptr_increments(std::vector new_ptr_increments); - void set_work_amount(size_t new_work_amount); - void set_evaluate_once(bool once); - -protected: - std::vector m_ptr_increments = {}; - std::vector m_finalization_offsets = {}; - size_t m_work_amount = 0; bool m_evaluate_once = false; // true if the Loop is executed only once, used to skip setting and testing the loop counter }; -class LoopEndDynamic : public LoopEnd { -public: - OPENVINO_OP("LoopEndDynamic", "SnippetsOpset", LoopEnd); - LoopEndDynamic() = default; - LoopEndDynamic(const Output& loop_begin, size_t work_amount_increment, std::vector is_incremented, - std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id); - - std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; -}; - } // namespace op } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/lowered/loop_info.cpp b/src/common/snippets/src/lowered/loop_info.cpp index e26703c294881b..00b364132cd2f3 100644 --- a/src/common/snippets/src/lowered/loop_info.cpp +++ b/src/common/snippets/src/lowered/loop_info.cpp @@ -24,6 +24,10 @@ LoopInfo::LoopInfo(size_t work_amount, size_t increment, const std::vector LoopInfo::clone_loop_ports(const ExpressionMap& expr_map, return cloned_port_points; } +bool UnifiedLoopInfo::LoopPortDesc::is_dynamic() const { + return utils::is_dynamic_value(ptr_increment) || utils::is_dynamic_value(finalization_offset); +} + UnifiedLoopInfo::UnifiedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const 
std::vector& exits, const SpecificIterationHandlers& handlers) @@ -173,6 +181,12 @@ std::shared_ptr UnifiedLoopInfo::clone_with_new_expr(const ExpressionM m_input_port_descs, m_output_port_descs, m_handlers); } +bool UnifiedLoopInfo::is_dynamic() const { + return LoopInfo::is_dynamic() || + std::any_of(m_input_port_descs.cbegin(), m_input_port_descs.cend(), [](const LoopPortDesc& shift) { return shift.is_dynamic(); }) || + std::any_of(m_output_port_descs.cbegin(), m_output_port_descs.cend(), [](const LoopPortDesc& shift) { return shift.is_dynamic(); }); +} + const SpecificIterationHandlers& UnifiedLoopInfo::get_handlers() const { return m_handlers; } @@ -307,7 +321,6 @@ void ExpandedLoopInfo::validate() const { "Incompatible data ptr shifts!"); } - std::shared_ptr ExpandedLoopInfo::clone_with_new_expr(const ExpressionMap& expr_map) const { const auto& new_input_ports = clone_loop_ports(expr_map, m_input_ports); const auto& new_output_ports = clone_loop_ports(expr_map, m_output_ports); @@ -316,6 +329,12 @@ std::shared_ptr ExpandedLoopInfo::clone_with_new_expr(const Expression m_ptr_increments, m_finalization_offsets, m_data_sizes, m_type, m_unified_loop_info); } +bool ExpandedLoopInfo::is_dynamic() const { + return LoopInfo::is_dynamic() || + std::any_of(m_ptr_increments.cbegin(), m_ptr_increments.cend(), [](size_t v) { return utils::is_dynamic_value(v); }) || + std::any_of(m_finalization_offsets.cbegin(), m_finalization_offsets.cend(), [](size_t v) { return utils::is_dynamic_value(v); }); +} + const std::shared_ptr& ExpandedLoopInfo::get_unified_loop_info() const { OPENVINO_ASSERT(m_unified_loop_info, "Failed to get unified loop info: it's nullptr"); return m_unified_loop_info; diff --git a/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp b/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp index e8aa00c426e279..9552cbfdfbee76 100644 --- a/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp +++ 
b/src/common/snippets/src/lowered/pass/clean_repeated_ptr_shifts.cpp @@ -82,22 +82,16 @@ bool CleanRepeatedDataPointerShifts::reuse_increments(const LoopManagerPtr& loop // TODO [133463]: We have to update LoopEnd and LoopInfo since the both entities must be valid. // To avoid the both changes, we have to insert Loop ops to LinearIR in the end of pipeline. auto new_is_incremented = loop_end->get_is_incremented(); - if (const auto loop_end_dynamic = ov::as_type_ptr(loop_end_expr->get_node())) { - for (auto idx_to_drop : resetting_data_indexes) { - new_is_incremented[idx_to_drop] = false; - } - } else if (const auto loop_end_static = ov::as_type_ptr(loop_end_expr->get_node())) { - auto new_ptr_increments = loop_end_static->get_ptr_increments(); - auto new_finalization_offsets = loop_end_static->get_finalization_offsets(); - for (auto idx_to_drop : resetting_data_indexes) { - new_ptr_increments[idx_to_drop] = 0; - new_finalization_offsets[idx_to_drop] = 0; - new_is_incremented[idx_to_drop] = false; - } - loop_end_static->set_ptr_increments(new_ptr_increments); - loop_end_static->set_finalization_offsets(new_finalization_offsets); + auto new_ptr_increments = loop_end->get_ptr_increments(); + auto new_finalization_offsets = loop_end->get_finalization_offsets(); + for (auto idx_to_drop : resetting_data_indexes) { + new_is_incremented[idx_to_drop] = false; + new_ptr_increments[idx_to_drop] = 0; + new_finalization_offsets[idx_to_drop] = 0; } loop_end->set_is_incremented(new_is_incremented); + loop_end->set_ptr_increments(new_ptr_increments); + loop_end->set_finalization_offsets(new_finalization_offsets); const auto loop_info = loop_manager->get_loop_info(loop_end->get_id()); size_t loop_port_idx = 0; diff --git a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp index 2b5162d8972e87..f3c34212072eac 100644 --- a/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp +++ 
b/src/common/snippets/src/lowered/pass/cleanup_loop_offsets.cpp @@ -5,7 +5,8 @@ #include "snippets/lowered/pass/cleanup_loop_offsets.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/snippets_isa.hpp" +#include "snippets/op/loop.hpp" +#include "snippets/utils.hpp" #include "snippets/itt.hpp" namespace ov { @@ -18,7 +19,7 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co bool is_modified = false; for (auto expr_it = begin; expr_it != end; expr_it++) { const auto& node = expr_it->get()->get_node(); - if (auto loop_end = as_type_ptr(node)) { + if (auto loop_end = as_type_ptr(node)) { auto next_expr_it = std::next(expr_it); const auto& next_node = next_expr_it->get()->get_node(); // Note: Finalization offsets before the Result can be safely disregarded @@ -29,7 +30,7 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co loop_end->set_finalization_offsets(std::vector(fin_offsets.size(), 0)); is_modified = true; } - if (auto outer_loop_end = as_type_ptr(next_node)) { + if (auto outer_loop_end = as_type_ptr(next_node)) { const auto& is_incremented = loop_end->get_is_incremented(); const auto& data_sizes = loop_end->get_element_type_sizes(); auto fin_offsets = loop_end->get_finalization_offsets(); @@ -51,6 +52,8 @@ bool CleanupLoopOffsets::run(lowered::LinearIR& linear_ir, lowered::LinearIR::co if (found != per_port_connector_offset.end()) { if (!is_incremented[found->second] || outer_data_sizes[i] != data_sizes[found->second]) continue; + if (utils::is_dynamic_value(outer_ptr_increments[i]) || utils::is_dynamic_value(fin_offsets[found->second])) + continue; // Since data ptr is incremented on [ptr_increment x increment], // we should guarantee proportionality of ptr shifts. 
// If the data ptr can't be proportionally shifted, the optimization is not applied diff --git a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp index 22bfe21c338e52..d093085dcc8922 100644 --- a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp +++ b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp @@ -6,6 +6,7 @@ #include "snippets/lowered/pass/identify_buffers.hpp" #include "snippets/pass/tokenization.hpp" +#include "snippets/utils.hpp" #include "snippets/itt.hpp" namespace ov { @@ -46,7 +47,7 @@ size_t DefineBufferClusters::get_cluster_buffer_id(const AllocateBuffers::Buffer DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const ExpressionPtr& loop_expr) const { BufferPorts input_buffers; - const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); + const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); const auto in_count = loop_end->get_input_num(); const auto& connectors = loop_expr->get_input_port_connectors(); @@ -66,7 +67,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const ExpressionPtr& loop_expr) const { BufferPorts output_buffers; - const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); + const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); const auto in_count = loop_end->get_input_num(); const auto out_count = loop_end->get_output_num(); const auto& connectors = loop_expr->get_input_port_connectors(); @@ -85,7 +86,7 @@ DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) { const auto& expr = *expr_it; - const auto loop_end = ov::as_type_ptr(expr->get_node()); + const auto loop_end = ov::as_type_ptr(expr->get_node()); const auto& ptr_increments = loop_end->get_ptr_increments(); 
const auto& final_offsets = loop_end->get_finalization_offsets(); const auto& data_sizes = loop_end->get_element_type_sizes(); @@ -110,19 +111,30 @@ void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) { continue; const auto input_buffer = ov::as_type_ptr(input_buffer_expr->get_node()); + + // If allocated sizes of buffers are unknown on compilation stage (dynamic), + // we cannot be sure that they will be the same in runtime. + if ((utils::is_dynamic_value(input_buffer->get_byte_size()) || utils::is_dynamic_value(output_buffer->get_byte_size()))) + continue; + + // Memory can be reused if reading and writing are executed proportionally: + // - the same reading/writing order + // - the same buffer memory sizes + if ((input_buffer->get_byte_size() != output_buffer->get_byte_size()) || + (input_buffer_expr->get_output_port_descriptor(0)->get_layout() != output_buffer_expr->get_input_port_descriptor(0)->get_layout())) + continue; + + // Also memory can be reused if there are the same ShiftPtrParams (data size, final offsets, ptr increments) const auto& input_buffer_ports = in.second; for (const auto& input_buffer_port_idx : input_buffer_ports) { - // Memory can be reused if reading and writing are executed proportionally: - // - the same ShiftPtrParams (data size, final offsets, ptr increments) - // - the same reading/writing order - // - the same buffer memory sizes const auto input_params = ShiftPtrParams(data_sizes[input_buffer_port_idx], ptr_increments[input_buffer_port_idx], final_offsets[input_buffer_port_idx]); const auto output_params = ShiftPtrParams(data_sizes[output_buffer_port_idx], ptr_increments[output_buffer_port_idx], final_offsets[output_buffer_port_idx]); + + // If data pointer shift 
parameters are unknown on model compilation stage (dynamic), + // we cannot be sure that these data pointers will be proportionally shifted in runtime. + if (input_params.is_static() && output_params.is_static() && input_params == output_params) { const auto cluster_it = find_cluster_by_expr(input_buffer_expr); OPENVINO_ASSERT(cluster_it != m_clusters.end(), "Buffer on inputs of Loop must be already saved in clusters"); // Add to the existing cluster @@ -157,11 +169,15 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, auto can_be_data_ptr_proportionally_shifted = [](int64_t outer_buffer_ptr_increment, int64_t outer_buffer_data_size, int64_t inner_buffer_final_offsets, int64_t inner_buffer_data_size) { + // If data pointer shift parameters are unknown on model compilation stage (dynamic), + // we cannot be sure that these data pointers will be proportionally shifted in runtime. + if (utils::is_dynamic_value(outer_buffer_ptr_increment) || utils::is_dynamic_value(inner_buffer_final_offsets)) + return false; return (outer_buffer_ptr_increment != 0) && ((inner_buffer_data_size * inner_buffer_final_offsets * -1) == outer_buffer_ptr_increment * outer_buffer_data_size); }; - const auto outer_loop_end = ov::as_type_ptr(outer_loop_end_expr_it->get()->get_node()); + const auto outer_loop_end = ov::as_type_ptr(outer_loop_end_expr_it->get()->get_node()); const auto outer_loop_begin = outer_loop_end->get_loop_begin(); const auto& outer_ptr_increments = outer_loop_end->get_ptr_increments(); const auto& outer_data_sizes = outer_loop_end->get_element_type_sizes(); @@ -218,7 +234,7 @@ int64_t DefineBufferClusters::get_buffer_finalization_offset(const ExpressionPtr const auto consumers = buffer_out->get_consumers(); for (const auto& consumer : consumers) { const auto consumer_expr = consumer.get_expr(); - const auto loop_end = ov::as_type_ptr(consumer_expr->get_node()); + const auto loop_end = ov::as_type_ptr(consumer_expr->get_node()); if (loop_end 
&& consumer_expr->get_loop_ids() == buffer_expr->get_loop_ids()) { const auto loop_order = ov::snippets::pass::GetTopologicalOrder(loop_end); if (loop_order > last_loop_exec_order) { @@ -243,7 +259,7 @@ bool DefineBufferClusters::unite_nested_clusters(const AllocateBuffers::BufferCl auto& up_idx = is_outer_up ? outer_idx : inner_idx; auto& down_idx = is_outer_up ? inner_idx : outer_idx; if (are_buffer_neighbours(up_buffer, down_buffer, common_loop_end_expr, up_idx, down_idx)) { - const auto common_loop_end = ov::as_type_ptr(common_loop_end_expr->get_node()); + const auto common_loop_end = ov::as_type_ptr(common_loop_end_expr->get_node()); const auto& inner_ptr_increments = common_loop_end->get_ptr_increments(); const auto& inner_final_offsets = common_loop_end->get_finalization_offsets(); const auto& inner_data_sizes = common_loop_end->get_element_type_sizes(); @@ -289,7 +305,7 @@ bool DefineBufferClusters::are_buffer_neighbours(const ExpressionPtr& up, const for (const auto& out : up->get_output_port_connectors()) { for (const auto& buffer_consumer : out->get_consumers()) { const auto buffer_consumer_expr = buffer_consumer.get_expr(); - const auto loop_end = ov::as_type_ptr(buffer_consumer_expr->get_node()); + const auto loop_end = ov::as_type_ptr(buffer_consumer_expr->get_node()); if (!loop_end) continue; const auto& loop_inputs = buffer_consumer_expr->get_input_port_connectors(); @@ -326,7 +342,7 @@ bool DefineBufferClusters::run(lowered::LinearIR& linear_ir, lowered::LinearIR:: for (auto expr_it = begin; expr_it != end; ++expr_it) { const auto& expr = *expr_it; const auto op = expr->get_node(); - if (ov::is_type(op)) { + if (ov::is_type(op)) { parse_loop(expr_it); continue; } diff --git a/src/common/snippets/src/lowered/pass/identify_buffers.cpp b/src/common/snippets/src/lowered/pass/identify_buffers.cpp index d01c0c1d2e3586..7e859ce8b1b173 100644 --- a/src/common/snippets/src/lowered/pass/identify_buffers.cpp +++ 
b/src/common/snippets/src/lowered/pass/identify_buffers.cpp @@ -4,10 +4,9 @@ #include "snippets/lowered/pass/identify_buffers.hpp" -#include "snippets/itt.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/op/brgemm.hpp" #include "snippets/snippets_isa.hpp" +#include "snippets/itt.hpp" namespace ov { namespace snippets { @@ -36,9 +35,13 @@ size_t IdentifyBuffers::get_buffer_idx(const ExpressionPtr& target, const Buffer } bool IdentifyBuffers::can_reuse_id(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs) { + // If data pointer shift parameters are unknown on model compilation stage (dynamic), + // we cannot be sure that these data pointers will be proportionally shifted. + // Then we force `false` value here to set unique registers for these buffers + const auto are_static = lhs.is_static() && rhs.is_static(); const auto equal_ptr_params_shifting = lhs.ptr_increment == rhs.ptr_increment && lhs.finalization_offset == rhs.finalization_offset; const auto equal_element_type_sizes = lhs.data_size == rhs.data_size; - return equal_ptr_params_shifting && (equal_element_type_sizes || (lhs.ptr_increment == 0 && lhs.finalization_offset == 0)); + return are_static && equal_ptr_params_shifting && (equal_element_type_sizes || (lhs.ptr_increment == 0 && lhs.finalization_offset == 0)); } bool IdentifyBuffers::are_adjacent(const std::pair& lhs, @@ -57,7 +60,7 @@ bool IdentifyBuffers::are_adjacent(const std::pair IdentifyBuffers::create_adjacency_matrix(LinearIR::constExprIt for (auto expr_it = begin; expr_it != end; expr_it++) { const auto &expr = *expr_it; - if (!ov::is_type(expr->get_node())) + if (!ov::is_type(expr->get_node())) continue; const auto buffer_loop_neighbours = get_buffer_loop_neighbours(expr); @@ -111,7 +114,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(LinearIR::constExprIt } IdentifyBuffers::BufferMap IdentifyBuffers::get_buffer_loop_neighbours(const ExpressionPtr& loop_end_expr) { - const auto& loop_end = 
ov::as_type_ptr(loop_end_expr->get_node()); + const auto& loop_end = ov::as_type_ptr(loop_end_expr->get_node()); const auto input_count = loop_end->get_input_num(); const auto output_count = loop_end->get_output_num(); @@ -142,7 +145,7 @@ IdentifyBuffers::BufferMap IdentifyBuffers::get_buffer_loop_neighbours(const Exp if (ov::is_type(child_expr->get_node())) { buffer_neighbours[child_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; buffer_count++; - } else if (ov::is_type(child_expr->get_node())) { + } else if (ov::is_type(child_expr->get_node())) { loop_count++; } } @@ -155,7 +158,7 @@ IdentifyBuffers::BufferMap IdentifyBuffers::get_buffer_loop_neighbours(const Exp } IdentifyBuffers::BufferMap IdentifyBuffers::get_buffer_loop_inside(const LinearIR::constExprIt& loop_end_it) { - const auto& loop_end = ov::as_type_ptr((*loop_end_it)->get_node()); + const auto& loop_end = ov::as_type_ptr((*loop_end_it)->get_node()); const auto loop_begin = loop_end->get_loop_begin(); BufferMap inner_buffers; for (auto it = std::reverse_iterator(loop_end_it); (*it)->get_node() != loop_begin; ++it) { diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index e9253901cfc8fd..ec8f5a228a237c 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -72,7 +72,7 @@ inline void init_is_incremented(LoopPort& port, size_t loop_id) { } } -inline int64_t get_ptr_increment(const LoopPort& loop_port, size_t work_amount) { +inline int64_t get_ptr_increment(const LoopPort& loop_port, size_t work_amount, size_t port_count) { if (!loop_port.is_incremented) return 0; @@ -87,8 +87,8 @@ inline int64_t get_ptr_increment(const LoopPort& loop_port, size_t work_amount) } else { OPENVINO_THROW("Unsupported expression port type!"); } - // When we cannot say about broadcasting by last dim - if (dim == shape.size() - 1 && 
utils::is_dynamic_value(shape.back())) { + // When we cannot say about broadcasting + if (utils::is_dynamic_value(shape[dim]) && port_count > 1) { return utils::get_dynamic_value(); } else if (!(shape[dim] == 1 && work_amount != 1)) { return get_stride(dim, shape); @@ -134,9 +134,12 @@ void InitLoops::init_loop_info(const UnifiedLoopInfoPtr& loop_info, const size_t init_work_amount(loop_info); const auto work_amount = loop_info->get_work_amount(); + const auto input_count = loop_info->get_input_count(); + const auto output_count = loop_info->get_output_count(); - auto init_runtime_parameters = [&work_amount](LoopPort& loop_port, UnifiedLoopInfo::LoopPortDesc& ptr_shifts_params) { - ptr_shifts_params.ptr_increment = get_ptr_increment(loop_port, work_amount); + auto init_runtime_parameters = [&work_amount, &input_count, &output_count](LoopPort& loop_port, UnifiedLoopInfo::LoopPortDesc& ptr_shifts_params) { + ptr_shifts_params.ptr_increment = get_ptr_increment(loop_port, work_amount, + loop_port.expr_port->get_type() == ExpressionPort::Input ? 
input_count : output_count); ptr_shifts_params.finalization_offset = get_finalization_offset(work_amount, ptr_shifts_params.ptr_increment); }; diff --git a/src/common/snippets/src/lowered/pass/insert_loops.cpp b/src/common/snippets/src/lowered/pass/insert_loops.cpp index a10799626250ec..07574d214de1c1 100644 --- a/src/common/snippets/src/lowered/pass/insert_loops.cpp +++ b/src/common/snippets/src/lowered/pass/insert_loops.cpp @@ -17,41 +17,27 @@ namespace pass { void InsertLoops::insertion(LinearIR& linear_ir, const LoopManagerPtr& loop_manager, size_t loop_id) { const auto loop_info = loop_manager->get_loop_info(loop_id); - auto loop_entries = loop_info->get_input_ports(); - auto loop_exits = loop_info->get_output_ports(); const auto work_amount = loop_info->get_work_amount(); const auto work_amount_increment = loop_info->get_increment(); - - const auto loop_bounds = loop_manager->get_loop_bounds(linear_ir, loop_id); + const auto in_num = loop_info->get_input_count(); + const auto out_num = loop_info->get_output_count(); std::vector loop_end_inputs; - loop_end_inputs.reserve(loop_entries.size() + loop_exits.size()); + loop_end_inputs.reserve(in_num + out_num); loop_info->iterate_through_ports([&loop_end_inputs](const LoopPort& port) { loop_end_inputs.push_back(port.expr_port->get_port_connector_ptr()); }); const auto is_incremented = loop_info->get_is_incremented(); + const auto ptr_increments = loop_info->get_ptr_increments(); + const auto finalization_offsets = loop_info->get_finalization_offsets(); const auto io_data_sizes = loop_info->get_data_sizes(); - // Should be inited by LoopInfo - const auto is_dynamic_loop = is_loop_dynamic(loop_info); - - std::shared_ptr loop_begin = nullptr; - std::shared_ptr loop_end = nullptr; - if (is_dynamic_loop) { - loop_begin = std::make_shared(); - loop_end = std::make_shared(loop_begin, work_amount_increment, is_incremented, io_data_sizes, - loop_entries.size(), loop_exits.size(), loop_id); - - } else { - const auto 
ptr_increments = loop_info->get_ptr_increments(); - const auto finalization_offsets = loop_info->get_finalization_offsets(); - - loop_begin = std::make_shared(); - loop_end = std::make_shared(loop_begin, work_amount, work_amount_increment, is_incremented, ptr_increments, - finalization_offsets, io_data_sizes, loop_entries.size(), loop_exits.size(), loop_id); - } + const auto loop_begin = std::make_shared(); + const auto loop_end = std::make_shared(loop_begin, work_amount, work_amount_increment, is_incremented, ptr_increments, + finalization_offsets, io_data_sizes, in_num, out_num, loop_id); + const auto loop_bounds = loop_manager->get_loop_bounds(linear_ir, loop_id); const auto outer_loop_ids = loop_manager->get_outer_expr_loops(*loop_bounds.first, loop_id); const auto loop_begin_expr = *linear_ir.insert_node(loop_begin, std::vector{}, outer_loop_ids, false, loop_bounds.first); @@ -60,17 +46,6 @@ void InsertLoops::insertion(LinearIR& linear_ir, const LoopManagerPtr& loop_mana linear_ir.insert_node(loop_end, loop_end_inputs, outer_loop_ids, false, loop_bounds.second); } -bool InsertLoops::is_loop_dynamic(const UnifiedLoopInfoPtr& loop_info) { - auto is_loop_port_dynamic = [](const UnifiedLoopInfo::LoopPortDesc& shifts) { - return utils::is_dynamic_value(shifts.ptr_increment) || utils::is_dynamic_value(shifts.finalization_offset); - }; - const auto& entry_shifts = loop_info->get_input_port_descs(); - const auto& exit_shifts = loop_info->get_output_port_descs(); - return utils::is_dynamic_value(loop_info->get_work_amount()) || - std::any_of(entry_shifts.cbegin(), entry_shifts.cend(), is_loop_port_dynamic) || - std::any_of(exit_shifts.cbegin(), exit_shifts.cend(), is_loop_port_dynamic); -} - bool InsertLoops::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertLoops") const auto& loop_manager = linear_ir.get_loop_manager(); diff --git 
a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp index 3e6f6e85e0b6c3..e89c711627a911 100644 --- a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp +++ b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp @@ -101,12 +101,10 @@ void InsertSpecificIterations::init_decomposed_loop(LinearIR& linear_ir, LinearI const auto& loop_manager = linear_ir.get_loop_manager(); const auto new_id = loop_manager->replace_with_new_loop(linear_ir, begin, std::next(end), decomposed_loop_info, unified_loop_id); decomposed_loop_end->set_id(new_id); + decomposed_loop_end->set_work_amount(decomposed_loop_info->get_work_amount()); decomposed_loop_end->set_increment(decomposed_loop_info->get_increment()); - if (const auto static_loop_end = ov::as_type_ptr(decomposed_loop_end)) { - static_loop_end->set_work_amount(decomposed_loop_info->get_work_amount()); - static_loop_end->set_ptr_increments(decomposed_loop_info->get_ptr_increments()); - static_loop_end->set_finalization_offsets(decomposed_loop_info->get_finalization_offsets()); - } + decomposed_loop_end->set_ptr_increments(decomposed_loop_info->get_ptr_increments()); + decomposed_loop_end->set_finalization_offsets(decomposed_loop_info->get_finalization_offsets()); // Note: handlers must be run on the range started with the first operation in the loop body. 
const auto handlers = decomposed_loop_info->get_handler_passes(); handlers.run(linear_ir, std::next(begin), end); diff --git a/src/common/snippets/src/lowered/pass/iter_handler.cpp b/src/common/snippets/src/lowered/pass/iter_handler.cpp index 5445c229571fc9..dd2d601366cb1a 100644 --- a/src/common/snippets/src/lowered/pass/iter_handler.cpp +++ b/src/common/snippets/src/lowered/pass/iter_handler.cpp @@ -85,7 +85,7 @@ TransformInnerSplitLoop::TransformInnerSplitLoop(size_t tail_size) : RangedPass( bool TransformInnerSplitLoop::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { const auto& expr = *end; const auto node = expr->get_node(); - const auto loop_end = ov::as_type_ptr(node); + const auto loop_end = ov::as_type_ptr(node); OPENVINO_ASSERT(loop_end, "the last operation in range must be LoopEnd"); const auto& loop_manager = linear_ir.get_loop_manager(); @@ -97,7 +97,7 @@ bool TransformInnerSplitLoop::run(LinearIR& linear_ir, LinearIR::constExprIt beg bool modified = false; for (auto it = begin; it != end; ++it) { const auto& expr = *it; - const auto inner_loop_end = ov::as_type_ptr(expr->get_node()); + const auto inner_loop_end = ov::as_type_ptr(expr->get_node()); if (!inner_loop_end) continue; // There is already ExpandedLoopInfo @@ -105,6 +105,8 @@ bool TransformInnerSplitLoop::run(LinearIR& linear_ir, LinearIR::constExprIt beg const auto inner_dim_idx = inner_loop_info->get_dim_idx(); if (inner_dim_idx != current_dim_idx) continue; + // TODO [141735] : At the moment Split loops are not supported in dynamic case + OPENVINO_ASSERT(!inner_loop_end->has_dynamic_params(), "inner loop must be static in TransformInnerSplitLoop"); const auto inner_loop_begin = inner_loop_end->get_loop_begin(); const auto inner_loop_work_amount = static_cast(inner_loop_end->get_work_amount()); const auto inner_loop_increment = inner_loop_end->get_increment(); diff --git a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp 
b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp index 45c01c6644c654..76921788bfd865 100644 --- a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp +++ b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp @@ -5,7 +5,8 @@ #include "snippets/lowered/pass/optimize_loop_single_evaluation.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/snippets_isa.hpp" +#include "snippets/op/loop.hpp" +#include "snippets/utils.hpp" #include "snippets/itt.hpp" namespace ov { @@ -18,7 +19,7 @@ bool OptimizeLoopSingleEvaluation::run(lowered::LinearIR& linear_ir, lowered::Li bool is_modified = false; for (auto expr_it = begin; expr_it != end; ++expr_it) { const auto& expr = *expr_it; - if (auto loop_end = ov::as_type_ptr(expr->get_node())) { + if (auto loop_end = ov::as_type_ptr(expr->get_node())) { // *1* solo vector/tail loop + empty outer loop // => skip increments (both counter & ptr) : set evaluate_once flag // *2* solo vector/tail loop + non-empty outer loop @@ -26,7 +27,7 @@ bool OptimizeLoopSingleEvaluation::run(lowered::LinearIR& linear_ir, lowered::Li // and perform pointer increments through finalization offsets // *3* vector loop(s) + one tail loop // => vector as usual, tail depends on outer loop, see *1* and *2* - if (loop_end->get_work_amount() >= 2 * loop_end->get_increment()) + if (loop_end->has_dynamic_params() || loop_end->get_work_amount() >= 2 * loop_end->get_increment()) continue; auto new_finalization_offsets = loop_end->get_finalization_offsets(); diff --git a/src/common/snippets/src/lowered/pass/split_loops.cpp b/src/common/snippets/src/lowered/pass/split_loops.cpp index 163980a21e5f72..dfb8edcb378b40 100644 --- a/src/common/snippets/src/lowered/pass/split_loops.cpp +++ b/src/common/snippets/src/lowered/pass/split_loops.cpp @@ -26,7 +26,9 @@ bool SplitLoops::can_be_split(const UnifiedLoopInfoPtr& loop_to_split, const Uni const bool equal_dim_idxes = current_dim_idx != 
LoopInfo::UNDEFINED_DIM_IDX && current_dim_idx == parent_dim_idx; const bool only_main_body = handlers.get_passes().empty() && handlers.get_passes().empty(); - return loop_to_split->get_work_amount() == loop_to_fuse->get_work_amount() && + // TODO [141735] : At the moment Splitted loops are not supported in dynamic case + const auto are_static = !loop_to_split->is_dynamic() && !loop_to_fuse->is_dynamic(); + return are_static && loop_to_split->get_work_amount() == loop_to_fuse->get_work_amount() && loop_to_split->get_increment() != loop_to_fuse->get_increment() && equal_dim_idxes && only_main_body; } diff --git a/src/common/snippets/src/lowered/pass/validate.cpp b/src/common/snippets/src/lowered/pass/validate.cpp index 7642e23acf867f..dc9dbdea76b5c8 100644 --- a/src/common/snippets/src/lowered/pass/validate.cpp +++ b/src/common/snippets/src/lowered/pass/validate.cpp @@ -83,23 +83,23 @@ void validate_buffer(const ExpressionPtr& expr, const LinearIR& linear_ir) { } } -void validate_loop_end_static(const ExpressionPtr& expr, const LinearIR& linear_ir) { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(loop_end, "LoopEndStatic validation expects LoopEndStatic op"); - OPENVINO_ASSERT(ov::is_type(loop_end->get_loop_begin()), - "LoopEndStatic must be connected to the LoopBeginStatic"); +void validate_loop_end(const ExpressionPtr& expr, const LinearIR& linear_ir) { + const auto loop_end = ov::as_type_ptr(expr->get_node()); + OPENVINO_ASSERT(loop_end, "LoopEnd validation expects LoopEnd op"); + OPENVINO_ASSERT(loop_end->get_loop_begin() != nullptr, + "LoopEnd must be connected to the LoopBegin"); const auto& loop_manager = linear_ir.get_loop_manager(); const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); OPENVINO_ASSERT(loop_info->get_work_amount() == loop_end->get_work_amount() && loop_info->get_increment() == loop_end->get_increment(), - "Incompatible LoopEndStatic and the corresponding LoopInfo"); + "Incompatible LoopEnd and 
the corresponding LoopInfo"); const auto input_port_infos = loop_info->get_input_ports_info(); const auto output_port_infos = loop_info->get_output_ports_info(); OPENVINO_ASSERT(input_port_infos.size() == loop_end->get_input_num() && output_port_infos.size() == loop_end->get_output_num(), - "Incompatible LoopEndStatic and the corresponding LoopInfo"); + "Incompatible LoopEnd and the corresponding LoopInfo"); const auto& is_incremented = loop_end->get_is_incremented(); const auto& ptr_increments = loop_end->get_ptr_increments(); @@ -109,39 +109,12 @@ void validate_loop_end_static(const ExpressionPtr& expr, const LinearIR& linear_ OPENVINO_ASSERT(is_incremented[i + shift] == loop_port_infos[i].port.is_incremented && ptr_increments[i + shift] == loop_port_infos[i].desc.ptr_increment && final_offsets[i + shift] == loop_port_infos[i].desc.finalization_offset, - "Incompatible data ptr shifts in LoopEndStatic and the corresponding LoopInfo"); + "Incompatible data ptr shifts in LoopEnd and the corresponding LoopInfo"); } }; validate_loop_ports(input_port_infos); validate_loop_ports(output_port_infos, loop_end->get_input_num()); } - -void validate_loop_end_dynamic(const ExpressionPtr& expr, const LinearIR& linear_ir) { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(loop_end, "LoopEndDynamic validation expects LoopEndStatic op"); - OPENVINO_ASSERT(ov::is_type(loop_end->get_loop_begin()), - "LoopEndDynamic must be connected to the LoopBeginDynamic"); - - const auto& loop_manager = linear_ir.get_loop_manager(); - const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); - OPENVINO_ASSERT(loop_info->get_increment() == loop_end->get_increment(), - "Incompatible LoopEndDynamic and the corresponding LoopInfo"); - - OPENVINO_ASSERT(loop_info->get_input_count() == loop_end->get_input_num() && - loop_info->get_output_count() == loop_end->get_output_num(), - "Incompatible LoopEndStatic and the corresponding LoopInfo"); - - const auto& 
is_incremented = loop_end->get_is_incremented(); - - auto validate_loop_ports = [&](const std::vector& loop_ports, size_t shift = 0) { - for (size_t i = 0; i < loop_ports.size(); ++i) { - OPENVINO_ASSERT(is_incremented[i + shift] == loop_ports[i].is_incremented, - "Incompatible data ptr shifts in LoopEndStatic and the corresponding LoopInfo"); - } - }; - validate_loop_ports(loop_info->get_input_ports()); - validate_loop_ports(loop_info->get_output_ports(), loop_end->get_input_num()); -} } // namespace Validate::Validate() { @@ -149,8 +122,7 @@ Validate::Validate() { {ov::op::v0::Parameter::get_type_info_static(), validate_parameter}, {ov::op::v0::Result::get_type_info_static(), validate_result}, {ov::snippets::op::Buffer::get_type_info_static(), validate_buffer}, - {ov::snippets::op::LoopEndStatic::get_type_info_static(), validate_loop_end_static}, - {ov::snippets::op::LoopEndDynamic::get_type_info_static(), validate_loop_end_dynamic} + {ov::snippets::op::LoopEnd::get_type_info_static(), validate_loop_end}, }; } diff --git a/src/common/snippets/src/lowered/pass/validate_expanded_loops.cpp b/src/common/snippets/src/lowered/pass/validate_expanded_loops.cpp index 2786401538c7f6..1653d9da993f6d 100644 --- a/src/common/snippets/src/lowered/pass/validate_expanded_loops.cpp +++ b/src/common/snippets/src/lowered/pass/validate_expanded_loops.cpp @@ -110,18 +110,16 @@ void ValidateExpandedLoops::validate_loop_expressions(const LinearIR& linear_ir) const auto expanded_loop_info = ov::as_type_ptr(loop_manager->get_loop_info(loop_id)); INFORMATIVE_ASSERT(expanded_loop_info, "expects only ExpandedLoopInfo in LoopManager"); + INFORMATIVE_ASSERT(loop_end->get_work_amount() == expanded_loop_info->get_work_amount(), + "incompatible work amount of LoopEnd and ExpandedLoopInfo"); INFORMATIVE_ASSERT(loop_end->get_increment() == expanded_loop_info->get_increment(), "incompatible increment of LoopEnd and ExpandedLoopInfo"); INFORMATIVE_ASSERT(loop_end->get_element_type_sizes() == 
expanded_loop_info->get_data_sizes(), "incompatible element sizes of LoopEnd and ExpandedLoopInfo"); - if (const auto static_loop_end = ov::as_type_ptr(expr->get_node())) { - INFORMATIVE_ASSERT(static_loop_end->get_work_amount() == expanded_loop_info->get_work_amount(), - "incompatible work amount of LoopEnd and ExpandedLoopInfo"); - INFORMATIVE_ASSERT(static_loop_end->get_ptr_increments() == expanded_loop_info->get_ptr_increments(), - "incompatible pointer increments of LoopEnd and ExpandedLoopInfo"); - INFORMATIVE_ASSERT(static_loop_end->get_finalization_offsets() == expanded_loop_info->get_finalization_offsets(), - "incompatible finalization offsets of LoopEnd and ExpandedLoopInfo"); - } + INFORMATIVE_ASSERT(loop_end->get_ptr_increments() == expanded_loop_info->get_ptr_increments(), + "incompatible pointer increments of LoopEnd and ExpandedLoopInfo"); + INFORMATIVE_ASSERT(loop_end->get_finalization_offsets() == expanded_loop_info->get_finalization_offsets(), + "incompatible finalization offsets of LoopEnd and ExpandedLoopInfo"); } } INFORMATIVE_ASSERT(unique_loop_ids.size() == loop_manager->get_map().size(), diff --git a/src/common/snippets/src/op/loop.cpp b/src/common/snippets/src/op/loop.cpp index 73766669300337..66cdd4a275d864 100644 --- a/src/common/snippets/src/op/loop.cpp +++ b/src/common/snippets/src/op/loop.cpp @@ -3,7 +3,8 @@ // #include "snippets/op/loop.hpp" -#include "snippets/generator.hpp" + +#include "snippets/utils.hpp" namespace ov { namespace snippets { @@ -29,6 +30,11 @@ void LoopBegin::validate_and_infer_types() { "LoopBegin must have LoopEnd connected to its last output"); } +std::shared_ptr LoopBegin::clone_with_new_inputs(const OutputVector& inputs) const { + OPENVINO_ASSERT(inputs.empty(), "LoopBegin should not contain inputs"); + return std::make_shared(); +} + std::shared_ptr LoopBegin::get_loop_end() const { const auto& last_output_inputs = get_output_target_inputs(0); OPENVINO_ASSERT(last_output_inputs.size() == 1, "LoopBegin has more 
than one inputs attached to the last output"); @@ -37,180 +43,153 @@ std::shared_ptr LoopBegin::get_loop_end() const { return loop_end; } -std::shared_ptr LoopBeginStatic::clone_with_new_inputs(const OutputVector& inputs) const { - return std::make_shared(); -} - -std::shared_ptr LoopBeginDynamic::clone_with_new_inputs(const OutputVector& inputs) const { - return std::make_shared(); -} - -LoopEnd::LoopEnd(const Output& loop_begin, size_t work_amount_increment, std::vector is_incremented, +LoopEnd::LoopEnd(const Output& loop_begin, size_t work_amount, size_t work_amount_increment, + std::vector is_incremented, std::vector ptr_increments, std::vector finalization_offsets, std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id) : LoopBase({loop_begin}), m_is_incremented(std::move(is_incremented)), + m_ptr_increments(std::move(ptr_increments)), + m_finalization_offsets(std::move(finalization_offsets)), m_element_type_sizes(std::move(element_type_sizes)), + m_work_amount(work_amount), m_work_amount_increment(work_amount_increment), m_input_num(input_num), m_output_num(output_num), - m_id(id) { + m_id(id), + m_evaluate_once(false) { constructor_validate_and_infer_types(); } -std::shared_ptr LoopEnd::get_loop_begin() { - const auto& loop_begin = ov::as_type_ptr(get_input_source_output(get_input_size() - 1).get_node_shared_ptr()); - if (!loop_begin) - throw std::invalid_argument("LoopEnd last input is not connected to LoopBegin"); - return loop_begin; -} - -const std::vector& LoopEnd::get_is_incremented() const { - return m_is_incremented; -} - -const std::vector& LoopEnd::get_element_type_sizes() const { - return m_element_type_sizes; -} - -size_t LoopEnd::get_input_num() const { - return m_input_num; -} - -size_t LoopEnd::get_output_num() const { - return m_output_num; -} - -size_t LoopEnd::get_increment() const { - return m_work_amount_increment; -} - -size_t LoopEnd::get_id() const { - return m_id; -} - -void 
LoopEnd::set_is_incremented(std::vector is_incremented) { - OPENVINO_ASSERT(is_incremented.size() == m_input_num + m_output_num, - "LoopEnd set_is_incremented is called with inconsistent is_incremented.size()"); - m_is_incremented = std::move(is_incremented); -} - -void LoopEnd::set_increment(size_t new_increment) { - m_work_amount_increment = new_increment; -} - -void LoopEnd::set_id(size_t id) { - m_id = id; -} - void LoopEnd::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_size() == 1, "LoopEnd must have one input"); const auto loop_begin = ov::as_type_ptr(get_input_node_shared_ptr(0)); const auto io_size = m_input_num + m_output_num; NODE_VALIDATION_CHECK(this, loop_begin != nullptr, "LoopEnd must have LoopBegin as the last argument"); - NODE_VALIDATION_CHECK(this, m_is_incremented.empty() || m_is_incremented.size() == io_size, - "is_incremented must be either empty or defined per every input & output of joined Loop. Expected size: ", - io_size, " got ", m_is_incremented.size()); + +#define VALIDATE_VALUES(values, name, default_value) \ + NODE_VALIDATION_CHECK(this, values.empty() || values.size() == io_size, \ + name, " must be either empty or defined per every input & output of joined Loop. 
Expected size: ", \ + io_size, " got ", values.size()); \ + if (values.empty()) \ + values.resize(io_size, default_value); + + VALIDATE_VALUES(m_is_incremented, "is_incremented", true) + VALIDATE_VALUES(m_ptr_increments, "ptr_increments", 0) + VALIDATE_VALUES(m_finalization_offsets, "finalization_offsets", 0) + VALIDATE_VALUES(m_element_type_sizes, "element_type_sizes", 0) +#undef VALIDATE_VALUES + set_output_type(0, element::f32, ov::PartialShape{}); } bool LoopEnd::visit_attributes(AttributeVisitor &visitor) { std::vector int_incremented(m_is_incremented.cbegin(), m_is_incremented.cend()); visitor.on_attribute("is_incremented", int_incremented); + visitor.on_attribute("ptr_incr", m_ptr_increments); + visitor.on_attribute("fin_offset", m_finalization_offsets); visitor.on_attribute("data_sizes", m_element_type_sizes); + visitor.on_attribute("work_amount", m_work_amount); visitor.on_attribute("increment", m_work_amount_increment); visitor.on_attribute("input_num", m_input_num); visitor.on_attribute("output_num", m_output_num); visitor.on_attribute("id", m_id); + visitor.on_attribute("evaluate_once", m_evaluate_once); return true; } -LoopEndStatic::LoopEndStatic(const Output& loop_begin, size_t work_amount, size_t work_amount_increment, - std::vector is_incremented, std::vector ptr_increments, std::vector finalization_offsets, - std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id) - : LoopEnd(loop_begin, work_amount_increment, std::move(is_incremented), std::move(element_type_sizes), input_num, output_num, id), - m_ptr_increments(std::move(ptr_increments)), m_finalization_offsets(std::move(finalization_offsets)), m_work_amount(work_amount), - m_evaluate_once(false) {} - -std::shared_ptr LoopEndStatic::clone_with_new_inputs(const OutputVector& inputs) const { +std::shared_ptr LoopEnd::clone_with_new_inputs(const OutputVector& inputs) const { check_new_args_count(this, inputs); - const auto loop_end = std::make_shared(inputs.at(0), 
m_work_amount, m_work_amount_increment, m_is_incremented, m_ptr_increments, - m_finalization_offsets, m_element_type_sizes, m_input_num, m_output_num, m_id); + const auto loop_end = std::make_shared(inputs.at(0), m_work_amount, m_work_amount_increment, m_is_incremented, m_ptr_increments, + m_finalization_offsets, m_element_type_sizes, m_input_num, m_output_num, m_id); loop_end->m_evaluate_once = m_evaluate_once; return loop_end; } -void LoopEndStatic::validate_and_infer_types() { - LoopEnd::validate_and_infer_types(); - const auto io_size = m_input_num + m_output_num; - NODE_VALIDATION_CHECK(this, m_ptr_increments.empty() || m_ptr_increments.size() == io_size, - "ptr_increments must be either empty or defined per every input & output of joined Loop. Expected size: ", - io_size, " got ", m_ptr_increments.size()); - NODE_VALIDATION_CHECK(this, m_finalization_offsets.empty() || m_finalization_offsets.size() == io_size, - "finalization_offsets must be either empty or defined per every input & output of joined Loop. 
Expected size: ", - io_size, " got ", m_finalization_offsets.size()); - if (m_ptr_increments.empty()) - m_ptr_increments.resize(io_size, 0); - if (m_finalization_offsets.empty()) - m_finalization_offsets.resize(io_size, 0); -} - -bool LoopEndStatic::visit_attributes(AttributeVisitor &visitor) { - visitor.on_attribute("work_amount", m_work_amount); - visitor.on_attribute("ptr_incr", m_ptr_increments); - visitor.on_attribute("fin_offset", m_finalization_offsets); - visitor.on_attribute("evaluate_once", m_evaluate_once); - return LoopEnd::visit_attributes(visitor); +std::shared_ptr LoopEnd::get_loop_begin() { + const auto& loop_begin = ov::as_type_ptr(get_input_source_output(get_input_size() - 1).get_node_shared_ptr()); + OPENVINO_ASSERT(loop_begin != nullptr, "LoopEnd last input is not connected to LoopBegin"); + return loop_begin; } -const std::vector& LoopEndStatic::get_finalization_offsets() const { +const std::vector& LoopEnd::get_is_incremented() const { + return m_is_incremented; +} + +const std::vector& LoopEnd::get_finalization_offsets() const { return m_finalization_offsets; } -const std::vector& LoopEndStatic::get_ptr_increments() const { +const std::vector& LoopEnd::get_ptr_increments() const { return m_ptr_increments; } -size_t LoopEndStatic::get_work_amount() const { +const std::vector& LoopEnd::get_element_type_sizes() const { + return m_element_type_sizes; +} + +size_t LoopEnd::get_input_num() const { + return m_input_num; +} + +size_t LoopEnd::get_output_num() const { + return m_output_num; +} + +size_t LoopEnd::get_work_amount() const { return m_work_amount; } -bool LoopEndStatic::get_evaluate_once() const { +size_t LoopEnd::get_increment() const { + return m_work_amount_increment; +} + +size_t LoopEnd::get_id() const { + return m_id; +} + +bool LoopEnd::get_evaluate_once() const { return m_evaluate_once; } -void LoopEndStatic::set_finalization_offsets(std::vector offsets) { +bool LoopEnd::has_dynamic_params() const { + auto is_vector_dynamic = 
[](const std::vector& values) { + return std::any_of(values.cbegin(), values.cend(), utils::is_dynamic_value); + }; + return utils::is_dynamic_value(m_work_amount) || is_vector_dynamic(m_ptr_increments) || is_vector_dynamic(m_finalization_offsets); +} + +void LoopEnd::set_is_incremented(std::vector is_incremented) { + OPENVINO_ASSERT(is_incremented.size() == m_input_num + m_output_num, + "LoopEnd set_is_incremented is called with inconsistent is_incremented.size()"); + m_is_incremented = std::move(is_incremented); +} + +void LoopEnd::set_finalization_offsets(std::vector offsets) { OPENVINO_ASSERT(offsets.size() == m_input_num + m_output_num, "LoopEnd set_finalization_offsets is called with inconsistent offsets.size()"); m_finalization_offsets = std::move(offsets); } -void LoopEndStatic::set_ptr_increments(std::vector new_ptr_increments) { +void LoopEnd::set_ptr_increments(std::vector new_ptr_increments) { OPENVINO_ASSERT(new_ptr_increments.size() == m_input_num + m_output_num, "LoopEnd set_ptr_increments is called with inconsistent new_ptr_increments.size()"); m_ptr_increments = std::move(new_ptr_increments); } -void LoopEndStatic::update_ptr_increments(int64_t new_increment) { - std::transform(m_ptr_increments.begin(), m_ptr_increments.end(), m_ptr_increments.begin(), - [new_increment](int64_t old_increment){ - return old_increment != 0 ? 
new_increment : 0; - }); -} -void LoopEndStatic::set_work_amount(size_t new_work_amount) { +void LoopEnd::set_work_amount(size_t new_work_amount) { m_work_amount = new_work_amount; } -void LoopEndStatic::set_evaluate_once(bool once) { - m_evaluate_once = once; +void LoopEnd::set_increment(size_t new_increment) { + m_work_amount_increment = new_increment; } -LoopEndDynamic::LoopEndDynamic(const Output& loop_begin, size_t work_amount_increment, std::vector is_incremented, - std::vector element_type_sizes, size_t input_num, size_t output_num, size_t id) - : LoopEnd(loop_begin, work_amount_increment, std::move(is_incremented), std::move(element_type_sizes), input_num, output_num, id) {} +void LoopEnd::set_evaluate_once(bool once) { + m_evaluate_once = once; +} -std::shared_ptr LoopEndDynamic::clone_with_new_inputs(const OutputVector& inputs) const { - check_new_args_count(this, inputs); - return std::make_shared(inputs.at(0), m_work_amount_increment, m_is_incremented, m_element_type_sizes, m_input_num, m_output_num, m_id); +void LoopEnd::set_id(size_t id) { + m_id = id; } } // namespace op diff --git a/src/common/snippets/src/pass/gn_decomposition.cpp b/src/common/snippets/src/pass/gn_decomposition.cpp index 04d3fdb0ac5971..1bb82421abe608 100644 --- a/src/common/snippets/src/pass/gn_decomposition.cpp +++ b/src/common/snippets/src/pass/gn_decomposition.cpp @@ -38,6 +38,7 @@ GNDecomposition::GNDecomposition() { // reshape [N, C, spatial] to [N, group, 1, (C / group) * spatial] const auto orig_shape = group_norm_node->get_input_partial_shape(0).to_shape(); size_t orig_rank = orig_shape.size(); + OPENVINO_ASSERT(orig_rank >= 2, "First input rank for group normalization op should be greater than 1"); size_t group_rank = 4; size_t c_in_group = orig_shape[1] / num_groups; size_t spatial_dim = 1; diff --git a/src/common/snippets/src/pass/gn_tokenization.cpp b/src/common/snippets/src/pass/gn_tokenization.cpp index c42a44c64cc479..e67384723768e1 100644 --- 
a/src/common/snippets/src/pass/gn_tokenization.cpp +++ b/src/common/snippets/src/pass/gn_tokenization.cpp @@ -20,7 +20,8 @@ ov::snippets::pass::TokenizeGNSnippets::TokenizeGNSnippets() { ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::pass::TokenizeGNSnippets") auto group_norm_node = ov::as_type_ptr(m.get_match_root()); - if (group_norm_node->is_dynamic() || group_norm_node->get_element_type() != element::f32) + if (group_norm_node->is_dynamic() || group_norm_node->get_element_type() != element::f32 || + GetSnippetsNodeType(group_norm_node) == SnippetsNodeType::SkippedByPlugin) return false; auto subgraph = op::Subgraph::wrap_node_as_subgraph(group_norm_node); diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index d6c6081113ea1f..ff42dae602a54f 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -47,12 +47,10 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_PREDEFINED(op::HorizonMax, HorizonOpShapeInfer), SHAPE_INFER_PREDEFINED(op::HorizonSum, HorizonOpShapeInfer), // - SHAPE_INFER_PREDEFINED(op::LoopBeginStatic, SingleElementShapeInfer), - SHAPE_INFER_PREDEFINED(op::LoopBeginDynamic, SingleElementShapeInfer), + SHAPE_INFER_PREDEFINED(op::LoopBegin, SingleElementShapeInfer), SHAPE_INFER_PREDEFINED(op::Scalar, SingleElementShapeInfer), SHAPE_INFER_PREDEFINED(op::VectorBuffer, SingleElementShapeInfer), - SHAPE_INFER_PREDEFINED(op::LoopEndStatic, EmptyShapeInfer), - SHAPE_INFER_PREDEFINED(op::LoopEndDynamic, EmptyShapeInfer), + SHAPE_INFER_PREDEFINED(op::LoopEnd, EmptyShapeInfer), #ifdef SNIPPETS_DEBUG_CAPS SHAPE_INFER_PREDEFINED(op::PerfCountBegin, EmptyShapeInfer), SHAPE_INFER_PREDEFINED(op::PerfCountEnd, EmptyShapeInfer), diff --git 
a/src/common/snippets/tests/include/pass/gn_decomposition.hpp b/src/common/snippets/tests/include/pass/gn_decomposition.hpp new file mode 100644 index 00000000000000..012bbd7247a532 --- /dev/null +++ b/src/common/snippets/tests/include/pass/gn_decomposition.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "lowering_utils.hpp" +#include "subgraph_group_normalization.hpp" + +/* The main purpose is to test that GNDecomposition properly decomposes groupNormalization operation + */ + +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + PartialShape, // Input 0 Shape + size_t, // numGroup + float // epsilon +> GroupNormalizationParams; + +class GNDecompositionTest : public LoweringTests, public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +protected: + void SetUp() override; + std::shared_ptr snippets_model; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/include/pass/gn_tokenization.hpp b/src/common/snippets/tests/include/pass/gn_tokenization.hpp new file mode 100644 index 00000000000000..e1e5c7ee81f171 --- /dev/null +++ b/src/common/snippets/tests/include/pass/gn_tokenization.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "snippets/pass/tokenization.hpp" +#include "subgraph_group_normalization.hpp" + +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + PartialShape, // Input 0 Shape + size_t, // numGroup + float // epsilon +> GroupNormalizationParams; + +class TokenizeGNSnippetsTests : public TransformationTestsF, public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +protected: + void SetUp() override; + std::shared_ptr snippets_model; +}; + +} // 
namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/lowered/pass/loop.cpp b/src/common/snippets/tests/src/lowered/pass/loop.cpp index 560f39d96f632d..0169201e0aee60 100644 --- a/src/common/snippets/tests/src/lowered/pass/loop.cpp +++ b/src/common/snippets/tests/src/lowered/pass/loop.cpp @@ -79,8 +79,7 @@ static void validate(const LinearIR& linear_ir, const ref_map& reference) { size_t loop_num = 0; for (const auto& expr : linear_ir) { const auto& node = expr->get_node(); - ASSERT_TRUE(!ov::is_type(node) && !ov::is_type(node)); - const auto loop_end = ov::as_type_ptr(node); + const auto loop_end = ov::as_type_ptr(node); if (!loop_end) continue; ASSERT_GT(reference.count(loop_num), 0); diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index 9d429350c459b2..796290c3215766 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" #include "snippets/pass/tokenization.hpp" #include "snippets/pass/collapse_subgraph.hpp" +#include "snippets/pass/gn_tokenization.hpp" #include "snippets/lowered/expression.hpp" @@ -30,6 +31,7 @@ DummyTargetMachine::DummyTargetMachine(const std::vector& jitters[op::v1::Divide::get_type_info_static()] = dummy_functor; jitters[op::v1::Maximum::get_type_info_static()] = dummy_functor; jitters[op::v0::Exp::get_type_info_static()] = dummy_functor; + jitters[op::v0::Sqrt::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::PowerStatic::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::HorizonMax::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::HorizonSum::get_type_info_static()] = dummy_functor; @@ -42,10 +44,8 @@ DummyTargetMachine::DummyTargetMachine(const std::vector& jitters[ov::snippets::op::BroadcastMove::get_type_info_static()] = dummy_functor; 
jitters[ov::snippets::op::KernelDynamic::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::KernelStatic::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::LoopBeginDynamic::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::LoopBeginStatic::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::LoopEndDynamic::get_type_info_static()] = dummy_functor; - jitters[ov::snippets::op::LoopEndStatic::get_type_info_static()] = dummy_functor; + jitters[ov::snippets::op::LoopBegin::get_type_info_static()] = dummy_functor; + jitters[ov::snippets::op::LoopEnd::get_type_info_static()] = dummy_functor; #ifdef SNIPPETS_DEBUG_CAPS jitters[ov::snippets::op::PerfCountBegin::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::PerfCountEnd::get_type_info_static()] = dummy_functor; @@ -57,6 +57,7 @@ DummyTargetMachine::DummyTargetMachine(const std::vector& jitters[ov::snippets::op::Fill::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::ReduceMax::get_type_info_static()] = dummy_functor; jitters[ov::snippets::op::ReduceSum::get_type_info_static()] = dummy_functor; + jitters[ov::snippets::op::Reshape::get_type_info_static()] = dummy_functor; for (const auto& elem : custom_opset) { jitters[elem] = dummy_functor; @@ -133,6 +134,7 @@ std::shared_ptr LoweringTests::getTokenizedSubgraph( ov::pass::Manager m; ov::snippets::pass::SnippetsTokenization::Config config = get_default_tokenization_config(); m.register_pass(); + m.register_pass(); m.register_pass(config); m.run_passes(f); // Perform lowering diff --git a/src/common/snippets/tests/src/pass/gn_decomposition.cpp b/src/common/snippets/tests/src/pass/gn_decomposition.cpp new file mode 100644 index 00000000000000..ae501564c1284c --- /dev/null +++ b/src/common/snippets/tests/src/pass/gn_decomposition.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include 
"pass/gn_decomposition.hpp" +#include "common_test_utils/common_utils.hpp" +#include "subgraph_group_normalization.hpp" +#include "subgraph_lowered.hpp" + +namespace ov { +namespace test { +namespace snippets { + +std::string GNDecompositionTest::getTestCaseName(testing::TestParamInfo obj) { + PartialShape input_shape; + size_t num_group; + float eps; + std::tie(input_shape, num_group, eps) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::partialShape2str({input_shape}) << "_"; + result << "num_group=" << num_group << "_"; + result << "eps=" << eps; + return result.str(); +} + +void GNDecompositionTest::SetUp() { + LoweringTests::SetUp(); + PartialShape data_shape; + size_t num_group; + float eps; + std::tie(data_shape, num_group, eps) = this->GetParam(); + OPENVINO_ASSERT(data_shape.size() >= 2, "First input rank for group normalization op should be greater than 1"); + PartialShape scaleShiftShape = PartialShape{data_shape[1]}; + std::vector input_shapes = { data_shape, scaleShiftShape, scaleShiftShape}; + snippets_model = std::make_shared(input_shapes, num_group, eps); +} + +TEST_P(GNDecompositionTest, GNDecomposition) { + auto subgraph = getLoweredSubgraph(snippets_model->getOriginal()); + model = subgraph->body_ptr(); + model_ref = snippets_model->getLowered(); +} + +namespace { + +const std::vector input_shapes{ + {1, 8}, + {1, 8, 18}, + {1, 16, 8, 5}, + {3, 8, 2, 2, 3}, + {3, 8, 2, 2, 3, 3} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_GNDecomposition, + GNDecompositionTest, + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::Values(4), + ::testing::Values(0.0001)), + GNDecompositionTest::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/tests/src/pass/gn_tokenization.cpp b/src/common/snippets/tests/src/pass/gn_tokenization.cpp new file mode 100644 index 00000000000000..21701f2d7548be --- /dev/null 
+++ b/src/common/snippets/tests/src/pass/gn_tokenization.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "snippets/pass/gn_tokenization.hpp" +#include "common_test_utils/common_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +std::string TokenizeGNSnippetsTests::getTestCaseName(testing::TestParamInfo obj) { + PartialShape input_shape; + size_t num_group; + float eps; + std::tie(input_shape, num_group, eps) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::partialShape2str({input_shape}) << "_"; + result << "num_group=" << num_group << "_"; + result << "eps=" << eps; + return result.str(); +} + +void TokenizeGNSnippetsTests::SetUp() { + TransformationTestsF::SetUp(); + PartialShape data_shape; + size_t num_group; + float eps; + std::tie(data_shape, num_group, eps) = this->GetParam(); + OPENVINO_ASSERT(data_shape.size() >= 2, "First input rank for group normalization op should be greater than 1"); + PartialShape scaleShiftShape = PartialShape{data_shape[1]}; + std::vector input_shapes = { data_shape, scaleShiftShape, scaleShiftShape}; + snippets_model = std::make_shared(input_shapes, num_group, eps); + manager.register_pass(); +} + +TEST_P(TokenizeGNSnippetsTests, smoke_TokenizeGNSnippets) { + model = snippets_model->getOriginal(); + model_ref = snippets_model->getReference(); +} + +namespace { + +const std::vector input_shapes{ + {3, 10}, + {3, 10, 1}, + {3, 10, 2, 2}, + {1, 20, 2, 2, 3}, + {1, 20, 2, 2, 3, 3} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_GNTokenize, + TokenizeGNSnippetsTests, + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::Values(5), + ::testing::Values(0.0001)), + TokenizeGNSnippetsTests::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git 
a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp index 34bd210e9be394..5cd99f88d13413 100644 --- a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp @@ -18,7 +18,6 @@ class TRANSFORMATIONS_API RoPEFusionQwen; class TRANSFORMATIONS_API RoPEFusionIOSlicing; class TRANSFORMATIONS_API RoPEFusionPreprocess; class TRANSFORMATIONS_API RoPEFusionCosSinPreprocess; -class TRANSFORMATIONS_API EliminateStridedSlice; class TRANSFORMATIONS_API RoPEShareCosSin; } // namespace pass @@ -66,12 +65,6 @@ class ov::pass::RoPEFusionCosSinPreprocess : public ov::pass::MatcherPass { RoPEFusionCosSinPreprocess(); }; -class ov::pass::EliminateStridedSlice : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("EliminateStridedSlice", "0"); - EliminateStridedSlice(); -}; - class ov::pass::RoPEShareCosSin : public ov::pass::MatcherPass { public: OPENVINO_RTTI("RoPEShareCosSin", "0"); diff --git a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp index e28a71e4911e1d..ce07b9bd93020d 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -19,11 +19,13 @@ class TRANSFORMATIONS_API EliminatePad; class TRANSFORMATIONS_API EliminateSplit; class TRANSFORMATIONS_API EliminateSplitConcat; class TRANSFORMATIONS_API EliminateSqueeze; +class TRANSFORMATIONS_API EliminateUnsqueeze; class TRANSFORMATIONS_API EliminateTranspose; class TRANSFORMATIONS_API EliminateNopBroadcast; 
-class TRANSFORMATIONS_API NopSliceBeforeGatherElements; -class TRANSFORMATIONS_API NopStridedSlice; -class TRANSFORMATIONS_API NopStridedSliceByShape; +class TRANSFORMATIONS_API EliminateSliceBeforeGatherElements; +class TRANSFORMATIONS_API EliminateStridedSlice; +class TRANSFORMATIONS_API EliminateSlice; +class TRANSFORMATIONS_API EliminateStridedSliceByShape; class TRANSFORMATIONS_API NopElimination; class TRANSFORMATIONS_API PrepareShapeOpsForEliminationAroundBE; @@ -90,6 +92,16 @@ class ov::pass::EliminateSqueeze : public ov::pass::MatcherPass { EliminateSqueeze(); }; +/** + * @ingroup ov_transformation_common_api + * @brief EliminateUnsqueeze eliminates squeeze that does nothing + */ +class ov::pass::EliminateUnsqueeze : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("EliminateUnsqueeze", "0"); + EliminateUnsqueeze(); +}; + /** * @ingroup ov_transformation_common_api * @brief EliminateTranspose eliminates transpose that does nothing @@ -148,36 +160,47 @@ class ov::pass::EliminateNopBroadcast : public ov::pass::MatcherPass { /** * @ingroup ov_transformation_common_api - * @brief NopSliceBeforeGatherElements eliminates slice before GElements if slicing from 0 + * @brief EliminateSliceBeforeGatherElements eliminates slice before GElements if slicing from 0 * It is valid since GatherElements doesn't support negative indices and Slice won't affect * indexing of elements in the original tensor that GatherElements would like to take */ -class ov::pass::NopSliceBeforeGatherElements : public ov::pass::MatcherPass { +class ov::pass::EliminateSliceBeforeGatherElements : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("EliminateSliceBeforeGatherElements", "0"); + EliminateSliceBeforeGatherElements(); +}; + +/** + * @ingroup ov_transformation_common_api + * @brief EliminateStridedSlice eliminates Strided Slice in case + * tensors were not changed + */ +class ov::pass::EliminateStridedSlice : public ov::pass::MatcherPass { public: - 
OPENVINO_RTTI("NopSliceBeforeGatherElements", "0"); - NopSliceBeforeGatherElements(); + OPENVINO_RTTI("EliminateStridedSlice", "0"); + EliminateStridedSlice(); }; /** * @ingroup ov_transformation_common_api - * @brief NopStridedSlice eliminates Strided Slice in case + * @brief EliminateSlice eliminates Slice in case * tensors were not changed */ -class ov::pass::NopStridedSlice : public ov::pass::MatcherPass { +class ov::pass::EliminateSlice : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NopStridedSlice", "0"); - NopStridedSlice(); + OPENVINO_RTTI("EliminateSlice", "0"); + EliminateSlice(); }; /** * @ingroup ov_transformation_common_api - * @brief NopStridedSlice eliminates Strided Slice in case + * @brief EliminateStridedSlice eliminates Strided Slice in case * tensors were not changed */ -class ov::pass::NopStridedSliceByShape : public ov::pass::MatcherPass { +class ov::pass::EliminateStridedSliceByShape : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("NopStridedSliceByShape", "0"); - NopStridedSliceByShape(); + OPENVINO_RTTI("EliminateStridedSliceByShape", "0"); + EliminateStridedSliceByShape(); }; /** diff --git a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp index ca331d6a8ba1c0..454378a0e9bbd1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp @@ -17,6 +17,7 @@ class TRANSFORMATIONS_API StridedSliceOptimization; class TRANSFORMATIONS_API UselessSliceEraser; class TRANSFORMATIONS_API GroupedStridedSliceOptimizer; class TRANSFORMATIONS_API GroupedSliceToVSplitOptimization; +class TRANSFORMATIONS_API SliceSequenceToSingleSlice; } // namespace pass } // namespace ov @@ -56,6 +57,24 @@ class 
ov::pass::GroupedSliceToVSplitOptimization : public ov::pass::ModelPass { bool run_on_model(const std::shared_ptr& m) override; }; +/** + * @ingroup ov_transformation_common_api + * @brief SliceSequenceToSingleSlice transformation replaces group of Slice + * operations with single Slice. All Slice operations must slice data + * with the different axis. + * + * Before: + * data (shape: 2, 3, 4) -> Slice (axis 0) -> Slice (axis 1) -> Slice (axis 2) + * + * After: + * data (shape: 2, 3, 4) -> Slice (axes: 0, 1, 2) + */ +class ov::pass::SliceSequenceToSingleSlice : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("SliceSequenceToSingleSlice", "0"); + SliceSequenceToSingleSlice(); +}; + /** * @ingroup ov_transformation_common_api * @brief StridedSliceOptimization transformation executes all transformations diff --git a/src/common/transformations/include/transformations/utils/gen_pattern.hpp b/src/common/transformations/include/transformations/utils/gen_pattern.hpp index c1dbaeea411a49..711a96f460c86d 100644 --- a/src/common/transformations/include/transformations/utils/gen_pattern.hpp +++ b/src/common/transformations/include/transformations/utils/gen_pattern.hpp @@ -54,7 +54,9 @@ static bool matcher_verbose_enabled() { return enabled; } -# define _VERBOSE_LOG(...) if (matcher_verbose_enabled()) _verbose_log(__VA_ARGS__) +# define _VERBOSE_LOG(...) 
\ + if (matcher_verbose_enabled()) \ + _verbose_log(__VA_ARGS__) #else static bool matcher_verbose_enabled() { return false; @@ -724,7 +726,7 @@ class GenericPattern : public ov::pass::pattern::op::Pattern { explicit GenericPattern(const DiscreteTypeInfo& type_info, const OutputVector& args, const detail::AttrMap& attrs, - const char * vt) + const char* vt) : ov::pass::pattern::op::Pattern(args), m_type_info(type_info), m_attrs(attrs), @@ -1100,6 +1102,33 @@ inline std::shared_ptr operator|(const std::shared_ptr& lhs, const s OutputVector{lhs->get_default_output(), rhs->get_default_output()}); } +inline std::shared_ptr GenSlice2(detail::PatternNode data, + detail::PatternNode start, + detail::PatternNode stop, + detail::PatternNode step, + size_t axis) { + std::vector axes(axis + 1); + std::iota(axes.begin(), axes.end(), 0); + auto opt1 = makePattern({data, start, stop, step, axes}); + + std::vector begin_mask(axis + 1, 1); + std::vector end_mask(axis + 1, 1); + std::vector new_axis_mask; + std::vector shrink_axis_mask; + std::vector ellipsis_mask; + + begin_mask[axis] = 0; + end_mask[axis] = 0; + + auto opt2 = makePattern({data, start, stop, step}, + {{"begin_mask", begin_mask}, + {"end_mask", end_mask}, + {"new_axis_mask", new_axis_mask}, + {"shrink_axis_mask", shrink_axis_mask}, + {"ellipsis_mask", ellipsis_mask}}); + return opt1 | opt2; +} + inline std::shared_ptr GenSlice(detail::PatternNode data, Symbol start, Symbol stop, Symbol step, size_t axis) { auto opt1 = makePattern({data, {start}, {stop}, {step}, {static_cast(axis)}}); diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 8e3ccc160d6978..c49974bdbcff85 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ 
b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -92,6 +92,7 @@ #include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp" #include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp" #include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" #include "transformations/op_conversions/convert_softmax_downgrade.hpp" #include "transformations/op_conversions/convert_softmax_upgrade.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" @@ -125,7 +126,9 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptrset_friendly_name("x"); - auto half_ndims = ov::gen_pattern::Symbol("half_ndims"); + + auto varsplit = makePattern({x, 3, {half_ndims, ov::gen_pattern::Symbol("end")}}); + varsplit->set_output_size(2); + auto int32_max = std::numeric_limits::max(); // rotate half : [-x2, x1] auto x2 = GenSlice(x, half_ndims, int32_max, 1, 3); - auto x2neg = makePattern({x2, -1.0f}, {{"auto_broadcast", "numpy"}}); + auto x2neg = makePattern({x2 | varsplit->output(1), -1.0f}, {{"auto_broadcast", "numpy"}}); auto x1 = GenSlice(x, 0, half_ndims, 1, 3); - auto x_rotate_half = makePattern({x2neg, x1}, {{"axis", -1}}); + auto x_rotate_half = makePattern({x2neg, x1 | varsplit->output(0)}, {{"axis", -1}}); auto mul_cos = makePattern({x_or_cos1, x_or_cos2}, {{"auto_broadcast", "numpy"}}); auto mul_sin = makePattern({x_rotate_half, t_sin}, {{"auto_broadcast", "numpy"}}); @@ -117,13 +119,8 @@ ov::pass::RoPEFusionCosSinPreprocess::RoPEFusionCosSinPreprocess() { auto gather_positions = makePattern("i32[?,?,?,?]"); auto prepare_cos_sin_gptneox = [&](std::shared_ptr const_tab) { - auto slice1 = makePattern({const_tab, {0}, node_batch_size, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - return 
makePattern({slice1, gather_positions}, {{"axis", 2}}); + auto slice = GenSlice2(const_tab, {0}, node_batch_size, {1}, 0); + return makePattern({slice, gather_positions}, {{"axis", 2}}); }; auto seq_len = makePattern("i32[1]"); @@ -132,22 +129,12 @@ ov::pass::RoPEFusionCosSinPreprocess::RoPEFusionCosSinPreprocess() { auto head_dims = ov::gen_pattern::Symbol("head_dims"); auto prepare_cos_sin_llama = [&](std::shared_ptr const_tab) { auto ScatterUpdate = makePattern({{0, 0, 0}, 2, seq_len, 0}); - auto slice_Slice = makePattern({const_tab, {0, 0, 0}, ScatterUpdate, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); + auto slice_Slice = GenSlice2(const_tab, {0, 0, 0}, ScatterUpdate, {1, 1, 1}, 2); auto squeeze = makePattern({slice_Slice, {-1, head_dims}}); auto index_Gather = makePattern({squeeze, gather_positions_2d, 0}, {{"batch_dims", 0}}); // another simplified pattern for gathering at position_ids - auto slice_Slice2 = makePattern({const_tab, {0}, seq_len, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); + auto slice_Slice2 = GenSlice2(const_tab, {0}, seq_len, {1}, 0); auto index_Gather2 = makePattern({slice_Slice2, gather_positions_2d, 0}, {{"batch_dims", 0}}); auto unsqueeze = makePattern({index_Gather | index_Gather2, {1, 1, -1, head_dims}}); @@ -204,12 +191,16 @@ ov::pass::RoPEFusionIOSlicing::RoPEFusionIOSlicing() { MATCHER_SCOPE(RoPEFusionIOSlicing); auto int32_max = std::numeric_limits::max(); auto data = makePattern(ov::Rank(4)); - auto ndims = ov::gen_pattern::Symbol("ndims"); + + auto varsplit = makePattern({data, 3, {ndims, ov::gen_pattern::Symbol("end")}}); + varsplit->set_output_size(2); + auto x = GenSlice(data, 0, ndims, 1, 3); auto y = GenSlice(data, ndims, int32_max, 1, 3); - auto x_emb = makePattern({x, {}, {}}) | makePattern({x, {}, {}, {}}); - auto result = 
makePattern({x_emb, y}, {{"axis", -1}}); + auto x_emb = makePattern({x | varsplit->output(0), {}, {}}) | + makePattern({x | varsplit->output(0), {}, {}, {}}); + auto result = makePattern({x_emb, y | varsplit->output(1)}, {{"axis", -1}}); matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); @@ -293,83 +284,6 @@ ov::pass::RoPEFusionPreprocess::RoPEFusionPreprocess() { this->register_matcher(m, callback); } -// remove stridedslice from 0 to int32_max with stride 1 -ov::pass::EliminateStridedSlice::EliminateStridedSlice() { - MATCHER_SCOPE(EliminateStridedSlice); - auto data = ov::pass::pattern::any_input(ov::pass::pattern::has_static_rank()); - auto begin = ov::pass::pattern::wrap_type(ov::pass::pattern::type_matches(ov::element::i32)); - auto end = ov::pass::pattern::wrap_type(ov::pass::pattern::type_matches(ov::element::i32)); - auto stride = ov::pass::pattern::wrap_type(ov::pass::pattern::type_matches(ov::element::i32)); - - auto strided_slice = - ov::pass::pattern::wrap_type({data, begin, end, stride}, [](const Output& value) { - auto s1 = as_type_ptr(value.get_node_shared_ptr()); - if (!s1->get_new_axis_mask().empty() || !s1->get_shrink_axis_mask().empty() || - !s1->get_ellipsis_mask().empty()) { - return false; - } - - auto inputs = s1->input_values(); - - auto begin = as_type_ptr(inputs[1].get_node_shared_ptr()); - auto end = as_type_ptr(inputs[2].get_node_shared_ptr()); - auto stride = as_type_ptr(inputs[3].get_node_shared_ptr()); - - if (!begin) - return false; - if (!end) - return false; - if (!stride) - return false; - - // stride is all 1 - auto v_stride = stride->cast_vector(); - for (auto& v : v_stride) { - if (v != 1) - return false; - } - - auto v_begin = begin->cast_vector(); - auto v_end = end->cast_vector(); - if (v_begin.size() != v_end.size()) { - return false; - } - - auto& begin_mask = s1->get_begin_mask(); - auto& end_mask = s1->get_end_mask(); - auto 
mask_size = begin_mask.size(); - if (begin_mask.size() != end_mask.size()) { - return false; - } - - auto int32_max = std::numeric_limits::max(); - size_t i = 0; - for (; i < mask_size; i++) { - if (begin_mask[i] != end_mask[i]) - return false; - // all valid [begin, end] are [0, int32_max] - if (begin_mask[i] == 0 && end_mask[i] == 0) { - if (v_begin[i] != 0 || v_end[i] != int32_max) - return false; - } - } - // the non-masked part - for (; i < v_begin.size(); i++) { - if (v_begin[i] != 0 || v_end[i] != int32_max) - return false; - } - return true; - }); - - matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { - auto root = m.get_match_root(); - return replace_output_update_name(root->output(0), root->input_value(0)); - }; - - auto m = std::make_shared(strided_slice, matcher_name); - this->register_matcher(m, callback); -} - ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { MATCHER_SCOPE(RoPEFusionGPTJ); @@ -381,6 +295,10 @@ ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { // view_Reshape : B,L,H,S auto slice_Slice_965 = GenSlice(view_Reshape, 0, ndims, 1, 3); + auto varsplit_view_Reshape = + makePattern({view_Reshape, 3, {ndims, ov::gen_pattern::Symbol("end")}}); + varsplit_view_Reshape->set_output_size(2); + auto gather_sin_cos = makePattern("f32"); auto varsplit = makePattern({gather_sin_cos, -1, {ndims / 2, -1}}); @@ -404,12 +322,12 @@ ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto repeat_interleave_cos = makePattern({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}}); // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2]) - auto slice_Slice_1174 = GenSlice(slice_Slice_965, 1, int32_max, 2, 3); + auto slice_Slice_1174 = GenSlice(slice_Slice_965 | varsplit_view_Reshape->output(0), 1, int32_max, 2, 3); auto neg_Multiply_1177 = makePattern({slice_Slice_1174, -1.0f}, {{"auto_broadcast", "numpy"}}); auto Unsqueeze_65524 = makePattern({neg_Multiply_1177, -1}); - auto slice_Slice_1168 = GenSlice(slice_Slice_965, 0, int32_max, 2, 3); + auto slice_Slice_1168 = 
GenSlice(slice_Slice_965 | varsplit_view_Reshape->output(0), 0, int32_max, 2, 3); auto Unsqueeze_65525 = makePattern({slice_Slice_1168, -1}); auto stack_1182 = makePattern({Unsqueeze_65524, Unsqueeze_65525}, {{"axis", -1}}); @@ -423,7 +341,8 @@ ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { // x*cos [B,L,H,ndims] auto mul_cos = - makePattern({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}}); + makePattern({slice_Slice_965 | varsplit_view_Reshape->output(0), repeat_interleave_cos}, + {{"auto_broadcast", "numpy"}}); auto mul_sin = makePattern({flatten_Reshape_1198 | flatten_Reshape_Zero, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); @@ -431,7 +350,8 @@ ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto rotary_emb = makePattern({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}}); auto slice_Slice_971 = GenSlice(view_Reshape, ndims, int32_max, 1, 3); - auto cat_Concat_1211 = makePattern({rotary_emb, slice_Slice_971}, {{"axis", -1}}); + auto cat_Concat_1211 = + makePattern({rotary_emb, slice_Slice_971 | varsplit_view_Reshape->output(1)}, {{"axis", -1}}); auto permute_Transpose_1213 = makePattern({cat_Concat_1211, {0, 2, 1, 3}}); auto result = permute_Transpose_1213; @@ -513,33 +433,36 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { auto cur_key = makePattern({qkv_proj->output(split_output_id), {0, 0, head_cnt, head_size}}, {{"special_zero", true}}); - auto slice_Slice_437 = makePattern({cur_key, {0, 0, 0, 0}, {0, 0, 0, ndims}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); + auto slice_Slice_437 = GenSlice(cur_key, 0, ndims, 1, 3); + auto var_split_1 = makePattern({cur_key, 3, {ndims, ov::gen_pattern::Symbol("end")}}); + var_split_1->set_output_size(2); // rotate half auto ListConstruct_452_Concat = makePattern({seq_length, {-1}, {head_cnt}, {ndims / 2}, {2}}, {{"axis", 0}}); + auto const_target_shape_1 = 
makeConst({0, 0, head_cnt, ndims / 2, 2}); + auto ListConstruct_379_Concat = makePattern({seq_length, {-1}, {1}, {ndims / 2}, {2}}, {{"axis", 0}}); + auto const_target_shape_2 = makeConst({0, 0, 1, ndims / 2, 2}); + + auto reshape_Reshape_453 = makePattern( + {slice_Slice_437 | var_split_1->output(0), ListConstruct_452_Concat | const_target_shape_1}); - auto reshape_Reshape_453 = - makePattern({slice_Slice_437, ListConstruct_452_Concat}, {{"special_zero", false}}); auto x_even = makePattern({reshape_Reshape_453, 0, -1}, {{"batch_dims", 0}}); - auto slice_Slice_449 = makePattern({cos_sin_cache, {0}, seq_length, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto view_Reshape_460 = - makePattern({slice_Slice_449, ListConstruct_379_Concat}, {{"special_zero", false}}); + auto x_odd = makePattern({reshape_Reshape_453, 1, -1}, {{"batch_dims", 0}}); + + auto slice_Slice_449 = GenSlice2(cos_sin_cache, {0}, seq_length, {1}, 0); + auto var_split_2 = makePattern({cos_sin_cache, 0, {0, ov::gen_pattern::Symbol("end")}}); + var_split_2->set_output_size(2); + + auto view_Reshape_460 = makePattern( + {slice_Slice_449 | var_split_2->output(0), ListConstruct_379_Concat | const_target_shape_2}, + {{"special_zero", false}}); + auto cos_tab = makePattern({view_Reshape_460, 0, -1}, {{"batch_dims", 0}}); auto x_even_cos = makePattern({x_even, cos_tab}, {{"auto_broadcast", "numpy"}}); - auto x_odd = makePattern({reshape_Reshape_453, 1, -1}, {{"batch_dims", 0}}); + auto sin_tab = makePattern({view_Reshape_460, 1, -1}, {{"batch_dims", 0}}); auto x_odd_sin = makePattern({x_odd, sin_tab}, {{"auto_broadcast", "numpy"}}); auto neg_x_odd_sin = makePattern({x_odd_sin, -1.000000f}, {{"auto_broadcast", "numpy"}}); @@ -554,25 +477,16 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { auto stack_481 = makePattern({y_even, y_odd}, {{"axis", -1}}); auto ShapeOf_135133 = makePattern({stack_481}); 
- auto flatten_Slice_497 = makePattern({ShapeOf_135133, {0}, {3}, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); + auto flatten_Slice_497 = GenSlice(ShapeOf_135133, 0, 3, 1, 0); auto flatten_Concat_500 = makePattern({flatten_Slice_497, {-1}}, {{"axis", 0}}); - auto const_target_shape = makeConst({0, 0, head_cnt, ndims}); + auto const_target_shape_3 = makeConst({0, 0, head_cnt, ndims}); // [length, batch, head_cnt, half_rotary_dims, 2] auto flatten_Reshape_501 = - makePattern({stack_481, flatten_Concat_500 | const_target_shape}, {{"special_zero", true}}); - auto slice_Slice_443 = - makePattern({cur_key, {0, 0, 0, ndims}, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat_505 = makePattern({flatten_Reshape_501, slice_Slice_443}, {{"axis", -1}}); + makePattern({stack_481, flatten_Concat_500 | const_target_shape_3}, {{"special_zero", true}}); + auto slice_Slice_443 = GenSlice(cur_key, ndims, INT_MAX, 1, 3); + + auto cat_Concat_505 = + makePattern({flatten_Reshape_501, slice_Slice_443 | var_split_1->output(1)}, {{"axis", -1}}); auto result = cat_Concat_505; @@ -610,7 +524,6 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { auto new_node = std::make_shared(new_args, config); new_node->set_friendly_name(old_node->get_friendly_name()); ov::copy_runtime_info({pattern_map.at(flatten_Reshape_501).get_node_shared_ptr(), - pattern_map.at(slice_Slice_443).get_node_shared_ptr(), pattern_map.at(cat_Concat_505).get_node_shared_ptr()}, new_node); ov::replace_node(old_node, new_node); @@ -639,13 +552,7 @@ ov::pass::RoPEFusionQwen::RoPEFusionQwen(int split_output_id) { auto view_Reshape_424 = makePattern( {ListUnpack_410_VariadicSplit->output(split_output_id), {0, 0, head_cnt, head_size}}, {{"special_zero", true}}); - auto 
slice_Slice_543 = - makePattern({view_Reshape_424, {0, 0, 0, 0}, {0, 0, 0, head_size}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + auto slice_Slice_543 = GenSlice(view_Reshape_424, 0, head_size, 1, 3); // tensor_array auto hidden_states = makePattern("f32[?,?,?]"); // auto ShapeOf_485735 = makePattern({hidden_states}, {}); @@ -658,14 +565,8 @@ auto neg_Multiply = makePattern({Gather_311651, {-1}}, {{"auto_broadcast", "numpy"}}); auto ScatterUpdate_463814 = makePattern({{0, 0}, {1}, Gather_377635 | neg_Multiply, {0}}); - auto slice_Slice_446 = - makePattern({rotary_emb_cos, ScatterUpdate_463814, {0, INT_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + GenSlice2(rotary_emb_cos, ScatterUpdate_463814, {0, INT_MAX}, {1, 1}, 1); // tensor_array auto mul_Multiply_552 = makePattern({slice_Slice_543, slice_Slice_446}, {{"auto_broadcast", "numpy"}}); // tensor_array @@ -687,13 +588,12 @@ {{"special_zero", false}}); // tensor_array }; - auto reshape_opt2 = [&](std::shared_ptr input_BLHS) { - return makePattern({input_BLHS, {0, 0, 0, 2, head_size / 2}}, - {{"special_zero", true}}); // tensor_array - }; + // If with special_zero, const_shape should be checked later + auto const_shape = makePattern({}, {}); + auto reshape_special = makePattern({slice_Slice_543, const_shape}, {{"special_zero", true}}); auto ListUnpack_586_Split = - makePattern({reshape_opt1(slice_Slice_543) | reshape_opt2(slice_Slice_543), -2}, + makePattern({reshape_opt1(slice_Slice_543) | reshape_special, -2}, {{"num_splits", 2}}); // tensor_array ListUnpack_586_Split->set_output_size(2); auto Multiply_567527 = @@ -706,12 +606,7 @@ 
ov::pass::RoPEFusionQwen::RoPEFusionQwen(int split_output_id) { auto cat_Concat_593 = makePattern({ListUnpack_586_Squeeze_0, ListUnpack_586_Squeeze}, {{"axis", -1}}); // tensor_array auto slice_Slice_470 = - makePattern({rotary_emb_sin, ScatterUpdate_463814, {0, INT_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + GenSlice2(rotary_emb_sin, ScatterUpdate_463814, {0, INT_MAX}, {1, 1}, 1); // tensor_array auto mul_Multiply_594 = makePattern({cat_Concat_593, slice_Slice_470}, {{"auto_broadcast", "numpy"}}); // tensor_array @@ -744,6 +639,29 @@ ov::pass::RoPEFusionQwen::RoPEFusionQwen(int split_output_id) { config.slice_stop = config.slice_start + config.head_cnt * config.head_size; } + if (pattern_map.count(reshape_special)) { + // check reshape_special shape correctness + auto reshape_special_node = pattern_map.at(reshape_special).get_node_shared_ptr(); + auto data_shape = reshape_special_node->get_input_partial_shape(0); + auto reshape_shape = pattern_map.at(const_shape); + auto node = ov::as_type_ptr(reshape_shape.get_node_shared_ptr()); + const auto& target = node->cast_vector(); + // ensure target_shape have correct rank + if (target.size() < 3) { + return false; + } + int32_t head_size = static_cast(config.head_size); + int32_t head_cnt = static_cast(config.head_cnt); + // reshape splits the head_size of input to [2, head_size / 2] + // head_cnt of target_shape could be 0 or head_cnt + size_t target_rank = target.size(); + bool is_ok = (target[target_rank - 1] == head_size / 2) && (target[target_rank - 2] == 2) && + ((target[target_rank - 3] == 0 || target[target_rank - 3] == head_cnt)); + if (!is_ok) { + return false; + } + } + new_args.push_back(pattern_map.at(qkv_proj)); new_args.push_back(pattern_map.at(rotary_emb_cos)); new_args.push_back(pattern_map.at(rotary_emb_sin)); @@ -755,7 +673,6 @@ ov::pass::RoPEFusionQwen::RoPEFusionQwen(int 
split_output_id) { pattern_map.at(ListUnpack_586_Squeeze_0).get_node_shared_ptr(), pattern_map.at(ListUnpack_586_Squeeze).get_node_shared_ptr(), pattern_map.at(cat_Concat_593).get_node_shared_ptr(), - pattern_map.at(slice_Slice_470).get_node_shared_ptr(), pattern_map.at(mul_Multiply_594).get_node_shared_ptr(), pattern_map.at(add_Add_597).get_node_shared_ptr()}, new_node); diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 7fc8c73fe244fc..9c7d828c2497c2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -280,7 +280,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr REGISTER_PASS(manager, SharedOpOptimization) REGISTER_PASS(manager, ConstantFolding) REGISTER_PASS(manager, SymbolicOptimizations) - manager.register_pass(true); + REGISTER_PASS(manager, ResolveNameCollisions, true); manager.run_passes(f); if (!m_use_shapes) { diff --git a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp index f89fcd45a85dfe..e7206ac2078d18 100644 --- a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -338,7 +338,6 @@ static bool eliminate_unsqueeze(const shared_ptr& node) { }; SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, eliminate_reshape_v1, ov::op::v1::Reshape); -SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, eliminate_unsqueeze, ov::op::v0::Unsqueeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, eliminate_nop, op::v1::Broadcast, op::v3::Broadcast); 
SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, simplify_gather, @@ -451,6 +450,17 @@ pass::EliminateSplit::EliminateSplit() { this->register_matcher(m, callback); } +pass::EliminateUnsqueeze::EliminateUnsqueeze() { + MATCHER_SCOPE(EliminateUnsqueeze); + auto unsqueeze_pattern = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + return eliminate_unsqueeze(m.get_match_root()); + }; + auto m = make_shared(unsqueeze_pattern, matcher_name); + this->register_matcher(m, callback); +} + pass::EliminateSqueeze::EliminateSqueeze() { MATCHER_SCOPE(EliminateSqueeze); auto squeeze_pattern = pattern::wrap_type(); @@ -732,6 +742,10 @@ pass::EliminateTranspose::EliminateTranspose() { } const auto& order_values = order_const->cast_vector(); + if (order_values.empty()) { + return false; + } + vector ref_values(order_values.size()); iota(ref_values.begin(), ref_values.end(), 0); if (order_values != ref_values) { @@ -817,8 +831,8 @@ ov::pass::EliminateNopBroadcast::EliminateNopBroadcast() { register_matcher(m, matcher_pass_callback); } -ov::pass::NopSliceBeforeGatherElements::NopSliceBeforeGatherElements() { - MATCHER_SCOPE(NopSliceBeforeGatherElements); +ov::pass::EliminateSliceBeforeGatherElements::EliminateSliceBeforeGatherElements() { + MATCHER_SCOPE(EliminateSliceBeforeGatherElements); auto slice = pattern::wrap_type(); auto gather = pattern::wrap_type({slice, pattern::any_input()}); @@ -837,9 +851,38 @@ ov::pass::NopSliceBeforeGatherElements::NopSliceBeforeGatherElements() { auto m = std::make_shared(gather, matcher_name); register_matcher(m, matcher_pass_callback); } +ov::pass::EliminateSlice::EliminateSlice() { + MATCHER_SCOPE(EliminateSlice); + + auto input = pattern::any_input(); + auto begin_const = pattern::wrap_type(); + auto end_const = pattern::wrap_type(); + auto step_const = pattern::wrap_type(); + auto axes = pattern::any_input(); + auto pattern = pattern::wrap_type({input, begin_const, end_const, step_const, 
axes}); + + ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) { + auto slice = std::dynamic_pointer_cast(m.get_match_root()); + + int64_t max_int = slice->input_value(2).get_element_type() == element::i32 + ? std::numeric_limits::max() + : std::numeric_limits::max(); + bool is_nop = op::util::is_constant_and_all_values_equal_int(slice->input_value(1), 0) && + op::util::is_constant_and_all_values_equal_int(slice->input_value(2), max_int) && + op::util::is_constant_and_all_values_equal_int(slice->input_value(3), 1); + + if (is_nop) { + return replace_output_update_name(slice->output(0), slice->input_value(0)); + } else { + return false; + } + }; + auto m = std::make_shared(pattern, matcher_name); + register_matcher(m, matcher_pass_callback); +} -ov::pass::NopStridedSlice::NopStridedSlice() { - MATCHER_SCOPE(NopStridedSlice); +ov::pass::EliminateStridedSlice::EliminateStridedSlice() { + MATCHER_SCOPE(EliminateStridedSlice); auto input = pattern::any_input(); auto begin_const = pattern::wrap_type(); @@ -903,6 +946,9 @@ ov::pass::NopStridedSlice::NopStridedSlice() { auto end_node = strided_slice_node->get_input_node_shared_ptr(2); if (const auto& end_constant_node = ov::util::get_constant_from_source(end_node)) { + int64_t max_value = end_node->get_element_type() == ov::element::i32 ? 
std::numeric_limits::max() + : std::numeric_limits::max(); + auto values = end_constant_node->cast_vector(); auto end_mask = strided_slice_node->get_end_mask(); // align end_mask and values_vec by length @@ -910,7 +956,7 @@ ov::pass::NopStridedSlice::NopStridedSlice() { for (size_t i = 0; i < end_mask.size(); ++i) { // if mask == 1 then ignore the begin_mask_value else check // if values[i] == max then take whole tensor else take part of a tensor - if (!end_mask[i] && values[i] != std::numeric_limits::max()) { + if (!end_mask[i] && values[i] != max_value) { return false; } } @@ -923,15 +969,18 @@ ov::pass::NopStridedSlice::NopStridedSlice() { register_matcher(m, matcher_pass_callback); } -ov::pass::NopStridedSliceByShape::NopStridedSliceByShape() { - MATCHER_SCOPE(NopStridedSliceByShape); - auto slice = pattern::wrap_type(); +ov::pass::EliminateStridedSliceByShape::EliminateStridedSliceByShape() { + MATCHER_SCOPE(EliminateStridedSliceByShape); auto input = pattern::any_input(); - auto begin_const = pattern::any_input(); - auto end_const = pattern::any_input(); + auto begin = pattern::any_input(); + auto end = pattern::any_input(); auto optional_stride_const = pattern::wrap_type(); - auto pattern = pattern::wrap_type({input, begin_const, end_const, optional_stride_const}); + auto strided_slice = pattern::wrap_type({input, begin, end, optional_stride_const}); + + auto axes = pattern::any_input(); + auto slice = pattern::wrap_type({input, begin, end, optional_stride_const, axes}); + auto pattern = std::make_shared(OutputVector{strided_slice, slice}); ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) { auto node = m.get_match_root(); @@ -939,31 +988,33 @@ ov::pass::NopStridedSliceByShape::NopStridedSliceByShape() { return false; } auto strided_slice_node = std::dynamic_pointer_cast(node); - // check that all values of the mask is equal 0 - auto check_mask = [](const std::vector& mask_to_check) { - auto it = std::find_if(mask_to_check.begin(), 
mask_to_check.end(), [](const int64_t& value) { - return value != 0; - }); - if (mask_to_check.empty() || it == mask_to_check.end()) { - return true; + if (strided_slice_node) { + // check that all values of the mask is equal 0 + auto check_mask = [](const std::vector& mask_to_check) { + auto it = std::find_if(mask_to_check.begin(), mask_to_check.end(), [](const int64_t& value) { + return value != 0; + }); + if (mask_to_check.empty() || it == mask_to_check.end()) { + return true; + } + return false; + }; + // check that we won't do change dimention rank + if (!check_mask(strided_slice_node->get_shrink_axis_mask()) || + !check_mask(strided_slice_node->get_new_axis_mask()) || + !check_mask(strided_slice_node->get_ellipsis_mask())) { + return false; } - return false; - }; - // check that we won't do change dimention rank - if (!check_mask(strided_slice_node->get_shrink_axis_mask()) || - !check_mask(strided_slice_node->get_new_axis_mask()) || - !check_mask(strided_slice_node->get_ellipsis_mask())) { - return false; } + // check that that we will take all values - if (node->get_input_size() == 4 && !op::util::is_constant_and_all_values_equal_int(node->input_value(3), 1)) { + if (node->get_input_size() >= 4 && !op::util::is_constant_and_all_values_equal_int(node->input_value(3), 1)) { return false; } - if (strided_slice_node->get_input_partial_shape(0).is_static() && - strided_slice_node->get_output_partial_shape(0).is_static()) { - if (strided_slice_node->get_input_shape(0) == strided_slice_node->get_output_shape(0)) { - return replace_output_update_name(strided_slice_node->output(0), strided_slice_node->input_value(0)); + if (node->get_input_partial_shape(0).is_static() && node->get_output_partial_shape(0).is_static()) { + if (node->get_input_shape(0) == node->get_output_shape(0)) { + return replace_output_update_name(node->output(0), node->input_value(0)); } } return false; @@ -1022,9 +1073,9 @@ ov::pass::NopElimination::NopElimination(bool use_shape_for_elimination) 
{ ADD_MATCHER_FOR_THIS(EliminateSplit) ADD_MATCHER_FOR_THIS(EliminateTranspose) ADD_MATCHER_FOR_THIS(EliminateEltwise) - using namespace ov::pass; ADD_MATCHER_FOR_THIS(EliminateSplitConcat) - ADD_MATCHER_FOR_THIS(NopStridedSlice) + ADD_MATCHER_FOR_THIS(EliminateStridedSlice) + ADD_MATCHER_FOR_THIS(EliminateSlice) // shape-dependent transformations if (use_shape_for_elimination) { @@ -1035,8 +1086,8 @@ ov::pass::NopElimination::NopElimination(bool use_shape_for_elimination) { ADD_MATCHER_FOR_THIS(PrepareShapeOpsForEliminationAroundBE) ADD_MATCHER_FOR_THIS(EliminateBroadcast) ADD_MATCHER_FOR_THIS(EliminateNopBroadcast) - ADD_MATCHER_FOR_THIS(NopSliceBeforeGatherElements) - ADD_MATCHER_FOR_THIS(NopStridedSliceByShape) + ADD_MATCHER_FOR_THIS(EliminateSliceBeforeGatherElements) + ADD_MATCHER_FOR_THIS(EliminateStridedSliceByShape) ADD_MATCHER_FOR_THIS(EliminateGather) } } diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index d4267b940a4f86..ace7e544bc994c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -9,6 +9,7 @@ #include "itt.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/result.hpp" #include "openvino/op/slice.hpp" @@ -17,6 +18,7 @@ #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/variadic_split.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/common_optimizations/shared_ops_optimization.hpp" #include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" #include "transformations/utils/utils.hpp" @@ -366,25 +368,72 @@ bool 
ov::pass::GroupedSliceToVSplitOptimization::run_on_model(const std::shared_ return graph_rewritten; } +ov::pass::SliceSequenceToSingleSlice::SliceSequenceToSingleSlice() { + MATCHER_SCOPE(SliceSequenceToSingleSlice); + using namespace ov::op; + using namespace ov::op::util; + using namespace ov::pass::pattern; + + auto const_axes_1_pattern = wrap_type(); + auto const_axes_2_pattern = wrap_type(); + auto slice_1_pattern = + wrap_type({any_input(), any_input(), any_input(), any_input(), const_axes_1_pattern}, + consumers_count(1)); + auto slice_2_pattern = + wrap_type({slice_1_pattern, any_input(), any_input(), any_input(), const_axes_2_pattern}); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_to_output = m.get_pattern_map(); + auto slice_1 = pattern_to_output.at(slice_1_pattern); + auto slice_2 = pattern_to_output.at(slice_2_pattern); + + auto const_axes_1 = ov::as_type_ptr(pattern_to_output.at(const_axes_1_pattern)); + auto const_axes_2 = ov::as_type_ptr(pattern_to_output.at(const_axes_2_pattern)); + + auto axes_1_values = const_axes_1->cast_vector(); + auto axes_2_values = const_axes_2->cast_vector(); + + // supported a simple scenario when the axes_1 values and axes_2 values don't intersect. 
+ for (const auto& axis : axes_1_values) { + if (std::find(axes_2_values.begin(), axes_2_values.end(), axis) != axes_2_values.end()) { + return false; + } + } + + auto begin = std::make_shared(OutputVector{slice_1->input_value(1), slice_2->input_value(1)}, 0); + auto end = std::make_shared(OutputVector{slice_1->input_value(2), slice_2->input_value(2)}, 0); + auto step = std::make_shared(OutputVector{slice_1->input_value(3), slice_2->input_value(3)}, 0); + auto axes = std::make_shared(OutputVector{slice_1->input_value(4), slice_2->input_value(4)}, 0); + auto one_slice = std::make_shared(slice_1->input_value(0), + try_fold_unary_output(begin), + try_fold_unary_output(end), + try_fold_unary_output(step), + try_fold_unary_output(axes)); + + ov::copy_runtime_info({slice_1, slice_2}, {one_slice, begin, end, step, axes}); + one_slice->set_friendly_name(slice_2->get_friendly_name()); + ov::replace_node(slice_2, one_slice); + return true; + }; + auto m = std::make_shared(slice_2_pattern, matcher_name); + register_matcher(m, callback); +} + ov::pass::StridedSliceOptimization::StridedSliceOptimization(bool use_shapes) { m_use_shapes = use_shapes; } bool ov::pass::StridedSliceOptimization::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(StridedSliceOptimization); - - ov::pass::Manager manager(get_pass_config()); - using namespace ov::pass; - REGISTER_PASS(manager, SliceToStridedSlice, m_use_shapes) - manager.run_passes(f); - - bool rewritten = false; + ov::pass::Manager manager; + manager.set_per_pass_validation(false); if (m_use_shapes) { - rewritten = UselessSliceEraser().run_on_model(f); - // Execution of other passes is also needed even if 'rewritten' is already 'true' - rewritten = SharedOpOptimization().run_on_model(f) || rewritten; - rewritten = GroupedStridedSliceOptimizer().run_on_model(f) || rewritten; - rewritten = GroupedSliceToVSplitOptimization().run_on_model(f) || rewritten; + manager.register_pass(); + manager.register_pass(); + 
manager.register_pass(); + manager.register_pass(); } - return rewritten; + + manager.register_pass(); + return manager.run_passes(f); } diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index efa7518ac97d16..f7ad3f56595053 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -353,7 +353,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { bool pass::SimplifyShapeOfSubGraph::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); - Manager manager; + Manager manager(get_pass_config()); manager.set_per_pass_validation(false); REGISTER_PASS(manager, PrepareShapeOpsForEliminationAroundBE) diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp index 1c6cd30db15fcb..197644ffca6d84 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp @@ -200,6 +200,14 @@ ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) { bool ov::pass::SymbolicOptimizations::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(SymbolicOptimizations); + + // Eliminate Squeeze/Unsqueeze might convert Squeeze/Unsqueeze ops to Reshape + // it may break NNCF patterns and lead to unexpected FakeQuantize ops in the model. + // So we decided to disable these passes in SymbolicOptimizations. 
+ const auto& pass_config = m_manager->get_pass_config(); + pass_config->disable(); + pass_config->disable(); + m_manager->run_passes(m); ov::remove_skip_invalidation_rti(m); return true; diff --git a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp index 79d036e08bc208..5b54b4a7cce437 100644 --- a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp +++ b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp @@ -574,3 +574,160 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML) { std::make_shared(ov::NodeVector{rope}, ov::ParameterVector{input, seq_length, cos_sin_cache}); } } + +TEST_F(TransformationTestsF, ConvertToROPE_chatGML_Slice) { + using namespace ov; + disable_rt_info_check(); + const int batch = 2; + const int seq_len = 7; + const int num_heads = 32; + const int ndims = 128; + const int rotary_ndims = 64; + const int max_pos_length = 2048; + { + auto input = std::make_shared(ov::element::f32, ov::Shape{seq_len, batch, 4608}); + auto seq_length = std::make_shared(ov::element::i32, ov::Shape{1}); + auto cos_sin_cache = + std::make_shared(ov::element::f32, + ov::Shape{max_pos_length, batch, rotary_ndims / 2, 2}); + auto ListUnpack = makeOP({input, -1, {4096, 256, 256}}); + auto view_Reshape = + makeOP({ListUnpack->output(0), {0, 0, num_heads, ndims}}, {{"special_zero", true}}); + + auto VariadicSplit_20795 = makeOP({view_Reshape, 3, {rotary_ndims, -1}}); + auto reshape_Reshape = + makeOP({VariadicSplit_20795->output(0), {0, 0, num_heads, rotary_ndims / 2, 2}}, + {{"special_zero", true}}); + + auto select_Gather = makeOP({reshape_Reshape, 0, -1}, {{"batch_dims", 0}}); + auto slice_Slice_1 = makeOP({cos_sin_cache, {0}, seq_length, {1}, {0}}); + auto ListConstruct_Concat_1 = + makeOP({seq_length, {-1}, {1}, {rotary_ndims / 2}, {2}}, {{"axis", 
0}}); + auto view_Reshape_1 = + makeOP({slice_Slice_1, ListConstruct_Concat_1}, {{"special_zero", false}}); + + auto select_Gather_1 = makeOP({view_Reshape_1, 0, -1}, {{"batch_dims", 0}}); + auto mul_Multiply_1 = makeOP({select_Gather, select_Gather_1}, {{"auto_broadcast", "numpy"}}); + auto select_Gather_2 = makeOP({reshape_Reshape, 1, -1}, {{"batch_dims", 0}}); + auto select_Gather_3 = makeOP({view_Reshape_1, 1, -1}, {{"batch_dims", 0}}); + auto mul_Multiply_2 = + makeOP({select_Gather_2, select_Gather_3}, {{"auto_broadcast", "numpy"}}); + auto Multiply_23724 = makeOP({mul_Multiply_2, -1.000000f}, {{"auto_broadcast", "numpy"}}); + auto sub_Subtract = makeOP({mul_Multiply_1, Multiply_23724}, {{"auto_broadcast", "numpy"}}); + + auto Unsqueeze_57121 = makeOP({sub_Subtract, -1}); + auto mul_Multiply_3 = + makeOP({select_Gather_2, select_Gather_1}, {{"auto_broadcast", "numpy"}}); + auto mul_Multiply_4 = makeOP({select_Gather, select_Gather_3}, {{"auto_broadcast", "numpy"}}); + auto add_Add = makeOP({mul_Multiply_3, mul_Multiply_4}, {{"auto_broadcast", "numpy"}}); + auto Unsqueeze_57122 = makeOP({add_Add, -1}); + auto stack = makeOP({Unsqueeze_57121, Unsqueeze_57122}, {{"axis", -1}}); + auto flatten_Reshape = + makeOP({stack, {0, 0, num_heads, rotary_ndims}}, {{"special_zero", true}}); + auto cat_Concat = makeOP({flatten_Reshape, VariadicSplit_20795->output(1)}, {{"axis", -1}}); + + model = std::make_shared(ov::NodeVector{cat_Concat}, + ov::ParameterVector{input, seq_length, cos_sin_cache}); + } + manager.register_pass(); + { + auto input = std::make_shared(ov::element::f32, ov::Shape{seq_len, batch, 4608}); + auto seq_length = std::make_shared(ov::element::i32, ov::Shape{1}); + auto cos_sin_cache = + std::make_shared(ov::element::f32, + ov::Shape{max_pos_length, batch, rotary_ndims / 2, 2}); + auto rope = makeOP({input, cos_sin_cache, cos_sin_cache}, + {{"config.slice_start", 0}, + {"config.slice_stop", 4096}, + {"config.input_trans0213", false}, + 
{"config.is_interleaved", false}, + {"config.rotary_ndims", rotary_ndims}, + {"config.is_chatglm", true}, + {"config.is_qwen", false}, + {"config.head_cnt", num_heads}, + {"config.head_size", ndims}, + {"config.gather_position_arg_id", 0}}); + model_ref = + std::make_shared(ov::NodeVector{rope}, ov::ParameterVector{input, seq_length, cos_sin_cache}); + } +} + +TEST_F(TransformationTestsF, ConvertToROPE_GPTJ_Slice) { + disable_rt_info_check(); + using namespace ov; + + const int batch = 2; + const int seq_len = 7; + const int num_heads = 16; + const int ndims = 256; + const int rotary_ndims = 64; + { + std::vector rpi_idx(rotary_ndims); + for (int i = 0, index = 0; i < rotary_ndims; i += 2, index++) { + rpi_idx[i] = index; + rpi_idx[i + 1] = index; + } + auto repeat_interleave_index = makeConst(ov::element::i32, ov::Shape({rotary_ndims}), rpi_idx); + + auto input = + std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, num_heads, ndims}); + auto gather_sin_cos = + std::make_shared(ov::element::f32, ov::Shape{1, seq_len, rotary_ndims}); + + auto ListUnpack_VariadicSplit = makeOP({gather_sin_cos, -1, {rotary_ndims / 2, -1}}); + auto sin_tab = makeOP({ListUnpack_VariadicSplit->output(0), 2}); + auto cos_tab = makeOP({ListUnpack_VariadicSplit->output(1), 2}); + + auto repeat_interleave_Sin = + makeOP({sin_tab, repeat_interleave_index, {3}}, {{"batch_dims", 0}}); + auto repeat_interleave_Cos = + makeOP({cos_tab, repeat_interleave_index, {3}}, {{"batch_dims", 0}}); + + auto VariadicSplit_39740 = makeOP({input, 3, {rotary_ndims, -1}}); + + auto mul_Multiply = makeOP({VariadicSplit_39740->output(0), repeat_interleave_Cos}, + {{"auto_broadcast", "numpy"}}); + auto slice_Slice_10 = makeOP({VariadicSplit_39740->output(0), {1}, {INT_MAX}, {2}, {3}}); + auto Constant_134252 = makeConst(element::f32, + ov::Shape({ + 1, + 1, + 1, + 1, + }), + {-1.000000f}); + + auto neg_Multiply = makeOP({slice_Slice_10, Constant_134252}, {{"auto_broadcast", "numpy"}}); + auto 
Unsqueeze_47361 = makeOP({neg_Multiply, -1}); + auto slice_Slice_14 = makeOP({VariadicSplit_39740->output(0), {0}, {INT_MAX}, {2}, {3}}); + auto Unsqueeze_47362 = makeOP({slice_Slice_14, -1}); + auto stack = makeOP({Unsqueeze_47361, Unsqueeze_47362}, {{"axis", -1}}); + auto flatten_Reshape = makeOP({stack, {0, 0, 16, rotary_ndims}}, {{"special_zero", true}}); + auto mul_Multiply_1 = + makeOP({flatten_Reshape, repeat_interleave_Sin}, {{"auto_broadcast", "numpy"}}); + auto add_Add = makeOP({mul_Multiply, mul_Multiply_1}, {{"auto_broadcast", "numpy"}}); + auto cat_Concat = makeOP({add_Add, VariadicSplit_39740->output(1)}, {{"axis", -1}}); + auto permute_Transpose = makeOP({cat_Concat, {0, 2, 1, 3}}); + + model = + std::make_shared(ov::NodeVector{permute_Transpose}, ov::ParameterVector{input, gather_sin_cos}); + } + manager.register_pass(); + { + auto input = + std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, num_heads, ndims}); + auto cos_sin = std::make_shared(ov::element::f32, ov::Shape{1, seq_len, rotary_ndims}); + auto rope = makeOP({input, cos_sin, cos_sin}, + {{"config.slice_start", 0}, + {"config.slice_stop", 0}, + {"config.input_trans0213", false}, + {"config.is_interleaved", true}, + {"config.is_chatglm", false}, + {"config.is_qwen", false}, + {"config.head_cnt", 0}, + {"config.head_size", 0}, + {"config.rotary_ndims", rotary_ndims}, + {"config.gather_position_arg_id", 0}}); + model_ref = std::make_shared(ov::NodeVector{rope}, ov::ParameterVector{input, cos_sin}); + } +} \ No newline at end of file diff --git a/src/common/transformations/tests/common_optimizations/moc_transformations.cpp b/src/common/transformations/tests/common_optimizations/moc_transformations.cpp index c7ffe2626c0b25..d054605fba726e 100644 --- a/src/common/transformations/tests/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/tests/common_optimizations/moc_transformations.cpp @@ -8,6 +8,7 @@ #include +#include "common_test_utils/ov_test_utils.hpp" 
#include "openvino/core/model.hpp" #include "openvino/opsets/opset12.hpp" #include "openvino/pass/manager.hpp" @@ -59,3 +60,21 @@ TEST(TransformationTests, TestModelTensorsConsistencyUseShapesFalse) { model->validate_nodes_and_infer_types(); EXPECT_TRUE(model->outputs()[0].get_names() == new_tensors); } + +TEST_F(TransformationTestsF, SqueezeRemainsSqueezeAfterMOC) { + { + using namespace ov::op; + auto input = std::make_shared(element::f32, Shape{30}); + auto shape = v0::Constant::create(element::i64, Shape{5}, {2, 3, 1, 5, 1}); + auto reshape = std::make_shared(input, shape, false); + auto unsqueeze_axes = v0::Constant::create(element::i64, Shape{1}, {0}); + auto unsqueeze = std::make_shared(reshape, unsqueeze_axes); + + auto squeeze_axes = v0::Constant::create(element::i64, Shape{2}, {3, 5}); + auto squeeze = std::make_shared(unsqueeze, squeeze_axes); + + auto res = std::make_shared(squeeze); + model = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{input}); + manager.register_pass(false); + } +} diff --git a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp index d3656b37599b42..e9b4bf3d8be3d1 100644 --- a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp @@ -1447,7 +1447,7 @@ TEST_F(TransformationTestsF, NopTile) { } } -TEST_F(TransformationTestsF, NopSliceBeforeGatherElements) { +TEST_F(TransformationTestsF, EliminateSliceBeforeGatherElements) { { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); @@ -1463,7 +1463,7 @@ TEST_F(TransformationTestsF, NopSliceBeforeGatherElements) { auto relu = std::make_shared(gather_elements); auto result = std::make_shared(relu); model = std::make_shared(ResultVector{result}, ParameterVector{data, indices}); - manager.register_pass(); + manager.register_pass(); } { auto data = 
std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); @@ -1477,7 +1477,7 @@ TEST_F(TransformationTestsF, NopSliceBeforeGatherElements) { } } -TEST_F(TransformationTestsF, NopStridedSlice) { +TEST_F(TransformationTestsF, EliminateStridedSlice) { { auto input = std::make_shared(ov::element::f32, PartialShape{ov::Dimension(), 4, ov::Dimension(), 64}); @@ -1498,7 +1498,7 @@ TEST_F(TransformationTestsF, NopStridedSlice) { auto result = std::make_shared(strided_slice); model = std::make_shared(ResultVector{result}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(); } { auto input = std::make_shared(ov::element::f32, @@ -1510,7 +1510,39 @@ TEST_F(TransformationTestsF, NopStridedSlice) { } } -TEST_F(TransformationTestsF, NopStridedSliceWithoutStrides) { +TEST_F(TransformationTestsF, EliminateStridedSlice_int32max) { + { + auto input = std::make_shared(ov::element::f32, PartialShape{-1, 4, -1, 64}); + auto relu = std::make_shared(input); + auto begin_const = std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{0, 0}); + auto end_const = + std::make_shared(ov::element::i32, + ov::Shape{2}, + std::vector{0, std::numeric_limits::max()}); + auto optional_stride_const = + std::make_shared(ov::element::i32, ov::Shape{2}, std::vector{1, 1}); + auto strided_slice = std::make_shared(relu, + begin_const, + end_const, + optional_stride_const, + std::vector{1, 0, 1, 1}, + std::vector{1, 0, 1, 1}); + auto result = std::make_shared(strided_slice); + + model = std::make_shared(ResultVector{result}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, + PartialShape{ov::Dimension(), 4, ov::Dimension(), 64}); + auto relu = std::make_shared(input); + auto result = std::make_shared(relu); + + model_ref = std::make_shared(ResultVector{result}, ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, EliminateStridedSliceWithoutStrides) { { auto input = std::make_shared(ov::element::f32, 
PartialShape{ov::Dimension(), 4, ov::Dimension(), 64}); @@ -1528,7 +1560,7 @@ TEST_F(TransformationTestsF, NopStridedSliceWithoutStrides) { auto result = std::make_shared(strided_slice); model = std::make_shared(ResultVector{result}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(); } { auto input = std::make_shared(ov::element::f32, @@ -1540,7 +1572,7 @@ TEST_F(TransformationTestsF, NopStridedSliceWithoutStrides) { } } -TEST_F(TransformationTestsF, NopStridedSliceByShape) { +TEST_F(TransformationTestsF, EliminateStridedSliceByShape) { { auto input = std::make_shared(ov::element::f32, Shape{1, 4, 8, 64}); auto relu = std::make_shared(input); @@ -1560,7 +1592,7 @@ TEST_F(TransformationTestsF, NopStridedSliceByShape) { auto result = std::make_shared(strided_slice); model = std::make_shared(ResultVector{result}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(); } { auto input = std::make_shared(ov::element::f32, Shape{1, 4, 8, 64}); @@ -1571,7 +1603,7 @@ TEST_F(TransformationTestsF, NopStridedSliceByShape) { } } -TEST_F(TransformationTestsF, NopStridedSliceByShapeNegative) { +TEST_F(TransformationTestsF, EliminateStridedSliceByShapeNegative) { { auto input = std::make_shared(ov::element::f32, Shape{1, 4, 8, 64}); auto relu = std::make_shared(input); @@ -1588,7 +1620,7 @@ TEST_F(TransformationTestsF, NopStridedSliceByShapeNegative) { auto result = std::make_shared(strided_slice); model = std::make_shared(ResultVector{result}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(); } { auto input = std::make_shared(ov::element::f32, Shape{1, 4, 8, 64}); @@ -1633,3 +1665,84 @@ TEST_F(TransformationTestsF, SqueezeBinaryReshape) { model_ref = std::make_shared(OutputVector{relu}, ParameterVector{data}); } } + +TEST_F(TransformationTestsF, EliminateSlice) { + using namespace op::v0; + auto type = element::i64; + { + auto input = std::make_shared(element::f32, PartialShape{3, 6, 9}); + auto slice 
= std::make_shared(input, + Constant::create(type, {1}, {0}), + Constant::create(type, {1}, {std::numeric_limits::max()}), + Constant::create(type, {1}, {1}), + Constant::create(type, {1}, {1})); + auto relu = std::make_shared(slice); + + auto result = std::make_shared(relu); + + model = std::make_shared(ResultVector{result}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, PartialShape{3, 6, 9}); + auto relu = std::make_shared(input); + auto result = std::make_shared(relu); + model_ref = std::make_shared(ResultVector{result}, ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, EliminateSlice_int32max) { + using namespace op::v0; + auto type = element::i32; + { + auto input = std::make_shared(element::f32, PartialShape{3, 6, 9}); + auto slice = std::make_shared(input, + Constant::create(type, {1}, {0}), + Constant::create(type, {1}, {std::numeric_limits::max()}), + Constant::create(type, {1}, {1}), + Constant::create(type, {1}, {1})); + auto relu = std::make_shared(slice); + + auto result = std::make_shared(relu); + model = std::make_shared(ResultVector{result}, ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, PartialShape{3, 6, 9}); + auto relu = std::make_shared(input); + auto result = std::make_shared(relu); + model_ref = std::make_shared(ResultVector{result}, ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, TransposeWithEmptyOrder) { + { + auto data = std::make_shared(element::f32, PartialShape{1, 2}); + auto relu = std::make_shared(data); + auto empty_order = std::make_shared(element::i32, Shape{0}, std::vector()); + auto transpose = std::make_shared(relu, empty_order); + + auto result = std::make_shared(transpose); + model = std::make_shared(OutputVector{result}, ParameterVector{data}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, TransposeElimination) { + { + auto data = 
std::make_shared(element::f32, PartialShape{1, 2}); + auto relu = std::make_shared(data); + auto order = std::make_shared(element::i32, Shape{2}, std::vector{0, 1}); + auto transpose = std::make_shared(relu, order); + + auto result = std::make_shared(transpose); + model = std::make_shared(OutputVector{result}, ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, PartialShape{1, 2}); + auto relu = std::make_shared(data); + auto result = std::make_shared(relu); + model_ref = std::make_shared(OutputVector{result}, ParameterVector{data}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp index 82a4647b6ce614..b274f3cc0cb6c3 100644 --- a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp +++ b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp @@ -21,6 +21,7 @@ #include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset8.hpp" #include "openvino/pass/constant_folding.hpp" +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" #include "transformations/utils/utils.hpp" using namespace ov; @@ -420,6 +421,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_default_axes) { auto slice = std::make_shared(data, begin, end, step); model = std::make_shared(NodeVector{slice}, ParameterVector{data}); + manager.register_pass(true); manager.register_pass(); } { @@ -451,7 +453,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_axes_const_sorted_full) { auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data}); - manager.register_pass(); + manager.register_pass(true); } { auto data = std::make_shared(element::f32, Shape{2, 4, 3, 5}); @@ -482,6 +484,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_all_const) { auto 
slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{}); + manager.register_pass(true); manager.register_pass(); } { @@ -535,6 +538,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_sss_params_axes_const_sorted_le auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); manager.register_pass(); } { @@ -576,6 +580,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_sss_params_axes_const_unsorted) auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); manager.register_pass(); } { @@ -618,6 +623,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_sss_params_axes_const_negative_ auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); manager.register_pass(); } { @@ -650,6 +656,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_sss_params_dyn_shape_axes_const auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); manager.register_pass(); } { @@ -693,6 +700,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_sss_params_static_shape_axes_co auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); manager.register_pass(); } { @@ -735,6 +743,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_dyn_rank_axes_const_positive) { auto slice = std::make_shared(data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin, end, step}); + manager.register_pass(true); 
manager.register_pass(); } { @@ -809,6 +818,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_begin_param_shape_of_use_shapes auto slice = std::make_shared(shape_of_data, begin, end, step, axes); model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin}); + manager.register_pass(true); manager.register_pass(true); manager.register_pass(); } @@ -851,6 +861,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_begin_param_shape_of_use_shapes model = std::make_shared(NodeVector{slice}, ParameterVector{data, begin}); manager.register_pass(); + manager.register_pass(false); manager.register_pass(false); manager.register_pass(); } @@ -953,6 +964,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_slice_all_use_shapes_true) { auto slice = std::make_shared(relu, begin, end, step); model = std::make_shared(NodeVector{slice}, ParameterVector{data}); + manager.register_pass(true); manager.register_pass(true); manager.register_pass(); } @@ -992,6 +1004,7 @@ TEST_F(TransformationTestsF, SliceToStridedSlice_slice_all_use_shapes_false) { auto slice = std::make_shared(relu, begin, end, step); model = std::make_shared(NodeVector{slice}, ParameterVector{data}); + manager.register_pass(false); manager.register_pass(false); manager.register_pass(); } @@ -1214,3 +1227,80 @@ TEST_F(TransformationTestsF, GroupedSliceToVSplitNegativeStartStop) { model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); } } + +TEST_F(TransformationTestsF, SliceSequenceToSingleSlice) { + auto data_pshape = ov::PartialShape{10, 5, 5, 10}; + auto data_type = ov::element::f32; + { + auto data = std::make_shared(data_type, data_pshape); + + auto slice_0 = make_slice(data, 1, 10, 1, 0); + auto slice_1 = make_slice(slice_0, -1, 1, -1, 1); + auto slice_2 = make_slice(slice_1, -7, INT32_MAX, 2, 3); + + model = std::make_shared(ov::OutputVector{slice_2}, ov::ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(data_type, data_pshape); + 
auto slice = std::make_shared( + data, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, -7}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {10, 1, INT32_MAX}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, 2}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {0, 1, 3})); + model_ref = std::make_shared(ov::NodeVector{slice}, ov::ParameterVector{data}); + } +} + +TEST_F(TransformationTestsF, SliceSequenceToSingleSliceStartAsParameter) { + auto data_pshape = ov::PartialShape{10, 5, 5, 10}; + auto data_type = ov::element::f32; + { + auto data = std::make_shared(data_type, data_pshape); + + auto start_0 = std::make_shared(element::i64, ov::PartialShape{1}); + auto start_1 = std::make_shared(element::i64, ov::PartialShape{1}); + auto start_2 = std::make_shared(element::i64, ov::PartialShape{1}); + auto slice_0 = + std::make_shared(data, + start_0, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {10}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {0})); + + auto slice_1 = + std::make_shared(slice_0, + start_1, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {-1}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1})); + + auto slice_2 = std::make_shared( + slice_1, + start_2, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {INT32_MAX}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {3})); + + model = std::make_shared(ov::OutputVector{slice_2}, + ov::ParameterVector{data, start_0, start_1, start_2}); + manager.register_pass(); + } + { + auto data = std::make_shared(data_type, data_pshape); + auto start_0 = std::make_shared(element::i64, ov::PartialShape{1}); + auto start_1 = 
std::make_shared(element::i64, ov::PartialShape{1}); + auto start_2 = std::make_shared(element::i64, ov::PartialShape{1}); + auto concat_0_1 = std::make_shared(OutputVector{start_0, start_1}, 0); + auto concat_1_2 = std::make_shared(OutputVector{concat_0_1, start_2}, 0); + auto slice = std::make_shared( + data, + concat_1_2, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {10, 1, INT32_MAX}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {1, -1, 2}), + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, {0, 1, 3})); + model_ref = + std::make_shared(ov::NodeVector{slice}, ov::ParameterVector{data, start_0, start_1, start_2}); + } +} diff --git a/src/common/util/include/openvino/util/file_util.hpp b/src/common/util/include/openvino/util/file_util.hpp index d41b21e78edd57..96d0b62865d59d 100644 --- a/src/common/util/include/openvino/util/file_util.hpp +++ b/src/common/util/include/openvino/util/file_util.hpp @@ -125,6 +125,15 @@ bool is_absolute_file_path(const std::string& path); */ void create_directory_recursive(const std::string& path); +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +/** + * @brief Interface function to create directory recursively by given path + * @param path - path to file wide-string, can be relative to current working directory + * @throw runtime_error if any error occurred + */ +void create_directory_recursive(const std::wstring& path); +#endif + /** * @brief Interface function to check if directory exists for given path * @param path - path to directory diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index 13d372916fc2c7..032010a4d2b23e 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -31,7 +31,10 @@ # define wstat _wstat # endif /// @brief Windows-specific 'mkdir' wrapper -# define makedir(dir) _mkdir(dir) +# define makedir(dir) _mkdir(dir.c_str()) +# ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +# define wmakedir(dir) 
_wmkdir(dir.c_str()) +# endif // Copied from linux libc sys/stat.h: # if !defined(__MINGW32__) && !defined(__MINGW64__) # define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) @@ -55,7 +58,10 @@ /// @brief Get absolute file path, returns NULL in case of error # define get_absolute_path(result, path) realpath(path.c_str(), result) /// @brief mkdir wrapper -# define makedir(dir) mkdir(dir, 0755) +# define makedir(dir) mkdir(dir.c_str(), 0755) +# ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +# define wmakedir(dir) mkdir(ov::util::wstring_to_string(dir).c_str(), 0755) +# endif #endif std::string ov::util::get_file_name(const std::string& s) { @@ -341,9 +347,9 @@ void ov::util::convert_path_win_style(std::string& path) { std::string ov::util::wstring_to_string(const std::wstring& wstr) { # ifdef _WIN32 - int size_needed = WideCharToMultiByte(CP_ACP, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); + int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); std::string strTo(size_needed, 0); - WideCharToMultiByte(CP_ACP, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); + WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); return strTo; # else std::wstring_convert> wstring_decoder; @@ -355,9 +361,9 @@ std::wstring ov::util::string_to_wstring(const std::string& string) { const char* str = string.c_str(); # ifdef _WIN32 int strSize = static_cast(std::strlen(str)); - int size_needed = MultiByteToWideChar(CP_ACP, 0, str, strSize, NULL, 0); + int size_needed = MultiByteToWideChar(CP_UTF8, 0, str, strSize, NULL, 0); std::wstring wstrTo(size_needed, 0); - MultiByteToWideChar(CP_ACP, 0, str, strSize, &wstrTo[0], size_needed); + MultiByteToWideChar(CP_UTF8, 0, str, strSize, &wstrTo[0], size_needed); return wstrTo; # else std::wstring_convert> wstring_encoder; @@ -397,6 +403,27 @@ bool ov::util::is_absolute_file_path(const std::string& path) { #endif // _WIN32 } +#ifdef 
OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +void ov::util::create_directory_recursive(const std::wstring& path) { + if (path.empty() || directory_exists(path)) { + return; + } + + std::size_t pos = path.rfind(ov::util::FileTraits::file_separator); + if (pos != std::wstring::npos) { + create_directory_recursive(path.substr(0, pos)); + } + + int err = wmakedir(path); + if (err != 0 && errno != EEXIST) { + std::stringstream ss; + // TODO: in case of exception it may be needed to remove all created sub-directories + ss << "Couldn't create directory [" << ov::util::wstring_to_string(path) << "], err=" << strerror(errno) << ")"; + throw std::runtime_error(ss.str()); + } +} +#endif + void ov::util::create_directory_recursive(const std::string& path) { if (path.empty() || directory_exists(path)) { return; @@ -407,7 +434,7 @@ void ov::util::create_directory_recursive(const std::string& path) { create_directory_recursive(path.substr(0, pos)); } - int err = makedir(path.c_str()); + int err = makedir(path); if (err != 0 && errno != EEXIST) { std::stringstream ss; // TODO: in case of exception it may be needed to remove all created sub-directories diff --git a/src/core/reference/include/openvino/reference/roi_align.hpp b/src/core/reference/include/openvino/reference/roi_align.hpp index 8e0a76af108971..351d8be7116320 100644 --- a/src/core/reference/include/openvino/reference/roi_align.hpp +++ b/src/core/reference/include/openvino/reference/roi_align.hpp @@ -70,7 +70,7 @@ class ROIAlignOpDefPolicy { : SamplingSpaceBase(start_x_, start_y_, size_x_, size_y_) {} }; - ROIAlignOpDefPolicy() : aligned(false), offset_src(0), offset_dst(0) {} + ROIAlignOpDefPolicy() : rois(nullptr), spatial_scale(0.0f), aligned(false), offset_src(0), offset_dst(0) {} void init(const T* rois_, const Shape& shape_, float spatial_scale_, AlignedMode aligned_mode, bool) { rois = rois_; diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 61446c416d132d..3cc95403f3ecf7 
100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -13,9 +13,11 @@ #include "openvino/op/parameter.hpp" #include "openvino/op/util/multi_subgraph_base.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/op/util/symbolic_info.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/env_util.hpp" #include "openvino/util/file_util.hpp" +#include "transformations/symbolic_transformations/symbolic_optimizations.hpp" /* * As we are visualizing the graph, we will make some tweaks to the generated dot file to make @@ -213,6 +215,16 @@ static void collect_symbol_print_values(const std::shared_ptr& m, bool ov::pass::VisualizeTree::run_on_model(const std::shared_ptr& f) { RUN_ON_MODEL_SCOPE(VisualizeTree); + + static const bool ovasp = ov::util::getenv_bool("OV_VISUALIZE_APPLY_SYMBOLIC_PROPAGATION"); + if (ovasp) { + std::cerr << "Warning: OV_VISUALIZE_APPLY_SYMBOLIC_PROPAGATION enabled. ov::pass::SymbolicPropagation will be " + "triggered" + << std::endl; + ov::pass::SymbolicPropagation().run_on_model(f); + std::cerr << "ov::pass::SymbolicPropagation finished successfully" << std::endl; + } + std::unordered_map height_maps; for (auto& node : f->get_ops()) { @@ -257,7 +269,13 @@ bool ov::pass::VisualizeTree::run_on_model(const std::shared_ptr& f) // Clean up local variable not to hold node pointers m_nodes_with_attributes.clear(); - + if (ovasp) { + std::cerr << "Warning: Due to previously triggered SymbolicPropagation we need to clean-up the model from " + "symbols. 
It includes model revalidation" + << std::endl; + ov::remove_skip_invalidation_rti(f); + std::cerr << "Model revalidation finished successfully" << std::endl; + } return false; } diff --git a/src/frontends/onnx/frontend/src/op/multinomial.cpp b/src/frontends/onnx/frontend/src/op/multinomial.cpp new file mode 100644 index 00000000000000..16de91dc826acf --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/multinomial.cpp @@ -0,0 +1,53 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op/multinomial.hpp" + +#include "exceptions.hpp" +#include "openvino/op/multinomial.hpp" +#include "utils/common.hpp" + +using namespace ov::op; +using ::ONNX_NAMESPACE::TensorProto_DataType; +namespace ov { +namespace frontend { +namespace onnx { +namespace op { +namespace set_1 { + +ov::OutputVector multinomial(const ov::frontend::onnx::Node& node) { + const auto input = node.get_ov_inputs().at(0); + + const auto sample_size = node.get_attribute_as_constant("sample_size", 1); + + const auto dtype = + node.get_attribute_value("dtype", + static_cast(TensorProto_DataType::TensorProto_DataType_INT32)); + const auto seed = node.get_attribute_value("seed", 0.0f); + const auto target_type = common::get_ov_element_type(dtype); + const uint64_t global_seed = 0; + // OpenVINO supports only uint64 seeds with a meaningful 0 value (seed will be auto-generated). + // Because we use a seed as a just meaningful identifier we may + // just interpret its value as a 32-bit value (float zero value is same with + // uint32 zero value). + // Float -0 value will be interpreted as a valid uint32 value. 
+ const void* seed_ptr = &seed; // To prevent strict-aliasing error + const uint64_t seed_uint64 = *static_cast(seed_ptr); + + auto multinomial_op = std::make_shared(input, + sample_size, + target_type, + true, + true, + seed_uint64, + global_seed); + + return {multinomial_op}; +} + +} // namespace set_1 +} // namespace op +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/multinomial.hpp b/src/frontends/onnx/frontend/src/op/multinomial.hpp new file mode 100644 index 00000000000000..81e9b31531cdc3 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/multinomial.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "core/node.hpp" + +namespace ov { +namespace frontend { +namespace onnx { +namespace op { +namespace set_1 { +ov::OutputVector multinomial(const ov::frontend::onnx::Node& node); +} // namespace set_1 +} // namespace op +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index 5ab8a792c1fefa..cbc26d22849e42 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -119,6 +119,7 @@ #include "op/mmdeploy_roi_align_rotated.hpp" #include "op/mod.hpp" #include "op/mul.hpp" +#include "op/multinomial.hpp" #include "op/neg.hpp" #include "op/nms_rotated.hpp" #include "op/non_max_suppression.hpp" @@ -462,6 +463,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Mod", 1, mod); REGISTER_OPERATOR("Mul", 1, mul); REGISTER_OPERATOR("Mul", 7, mul); + REGISTER_OPERATOR("Multinomial", 1, multinomial); REGISTER_OPERATOR("Neg", 1, neg); REGISTER_OPERATOR("NonMaxSuppression", 1, non_max_suppression); REGISTER_OPERATOR("NonZero", 1, non_zero); diff --git a/src/frontends/onnx/tests/models/multinomial.prototxt 
b/src/frontends/onnx/tests/models/multinomial.prototxt new file mode 100644 index 00000000000000..f2bf4e6cc551f3 --- /dev/null +++ b/src/frontends/onnx/tests/models/multinomial.prototxt @@ -0,0 +1,55 @@ +ir_version: 7 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "input" + output: "output" + op_type: "Multinomial" + attribute { + name: "sample_size" + i: 5 + type: INT + } + attribute { + name: "seed" + f: 1.23 + type: FLOAT + } + } + name: "test_multinomial" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 7 + shape { + dim { + dim_value: -1 + } + dim { + dim_value: -1 + } + } + } + } + } +} +opset_import { + version: 7 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 59a53c0016eb1a..1d0d9c488bf997 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6718,3 +6718,28 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_min_20_boolean) { test_case.add_expected_output(expected_output); test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_multinomial_7) { + auto model = convert_model("multinomial.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + + auto expected_shape = Shape{3, 5}; + EXPECT_EQ(model->get_output_shape(0), expected_shape); + + std::vector input_values = {0.1f, 0.2f, 0.7f, 0.2f, 0.4f, 0.4f, 1.0f, 0.0f, 0.0f}; + test_case.add_input(ov::Shape{3, 3}, input_values); + + // Values are collected for seed 1.23 + if (std::string("${BACKEND_NAME}") == std::string("INTERPRETER")) { + test_case.add_expected_output(Shape{3, 5}, {0, 2, 0, 1, 1, 2, 1, 2, 1, 2, 0, 1, 0, 0, 0}); + } else if (std::string("${BACKEND_NAME}") == std::string("IE_CPU")) { + test_case.add_expected_output(Shape{3, 5}, {2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 1, 0, 1, 0}); + 
} else if (std::string("${BACKEND_NAME}") == std::string("IE_GPU")) { + test_case.add_expected_output(Shape{3, 5}, {1, 0, 0, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0}); + } else { + GTEST_FAIL(); + } + + test_case.run(); +} diff --git a/src/frontends/tensorflow/README.md b/src/frontends/tensorflow/README.md index 0fb2a29823ae6f..13db207ca6c6c9 100644 --- a/src/frontends/tensorflow/README.md +++ b/src/frontends/tensorflow/README.md @@ -31,7 +31,6 @@ flowchart BT ``` The MO tool and model conversion API now use the TensorFlow Frontend as the default path for conversion to IR. -Known limitations of TF FE are described [here](https://docs.openvino.ai/nightly/openvino_docs_MO_DG_TensorFlow_Frontend.html). ## Key contacts diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index 4e171257af1f74..ff01473f969c0d 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -22,11 +22,11 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | AdjustContrast | NO | | | AdjustContrastv2 | YES | | | AdjustHue | NO | | -| AdjustSaturation | NO | | +| AdjustSaturation | YES | | | All | YES | | | AllCandidateSampler | NO | | | AllToAll | NO | | -| AngleNEW | YES | | +| Angle | YES | | | AnonymousHashTable | NO | | | AnonymousIterator | NO | | | AnonymousIteratorV2 | NO | | @@ -57,7 +57,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ApplyProximalGradientDescent | NO | | | ApplyRMSProp | NO | | | ApproxTopK | NO | | -| ApproximateEqualNEW | YES | | +| ApproximateEqual | YES | | | ArgMax | YES | | | ArgMin | YES | | | AsString | NO | | @@ -139,7 +139,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | BiasAdd | YES | | | BiasAddGrad | NO | | | BiasAddV1 | NO | | -| BincountNEW | YES | | +| Bincount | YES | | | Bitcast | NO | | | BitwiseAnd | YES | | | BitwiseOr | YES | | @@ 
-629,7 +629,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | MatrixDiagV2 | NO | | | MatrixDiagV3 | NO | | | MatrixExponential | NO | | -| MatrixInverseNEW | YES | | +| MatrixInverse | YES | | | MatrixLogarithm | NO | | | MatrixSetDiag | NO | | | MatrixSetDiagV2 | NO | | @@ -838,8 +838,8 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | RaggedGather | NO | | | RaggedRange | NO | | | RaggedTensorFromVariant | NO | | -| RaggedTensorToSparseNEW | YES | openvino-tokenizers required | -| RaggedTensorToTensorNEW | YES | openvino-tokenizers required | +| RaggedTensorToSparse | YES | openvino-tokenizers required | +| RaggedTensorToTensor | YES | openvino-tokenizers required | | RaggedTensorToVariant | NO | | | RaggedTensorToVariantGradient | NO | | | RandomCrop | NO | | @@ -988,7 +988,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ReverseV2 | YES | | | RewriteDataset | NO | | | RightShift | NO | | -| RintNEW | YES | | +| Rint | YES | | | RngReadAndSkip | NO | | | RngSkip | NO | | | Roll | YES | | @@ -1209,7 +1209,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | StatelessTruncatedNormalV2 | NO | | | StatelessWhile | YES | | | StaticRegexFullMatch | YES | | -| StaticRegexReplaceNEW | YES | openvino-tokenizers required | +| StaticRegexReplace | YES | openvino-tokenizers required | | StatsAggregatorHandle | NO | | | StatsAggregatorHandleV2 | NO | | | StatsAggregatorSetSummaryWriter | NO | | @@ -1221,13 +1221,13 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | StringFormat | NO | | | StringJoin | YES | | | StringLength | NO | | -| StringLowerNEW | YES | openvino-tokenizers required | +| StringLower | YES | openvino-tokenizers required | | StringNGrams | NO | | | StringSplit | NO | | -| StringSplitV2NEW | YES | openvino-tokenizers required | +| StringSplitV2 | YES | openvino-tokenizers 
required | | StringStrip | NO | | -| StringToHashBucket | NO | | -| StringToHashBucketFastNEW | YES | openvino-tokenizers required | +| StringToHashBucketFast | YES | openvino-tokenizers required | | StringToHashBucketStrong | NO | | | StringToNumber | NO | | | StringUpper | NO | | diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 80e5b710a4fb79..fc311c9b7bd82e 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -205,6 +205,7 @@ const std::map get_supported_ops() { // Separate translators: {"AddN", CreatorFunction(translate_add_n_op)}, {"AdjustContrastv2", CreatorFunction(translate_adjust_contrast_op)}, + {"AdjustSaturation", CreatorFunction(translate_adjust_saturation_op)}, {"Angle", CreatorFunction(translate_angle_op)}, {"ArgMax", CreatorFunction(translate_arg_max_op)}, {"ArgMin", CreatorFunction(translate_arg_min_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 08bf463807af57..264df2d9a0cb80 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -35,6 +35,7 @@ OP_CONVERTER(translate_addv2_op); OP_CONVERTER(translate_add_n_op); OP_CONVERTER(translate_approximate_equal_op); OP_CONVERTER(translate_adjust_contrast_op); +OP_CONVERTER(translate_adjust_saturation_op); OP_CONVERTER(translate_angle_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); diff --git a/src/frontends/tensorflow_common/src/op/adjust_saturation.cpp b/src/frontends/tensorflow_common/src/op/adjust_saturation.cpp new file mode 100644 index 00000000000000..620ca2f8bb966a --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/adjust_saturation.cpp @@ -0,0 +1,206 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" 
+#include "openvino/op/abs.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/clamp.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/floor.hpp" +#include "openvino/op/floor_mod.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/greater.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/maximum.hpp" +#include "openvino/op/minimum.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/reduce_max.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/reduce_min.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +shared_ptr, shared_ptr, shared_ptr>> convert_rgb_to_hsv( + const shared_ptr& images, + element::Type type) { + // image format conversion based on + // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/image/adjust_saturation_op.cc + + auto const_zero_f_ = make_shared(type, Shape{}, 0.0f); + auto const_one_f_ = make_shared(type, Shape{}, 1.0f); + auto const_six_f_ = make_shared(type, Shape{}, 6.0f); + + // Find max and min across channel axis. 
Max = Value (V) + auto const_minus_one_i_1 = make_shared(element::i32, Shape{1}, -1); + auto max_rgb = make_shared(images, const_minus_one_i_1, true); + auto min_rgb = make_shared(images, const_minus_one_i_1, true); + + auto range = make_shared(max_rgb, min_rgb); + auto vv = max_rgb; + + // compute Saturation (S) + auto ss_ = make_shared(range, vv); + auto ss = make_shared(make_shared(vv, const_zero_f_), ss_, const_zero_f_); + + // compute normalization factor (for Hue calculation) + auto norm = make_shared(const_one_f_, make_shared(const_six_f_, range)); + + // Split the image tensor into R, G, B channels + auto const_minus_one_i = make_shared(element::i32, Shape{}, -1); + auto channels = make_shared(images, const_minus_one_i, 3); + + auto r = channels->output(0); + auto g = channels->output(1); + auto b = channels->output(2); + + // compute Hue (H) + // determine which component is the max (V) to compute Hue (H) + auto r_eq_v = make_shared(r, vv); + auto g_eq_v = make_shared(g, vv); + + // r == vv: hh = norm * (g - b) + auto hue_case_r = make_shared(norm, make_shared(g, b)); + + // g == vv: hh = norm * (b - r) + 2.0 / 6.0 + auto const_2_by_6 = make_shared(type, Shape{}, 2.0f / 6.0f); + auto hue_case_g = + make_shared(make_shared(norm, make_shared(b, r)), const_2_by_6); + + // b == vv: hh = norm * (r - g) + 4.0 / 6.0 + auto const_4_by_6 = make_shared(type, Shape{}, 4.0f / 6.0f); + auto hue_case_b = + make_shared(make_shared(norm, make_shared(r, g)), const_4_by_6); + + // select hue based on the maximum component + // check if `r` is the max, otherwise check if `g` is the max, if not use `b`'s hue + auto hh = make_shared(r_eq_v, + hue_case_r, // Use hue_case_r if r is max + make_shared(g_eq_v, + hue_case_g, // Use hue_case_g if g is max + hue_case_b // Use hue_case_b otherwise (b is max) + )); + + // range = 0.0: hh = 0 + auto hh_zero_range = make_shared(make_shared(range, const_zero_f_), const_zero_f_, hh); + + // hh < 0.0: hh = hh + 1 + auto hh_final = 
make_shared(make_shared(hh, const_zero_f_), + make_shared(hh_zero_range, const_one_f_), + hh_zero_range); + + return make_shared, shared_ptr, shared_ptr>>(hh_final, ss, vv); +} + +shared_ptr hsv_to_rgb(const shared_ptr& h, + const shared_ptr& s, + const shared_ptr& v, + element::Type type) { + // image format conversion based on + // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/image/adjust_saturation_op.cc + auto const_six_f_ = make_shared(type, Shape{}, 6.0f); + auto const_two_f_ = make_shared(type, Shape{}, 2.0f); + auto const_one_f_ = make_shared(type, Shape{}, 1.0f); + auto const_zero_f_ = make_shared(type, Shape{}, 0.0f); + + auto const_minus_one_i_ = make_shared(element::i32, Shape{}, -1); + auto const_minus_two_i_ = make_shared(element::i32, Shape{}, -2); + + // c = s * v; + auto c = make_shared(s, v); + // m = v - c; + auto m = make_shared(v, c); + // dh = h * 6; + auto dh = make_shared(h, const_six_f_); + + // fmodu rounded to within [0, 2) + auto fmodu = make_shared(dh, const_two_f_); + + // x = c * (1 - std::abs(fmodu - 1)); + auto x = make_shared( + c, + make_shared(const_one_f_, make_shared(make_shared(fmodu, const_one_f_)))); + + // h_category: [batch_dims..., H, W, 1] + auto h_category = make_shared(make_shared(dh), element::i32); + + auto zeros = make_shared(const_zero_f_, make_shared(x)); + + auto rr_options = NodeVector{c, x, zeros, zeros, x, c}; + auto gg_options = NodeVector{x, c, c, x, zeros, zeros}; + auto bb_options = NodeVector{zeros, zeros, x, c, c, x}; + + // rr_concat: [batch_dims..., H, W, 6] + auto rr_concat = make_shared(rr_options, -1); + auto gg_concat = make_shared(gg_options, -1); + auto bb_concat = make_shared(bb_options, -1); + + // rr_unsqueeze: [batch_dims..., H, W, 6, 1] + auto rr_unsqueeze = make_shared(rr_concat, const_minus_one_i_); + auto gg_unsqueeze = make_shared(gg_concat, const_minus_one_i_); + auto bb_unsqueeze = make_shared(bb_concat, const_minus_one_i_); + + // rgb_options: 
[batch_dims..., H, W, 6, 3] + auto rgb_options = make_shared(NodeVector{rr_unsqueeze, gg_unsqueeze, bb_unsqueeze}, -1); + + // use a gather operation to select the correct channel values based on h_category + // rgb: [batch_dims..., H, W, 3] + // int batch_dim = rgb_options->get_shape().size() - 2; + int batch_dim = -1; + auto rgb_gather = make_shared(rgb_options, h_category, const_minus_two_i_, batch_dim); + auto rgb = make_shared(rgb_gather, const_minus_two_i_); + + auto rgb_adjust = make_shared(rgb, m); + + // return concatenated RGB + return rgb_adjust; +} + +OutputVector translate_adjust_saturation_op(const NodeContext& node) { + default_op_checks(node, 2, {"AdjustSaturation"}); + auto images = node.get_input(0); + auto scale = node.get_input(1); + auto node_name = node.get_name(); + + auto type = images.get_element_type(); + + auto hsv_components = convert_rgb_to_hsv(images.get_node_shared_ptr(), type); + auto hh = get<0>(*hsv_components); + auto ss = get<1>(*hsv_components); + auto vv = get<2>(*hsv_components); + + scale = make_shared(scale, images); + + auto ss_adjust = make_shared(make_shared(ss, scale), 0.0f, 1.0f); + + auto new_images = hsv_to_rgb(hh, ss_adjust, vv, type); + + auto adjust_saturation = new_images->output(0); + + set_node_name(node_name, adjust_saturation.get_node_shared_ptr()); + return {adjust_saturation}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp index 8c5bae65b7e372..596cb651763d57 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp @@ -9,6 +9,7 @@ #include #include "openvino/core/runtime_attribute.hpp" +#include 
"openvino/core/type/element_type.hpp" #include "openvino/frontend/tensorflow_lite/visibility.hpp" namespace ov { @@ -30,12 +31,16 @@ class SparsityInfo : public ov::RuntimeAttribute { const std::vector& traversal_order, const std::vector& block_map, const std::vector& dim_format, - const std::vector& data_desc) + const std::vector& data_desc, + const ov::element::Type target_type, + const uint8_t* values) : m_shape(shape), m_traversal_order(traversal_order), m_block_map(block_map), m_dim_format(dim_format), - m_data_desc(data_desc) { + m_data_desc(data_desc), + m_target_type(target_type), + m_values(values) { enable(); } @@ -72,6 +77,21 @@ class SparsityInfo : public ov::RuntimeAttribute { void set_data_desc(const std::vector& data_desc) { m_data_desc = data_desc; } + const ov::element::Type get_target_type() const { + return m_target_type; + } + + void set_target_type(const ov::element::Type target_type) { + m_target_type = target_type; + } + + const uint8_t* get_values() const { + return m_values; + } + + void set_values(const uint8_t* values) { + m_values = values; + } bool is_disabled() const { return m_disabled; } @@ -100,6 +120,8 @@ class SparsityInfo : public ov::RuntimeAttribute { std::vector m_dim_format; // List of dimension's format std::vector m_data_desc; // Tensor data descriptors std::vector m_data; // Dense data + ov::element::Type m_target_type; // Target type + const uint8_t* m_values; // Sparse values bool m_disabled; // Unpack sparse tensor and return pointer on unpacked data diff --git a/src/frontends/tensorflow_lite/src/decoder_flatbuffer.cpp b/src/frontends/tensorflow_lite/src/decoder_flatbuffer.cpp index c5477b8b93aeea..15cba54e54e659 100644 --- a/src/frontends/tensorflow_lite/src/decoder_flatbuffer.cpp +++ b/src/frontends/tensorflow_lite/src/decoder_flatbuffer.cpp @@ -20,14 +20,18 @@ namespace { TensorMetaInfo extract_tensor_meta_info(const TensorInfo& tensor_info) { TensorMetaInfo tensor_meta_info; const auto tensor = tensor_info.tensor; + 
const uint8_t* tensor_data = + (tensor_info.buffer && tensor_info.buffer->data() ? tensor_info.buffer->data()->data() : nullptr); tensor_meta_info.m_partial_shape = ov::frontend::tensorflow_lite::get_ov_shape(tensor->shape(), tensor->shape_signature()); tensor_meta_info.m_element_type = ov::frontend::tensorflow_lite::get_ov_type(tensor->type()); tensor_meta_info.m_quantization_info = ov::frontend::tensorflow_lite::get_quantization(tensor->quantization()); - tensor_meta_info.m_sparsity_info = ov::frontend::tensorflow_lite::get_sparsity(tensor->shape(), tensor->sparsity()); - tensor_meta_info.m_tensor_data = - (tensor_info.buffer && tensor_info.buffer->data() ? tensor_info.buffer->data()->data() : nullptr); + tensor_meta_info.m_sparsity_info = ov::frontend::tensorflow_lite::get_sparsity(tensor->shape(), + tensor->sparsity(), + tensor_meta_info.m_element_type, + tensor_data); + tensor_meta_info.m_tensor_data = tensor_data; tensor_meta_info.m_tensor_name = tensor->name()->str(); return tensor_meta_info; @@ -108,6 +112,8 @@ std::shared_ptr DecoderFlatBuffe const ov::frontend::InputModel& model) const { const auto tensor = tensor_info.tensor; std::vector names = {tensor->name()->str()}; + const uint8_t* tensor_data = + (tensor_info.buffer && tensor_info.buffer->data() ? tensor_info.buffer->data()->data() : nullptr); return std::make_shared( model, @@ -115,8 +121,11 @@ std::shared_ptr DecoderFlatBuffe ov::frontend::tensorflow_lite::get_ov_type(tensor->type()), names, ov::frontend::tensorflow_lite::get_quantization(tensor->quantization()), - ov::frontend::tensorflow_lite::get_sparsity(tensor->shape(), tensor->sparsity()), - (tensor_info.buffer && tensor_info.buffer->data() ? 
tensor_info.buffer->data()->data() : nullptr)); + ov::frontend::tensorflow_lite::get_sparsity(tensor->shape(), + tensor->sparsity(), + ov::frontend::tensorflow_lite::get_ov_type(tensor->type()), + tensor_data), + tensor_data); } ov::Any get_value_as_ov_any(const flexbuffers::Reference& value) { diff --git a/src/frontends/tensorflow_lite/src/sparsity_info.cpp b/src/frontends/tensorflow_lite/src/sparsity_info.cpp index 68a19ba2be02de..0dbb4f59381b8d 100644 --- a/src/frontends/tensorflow_lite/src/sparsity_info.cpp +++ b/src/frontends/tensorflow_lite/src/sparsity_info.cpp @@ -14,22 +14,27 @@ bool ov::frontend::tensorflow_lite::SparsityInfo::is_copyable() const { template static void read_sparse_data(uint8_t* dest, uint8_t* dest_end, + const uint8_t* values, const size_t row_size, + const size_t element_size, const ::flatbuffers::Vector* indices, const ::flatbuffers::Vector* segments) { - uint8_t* data = dest - row_size; // row size will be increased at first step - T last_idx = ~static_cast(0); - for (auto idx = indices->begin(); idx != indices->end(); ++idx) { - if (*idx <= last_idx) { - data += row_size; + U last_segment = *segments->begin(); + size_t idx = 0; + for (auto segment = segments->begin() + 1; segment != segments->end(); last_segment = *segment, ++segment) { + FRONT_END_GENERAL_CHECK(dest < dest_end, "Dense data is out of bounds"); + size_t element_count = *segment - last_segment; + for (size_t i = 0; i < element_count; ++i, ++idx) { + auto row_offset = (*indices)[static_cast(idx)] * element_size; + auto value_offset = idx * element_size; + memcpy(static_cast(static_cast(dest)) + row_offset, values + value_offset, element_size); } - FRONT_END_GENERAL_CHECK(data + *idx < dest_end, "Dense data is out of bounds"); - static_cast(static_cast(data))[*idx] = segments->Get(*idx); - last_idx = *idx; + dest += row_size; } } void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { + FRONT_END_GENERAL_CHECK(m_values, "Values are not found"); size_t sparse_idx = 
0; for (; sparse_idx < m_dim_format.size(); ++sparse_idx) { if (m_dim_format[sparse_idx] == ::tflite::DimensionType_SPARSE_CSR) @@ -39,20 +44,8 @@ void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { FRONT_END_GENERAL_CHECK(sparse_idx == (m_dim_format.size() - 1), "Supports only sparse tensor with sparse dimension as a last dimension"); - size_t total_size = 0, // Size of data in bytes - row_size = 0; // Size of data row in bytes - switch (m_data_desc[sparse_idx].segments_type) { - case ::tflite::SparseIndexVector_Uint8Vector: - total_size = 1; - break; - case ::tflite::SparseIndexVector_Uint16Vector: - total_size = 2; - break; - case ::tflite::SparseIndexVector_Int32Vector: - total_size = 4; - break; - } - row_size = total_size; // Byte size is same + size_t total_size = m_target_type.size(), // Size of data in bytes + row_size = total_size; // Size of data row in bytes for (size_t dim = 0; dim < m_shape.size(); ++dim) { total_size *= m_shape[dim]; switch (m_dim_format[dim]) { @@ -76,21 +69,27 @@ void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { case ::tflite::SparseIndexVector_Uint8Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Uint16Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Int32Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; @@ -104,21 +103,27 @@ void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { case 
::tflite::SparseIndexVector_Uint8Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Uint16Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Int32Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; @@ -132,21 +137,27 @@ void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { case ::tflite::SparseIndexVector_Uint8Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Uint16Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; case ::tflite::SparseIndexVector_Int32Vector: read_sparse_data(m_data.data(), m_data.data() + total_size, + m_values, row_size, + m_target_type.size(), static_cast(m_data_desc[sparse_idx].indices)->values(), static_cast(m_data_desc[sparse_idx].segments)->values()); break; @@ -160,4 +171,4 @@ void* ov::frontend::tensorflow_lite::SparsityInfo::densify() { break; } return m_data.data(); -} \ No newline at end of file +} diff --git a/src/frontends/tensorflow_lite/src/utils.cpp b/src/frontends/tensorflow_lite/src/utils.cpp index 
fc753ccc2de646..7400c3a092ebc1 100644 --- a/src/frontends/tensorflow_lite/src/utils.cpp +++ b/src/frontends/tensorflow_lite/src/utils.cpp @@ -30,13 +30,17 @@ std::shared_ptr ov::frontend::t std::shared_ptr ov::frontend::tensorflow_lite::get_sparsity( const flatbuffers::Vector* tf_shape, - const tflite::SparsityParameters* tf_sparsity) { + const tflite::SparsityParameters* tf_sparsity, + const ov::element::Type target_type, + const uint8_t* buffer) { if (tf_shape == nullptr) return {}; if (tf_sparsity == nullptr) return {}; auto sparsity = std::make_shared(); sparsity->set_shape({tf_shape->begin(), tf_shape->end()}); + sparsity->set_values(buffer); + sparsity->set_target_type(target_type); if (tf_sparsity->traversal_order() != nullptr) sparsity->set_traversal_order({tf_sparsity->traversal_order()->begin(), tf_sparsity->traversal_order()->end()}); if (tf_sparsity->block_map() != nullptr) diff --git a/src/frontends/tensorflow_lite/src/utils.hpp b/src/frontends/tensorflow_lite/src/utils.hpp index 126a080111d0f5..b531e9f22d6d11 100644 --- a/src/frontends/tensorflow_lite/src/utils.hpp +++ b/src/frontends/tensorflow_lite/src/utils.hpp @@ -23,7 +23,9 @@ std::shared_ptr get_quantization(const tflite::QuantizationPar void apply_quantization(ov::Output& output, ov::element::Type type); void dequantize_inputs(OutputVector& deq_inputs); std::shared_ptr get_sparsity(const flatbuffers::Vector* tf_shape, - const tflite::SparsityParameters* tf_sparsity); + const tflite::SparsityParameters* tf_sparsity, + const ov::element::Type target_type, + const uint8_t* buffer); template OutputVector get_indexed_outputs(const T& outputs); diff --git a/src/frontends/tensorflow_lite/tests/convert_model.cpp b/src/frontends/tensorflow_lite/tests/convert_model.cpp index d6c05df626ef4c..c61fd0a28b318c 100644 --- a/src/frontends/tensorflow_lite/tests/convert_model.cpp +++ b/src/frontends/tensorflow_lite/tests/convert_model.cpp @@ -12,7 +12,6 @@ using TFLiteConvertModelTest = FrontEndConvertModelTest; 
static const std::vector models{ std::string("2in_2out/2in_2out.tflite"), - std::string("downloads/pose_detector.tflite"), }; INSTANTIATE_TEST_SUITE_P(TFLiteConvertModelTest, diff --git a/src/frontends/tensorflow_lite/tests/convert_tricky_models.cpp b/src/frontends/tensorflow_lite/tests/convert_tricky_models.cpp new file mode 100644 index 00000000000000..d00667abd32927 --- /dev/null +++ b/src/frontends/tensorflow_lite/tests/convert_tricky_models.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/ov_test_utils.hpp" +#include "common_test_utils/test_case.hpp" +#include "common_test_utils/test_control.hpp" +#include "common_test_utils/test_tools.hpp" +#include "common_test_utils/type_prop.hpp" +#include "conversion_extension.hpp" +#include "gtest/gtest.h" +#include "tf_utils.hpp" + +using namespace ov; +using namespace ov::frontend::tensorflow_lite::tests; + +static std::string s_manifest = ""; + +using Inputs = std::vector>; +using Outputs = std::vector>; + +OPENVINO_TEST(TensorFlowLiteTrickyModels, tflite_dequantize) { + auto model = convert_model("dequantize.tflite"); + + auto test_case = ov::test::TestCase(model, ov::test::utils::DEVICE_CPU); + test_case.add_input({1, 1, 1, 1}); + test_case.add_expected_output(Shape{2, 2}, {2, 1.75f, 2001, 0.876f}); + test_case.run_with_tolerance_as_fp(0.001f); +} + +OPENVINO_TEST(TensorFlowLiteTrickyModels, tflite_densify) { + auto model = convert_model("densify.tflite"); + + auto test_case = ov::test::TestCase(model, ov::test::utils::DEVICE_CPU); + test_case.add_input(Shape{1, 2, 3, 3}, {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1}); + test_case.add_expected_output(Shape{1, 2, 2, 4}, {2, 1, 0, 0, 0, 3, 1, 0, 0, 2, 0, 0, 2, 0, 1, 0}); + test_case.run(); +} diff --git a/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_densify.py 
b/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_densify.py new file mode 100644 index 00000000000000..4619b5d69095b0 --- /dev/null +++ b/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_densify.py @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys + +import tensorflow as tf + +# Create the graph and model +class SampleGraph(tf.Module): + def __init__(self): + super(SampleGraph, self).__init__() + self.var1 = tf.constant([[[[0,0,1,0],[0,0,0,0],[0,2,1,0]],[[0,0,0,0],[0,1,0,0],[2,0,0,0]]]], dtype=tf.float32) + @tf.function(input_signature=[tf.TensorSpec([1,2,3,3], tf.float32)]) + def __call__(self, x): + conv = tf.raw_ops.Conv2D(input=x, filter=self.var1, strides=[1,1,1,1], padding="VALID") + return {'test_output_name': conv} + +module = SampleGraph() +sm_path = os.path.join(sys.argv[1], "densify") +tf.saved_model.save(module, sm_path) + +converter = tf.lite.TFLiteConverter.from_saved_model(sm_path) # path to the SavedModel directory +converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops. + tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops. +] +converter.allow_custom_ops = True +converter.optimizations = [tf.lite.Optimize.DEFAULT, tf.lite.Optimize.EXPERIMENTAL_SPARSITY] +converter.target_spec.supported_types = [tf.float16] +tflite_model = converter.convert() + +# Save the model. 
+with open(os.path.join(sys.argv[1], sm_path + ".tflite"), 'wb') as f: + f.write(tflite_model) \ No newline at end of file diff --git a/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_dequantize.py b/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_dequantize.py new file mode 100644 index 00000000000000..15e12ac73053ca --- /dev/null +++ b/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_dequantize.py @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys + +import tensorflow as tf + +# Create the graph and model +class SampleGraph(tf.Module): + def __init__(self): + super(SampleGraph, self).__init__() + self.var1 = tf.constant([[1, 0.75],[2000.43, -0.12345]], dtype=tf.float32) + @tf.function(input_signature=[tf.TensorSpec([2,2], tf.float32)]) + def __call__(self, x): + res = self.var1 + x + return {'test_output_name': res} + +module = SampleGraph() +sm_path = os.path.join(sys.argv[1], "dequantize") +tf.saved_model.save(module, sm_path) + +converter = tf.lite.TFLiteConverter.from_saved_model(sm_path) # path to the SavedModel directory +converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops. + tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops. +] +converter.allow_custom_ops = True +converter.optimizations = [tf.lite.Optimize.DEFAULT] +converter.target_spec.supported_types = [tf.float16] +tflite_model = converter.convert() + +# Save the model. 
+with open(os.path.join(sys.argv[1], sm_path + ".tflite"), 'wb') as f: + f.write(tflite_model) \ No newline at end of file diff --git a/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_pose_detector.py b/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_pose_detector.py deleted file mode 100644 index f69319c2917ae3..00000000000000 --- a/src/frontends/tensorflow_lite/tests/test_models/gen_scripts/generate_pose_detector.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2018-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# This is a way to test Densify operation. Should be removed after enabling Layer-based test - -import numpy as np -import os -import sys -import requests -import zipfile - -src_url = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/latest/pose_landmarker_lite.task" -path_to_model_dir = os.path.join(sys.argv[1], "downloads") -tflite_file_name = 'pose_detector.tflite' -tflite_model_path = os.path.join(path_to_model_dir, tflite_file_name) -task_file_name = 'pose_landmarker_lite.task' -task_file_path = os.path.join(path_to_model_dir, task_file_name) -if not os.path.exists(path_to_model_dir): - os.makedirs(path_to_model_dir, exist_ok=True) -if not os.path.exists(tflite_model_path): - if not os.path.exists(task_file_path): - try: - response = requests.get(src_url, allow_redirects=True, verify=False) - open(task_file_path, 'wb').write(response.content) - except: - print(f"ERROR: Cannot download model archive, try to download it manually from {src_url} and place to {task_file_path}") - exit(2) - with zipfile.ZipFile(task_file_path, "r") as f: - f.extract(tflite_file_name, path_to_model_dir) - diff --git a/src/frontends/tensorflow_lite/tests/tf_utils.cpp b/src/frontends/tensorflow_lite/tests/tf_utils.cpp new file mode 100644 index 00000000000000..6190b68fa1f7b4 --- /dev/null +++ b/src/frontends/tensorflow_lite/tests/tf_utils.cpp @@ -0,0 +1,73 @@ +// 
Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "tf_utils.hpp" + +#include +#include + +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::frontend; + +// For compatibility purposes, need to remove when will be unused +const std::string TF_LITE_FE = "tflite"; + +namespace ov { +namespace frontend { +namespace tensorflow_lite { +namespace tests { + +const std::string TF_LITE_FE = ::TF_LITE_FE; + +static FrontEnd::Ptr get_tflite_frontend(bool default_front_end = true) { + static FrontEnd::Ptr _front_end = nullptr; + + FrontEnd::Ptr front_end = nullptr; + + if (default_front_end) { + if (_front_end == nullptr) { + auto fem = FrontEndManager(); + _front_end = fem.load_by_framework(TF_LITE_FE); + } + front_end = _front_end; + } else { + auto fem = FrontEndManager(); + front_end = fem.load_by_framework(TF_LITE_FE); + } + + if (!front_end) { + throw "TensorFlow Lite FrontEnd is not initialized"; + } + + return front_end; +} + +shared_ptr convert_model(const string& model_path, const ov::frontend::ConversionExtensionBase::Ptr& conv_ext) { + auto front_end = get_tflite_frontend(conv_ext == nullptr); + + if (conv_ext) { + front_end->add_extension(conv_ext); + } + + auto full_path = FrontEndTestUtils::make_model_path(string(TEST_TENSORFLOW_LITE_MODELS_DIRNAME) + model_path); + InputModel::Ptr input_model = front_end->load(full_path); + if (!input_model) { + throw "Input Model is not loaded"; + } + + shared_ptr model = front_end->convert(input_model); + if (!model) { + throw "Model is not converted"; + } + + return model; +} + +} // namespace tests +} // namespace tensorflow_lite +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_lite/tests/tf_utils.hpp b/src/frontends/tensorflow_lite/tests/tf_utils.hpp index 26b3c15fe405b7..13473c61a5c15b 100644 --- a/src/frontends/tensorflow_lite/tests/tf_utils.hpp +++ b/src/frontends/tensorflow_lite/tests/tf_utils.hpp @@ -6,4 
+6,24 @@ #include -static const std::string TF_LITE_FE = "tflite"; +#include "openvino/core/model.hpp" +#include "openvino/frontend/extension.hpp" +#include "openvino/frontend/manager.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow_lite { +namespace tests { + +extern const std::string TF_LITE_FE; + +// A wrapper to create TensorFlow Lite Frontend and configure the conversion pipeline +std::shared_ptr convert_model(const std::string& model_path, + const ov::frontend::ConversionExtensionBase::Ptr& conv_ext = nullptr); +} // namespace tests +} // namespace tensorflow_lite +} // namespace frontend +} // namespace ov + +// For compatibility purposes, need to remove when will be unused +extern const std::string TF_LITE_FE; diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index 1fe42e8b654479..b14fe2abe18a7e 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -98,10 +98,15 @@ class ICacheManager { */ class FileStorageCacheManager final : public ICacheManager { std::string m_cachePath; - +#if defined(_WIN32) && defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) + std::wstring getBlobFile(const std::string& blobHash) const { + return ov::util::string_to_wstring(ov::util::make_path(m_cachePath, blobHash + ".blob")); + } +#else std::string getBlobFile(const std::string& blobHash) const { return ov::util::make_path(m_cachePath, blobHash + ".blob"); } +#endif public: /** @@ -120,7 +125,6 @@ class FileStorageCacheManager final : public ICacheManager { void write_cache_entry(const std::string& id, StreamWriter writer) override { // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C". 
ScopedLocale plocal_C(LC_ALL, "C"); - std::ofstream stream(getBlobFile(id), std::ios_base::binary | std::ofstream::out); writer(stream); } @@ -137,8 +141,14 @@ class FileStorageCacheManager final : public ICacheManager { void remove_cache_entry(const std::string& id) override { auto blobFileName = getBlobFile(id); - if (ov::util::file_exists(blobFileName)) + + if (ov::util::file_exists(blobFileName)) { +#if defined(_WIN32) && defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) + _wremove(blobFileName.c_str()); +#else std::remove(blobFileName.c_str()); +#endif + } } }; diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index c1d5a6f484d016..637a5e45596357 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1555,7 +1555,11 @@ ov::CoreImpl::CoreConfig::CacheConfig ov::CoreImpl::CoreConfig::CacheConfig::cre std::shared_ptr cache_manager = nullptr; if (!dir.empty()) { +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + ov::util::create_directory_recursive(ov::util::string_to_wstring(dir)); +#else ov::util::create_directory_recursive(dir); +#endif cache_manager = std::make_shared(dir); } diff --git a/src/plugins/auto/tests/unit/auto_unit_test.cpp b/src/plugins/auto/tests/unit/auto_unit_test.cpp index 24b15eee691a2c..b2331aa4192deb 100644 --- a/src/plugins/auto/tests/unit/auto_unit_test.cpp +++ b/src/plugins/auto/tests/unit/auto_unit_test.cpp @@ -78,13 +78,17 @@ ov::mock_auto_plugin::tests::BaseTest::BaseTest(const MODELTYPE modelType) { ON_CALL(*mockIExeNetActual.get(), outputs()).WillByDefault(ReturnRefOfCopy(model->outputs())); inferReqInternal = std::make_shared(mockIExeNet); - ON_CALL(*mockIExeNet.get(), create_sync_infer_request()).WillByDefault(Return(inferReqInternal)); + ON_CALL(*mockIExeNet.get(), create_sync_infer_request()).WillByDefault([this]() { + return inferReqInternal; + }); optimalNum = (uint32_t)1; ON_CALL(*mockIExeNet.get(), 
get_property(StrEq(ov::optimal_number_of_infer_requests.name()))) .WillByDefault(Return(optimalNum)); inferReqInternalActual = std::make_shared(mockIExeNetActual); - ON_CALL(*mockIExeNetActual.get(), create_sync_infer_request()).WillByDefault(Return(inferReqInternalActual)); + ON_CALL(*mockIExeNetActual.get(), create_sync_infer_request()).WillByDefault([this]() { + return inferReqInternalActual; + }); ON_CALL(*mockIExeNetActual.get(), get_property(StrEq(ov::optimal_number_of_infer_requests.name()))) .WillByDefault(Return(optimalNum)); ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { @@ -138,6 +142,7 @@ ov::mock_auto_plugin::tests::BaseTest::~BaseTest() { inferReqInternalActual.reset(); mock_plugin_cpu.reset(); mock_plugin_gpu.reset(); + plugin->get_executor_manager()->clear(); plugin.reset(); } diff --git a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp index f85b32760d2cec..15d46c2e9f0a22 100644 --- a/src/plugins/auto/tests/unit/runtime_fallback_test.cpp +++ b/src/plugins/auto/tests/unit/runtime_fallback_test.cpp @@ -175,7 +175,9 @@ TEST_P(AutoRuntimeFallback, releaseResource) { mockExecutor, nullptr, ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest)); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { + return mockInferrequest; + }); } else if (deviceName == "GPU.0") { mockInferrequestGPU_0 = std::make_shared(inferReqInternalActual, @@ -328,7 +330,9 @@ TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { mockExecutor, nullptr, ifThrow); - ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest)); + ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault([this]() { + return mockInferrequest; + }); } else if (deviceName == "GPU.0") { mockInferrequestGPU_0 = std::make_shared(inferReqInternalActual, diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index d6b0a0fe8f5b72..4d9b807dee79ad 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -97,10 +97,8 @@ CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa) // control flow jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_static_emitter); jitters[snippets::op::KernelDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_dynamic_emitter); - jitters[snippets::op::LoopBeginStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_begin_static_emitter); - jitters[snippets::op::LoopBeginDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_begin_dynamic_emitter); - jitters[snippets::op::LoopEndStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_end_static_emitter); - jitters[snippets::op::LoopEndDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_end_dynamic_emitter); + jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_begin_emitter); + jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_end_emitter); // others jitters[snippets::op::Scalar::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_scalar_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp index 6ca9af92254e72..2b5b41fb912606 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.cpp @@ -16,96 +16,39 @@ using jit_generator = dnnl::impl::cpu::aarch64::jit_generator; using cpu_isa_t = 
dnnl::impl::cpu::aarch64::cpu_isa_t; using ExpressionPtr = ov::snippets::lowered::ExpressionPtr; -inline static std::vector transform_idxs_to_regs(const std::vector& idxs) { - std::vector regs; - regs.resize(idxs.size(), XReg(0)); - std::transform(idxs.begin(), idxs.end(), regs.begin(), [](size_t idx){return XReg(idx);}); - return regs; -} - /* ================== jit_loop_begin_emitter ====================== */ jit_loop_begin_emitter::jit_loop_begin_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, const ov::snippets::lowered::ExpressionPtr& expr) : jit_emitter(h, isa), loop_begin_label{new Xbyak_aarch64::Label()} { - in_out_type_ = emitter_in_out_map::gpr_to_gpr; -} - -std::shared_ptr jit_loop_begin_emitter::get_loop_end(const ov::snippets::lowered::ExpressionPtr& expr) { - OV_CPU_JIT_EMITTER_ASSERT(expr->get_output_port_connectors().size() == 1, "Has invalid LoopBegin expression configuration"); - const auto& consumers = expr->get_output_port_connector(0)->get_consumers(); - OV_CPU_JIT_EMITTER_ASSERT(consumers.size() == 1, "Has invalid LoopBegin expression configuration"); - const auto loop_end = ov::as_type_ptr(consumers.cbegin()->get_expr()->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "Has invalid LoopBegin expression configuration"); - return loop_end; -} - -jit_loop_begin_static_emitter::jit_loop_begin_static_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_begin_emitter(h, isa, expr) { - OV_CPU_JIT_EMITTER_ASSERT(ov::is_type(expr->get_node()), - "Expects LoopBeginStatic expression"); - const auto loop_end = ov::as_type_ptr(get_loop_end(expr)); + const auto loop_begin = ov::as_type_ptr(expr->get_node()); + OV_CPU_JIT_EMITTER_ASSERT(loop_begin, "expects LoopBegin expression"); + const auto loop_end = loop_begin->get_loop_end(); + OV_CPU_JIT_EMITTER_ASSERT(!loop_end->has_dynamic_params(), 
"supports only static loops!"); work_amount = loop_end->get_work_amount(); wa_increment = loop_end->get_increment(); evaluate_once = loop_end->get_evaluate_once(); + in_out_type_ = emitter_in_out_map::gpr_to_gpr; } -void jit_loop_begin_static_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { +void jit_loop_begin_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { OV_CPU_JIT_EMITTER_ASSERT(in.empty(), "Invalid inputs size: expected 0 got " + std::to_string(in.size())); // Note: the only expected output is work amount register (communicated to jit_loop_end_emitter) OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); + OV_CPU_JIT_EMITTER_ASSERT(loop_begin_label != nullptr, "has not inited label!"); } -void jit_loop_begin_static_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - XReg reg_work_amount = XReg(out[0]); - if (!evaluate_once) { - h->mov(reg_work_amount, work_amount); - } - h->L(*loop_begin_label); -} - -void jit_loop_begin_static_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { +void jit_loop_begin_emitter::emit_code(const std::vector &in, const std::vector &out, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { validate_arguments(in, out); emit_impl(in, out); } -jit_loop_begin_dynamic_emitter::jit_loop_begin_dynamic_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_begin_emitter(h, isa, expr), loop_end_label(nullptr) { - OV_CPU_JIT_EMITTER_ASSERT(ov::is_type(expr->get_node()), "Expects LoopBeginDynamic expression"); - const auto loop_end = get_loop_end(expr); - wa_increment = loop_end->get_increment(); - loop_id = loop_end->get_id(); -} - -void 
jit_loop_begin_dynamic_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - // Note: the only expected input is the reg_runtime_params_idx - OV_CPU_JIT_EMITTER_ASSERT(in.empty(), "Invalid inputs size: expected 0 got " + std::to_string(in.size())); - // Note: the only expected output is work amount register (communicated to jit_loop_end_emitter) - OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); - OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "Has not inited labels!"); -} - -void jit_loop_begin_dynamic_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { - validate_arguments(in, out); - jit_emitter::emit_code(in, out); -} - -void jit_loop_begin_dynamic_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - XReg reg_runtime_params = XReg(Operand::X0); // defined by jit_kernel_emitter +void jit_loop_begin_emitter::emit_impl(const std::vector& in, const std::vector& out) const { XReg reg_work_amount = XReg(out[0]); - XReg reg_loop_args_ptr = XReg(aux_gpr_idxs[0]); - const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); - h->ldr(reg_loop_args_ptr, ptr(reg_runtime_params, static_cast(GET_OFF(loop_args)))); - h->ldr(reg_work_amount, ptr(reg_loop_args_ptr, static_cast(id_offset + GET_OFF_LOOP_ARGS(m_work_amount)))); - - // if wa < increment, skip the loop - h->cmp(reg_work_amount, wa_increment); - h->b(LT, *loop_end_label); - + if (!evaluate_once) { + h->mov(reg_work_amount, work_amount); + } h->L(*loop_begin_label); } @@ -118,12 +61,17 @@ jit_loop_end_emitter::jit_loop_end_emitter(dnnl::impl::cpu::aarch64::jit_generat : jit_emitter(h, isa), loop_begin_label{nullptr} { in_out_type_ = emitter_in_out_map::gpr_to_gpr; const auto loop_end = ov::as_type_ptr(expr->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end 
!= nullptr, "Expected LoopEnd expr"); - // Note that 1 edge connects LoopBegin and LoopEnd + OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "expected LoopEnd expr"); + OV_CPU_JIT_EMITTER_ASSERT(!loop_end->has_dynamic_params(), "supports only static loops!"); num_inputs = loop_end->get_input_num(); num_outputs = loop_end->get_output_num(); + work_amount = loop_end->get_work_amount(); wa_increment = loop_end->get_increment(); is_incremented = loop_end->get_is_incremented(); + ptr_increments = loop_end->get_ptr_increments(); + finalization_offsets = loop_end->get_finalization_offsets(); + data_sizes = loop_end->get_element_type_sizes(); + evaluate_once = loop_end->get_evaluate_once(); const auto begin_expr = get_loop_begin_expr(expr); const auto& loop_begin_emitter = std::dynamic_pointer_cast(begin_expr->get_emitter()); @@ -138,36 +86,25 @@ ov::snippets::lowered::ExpressionPtr jit_loop_end_emitter::get_loop_begin_expr(c return begin_expr; } -jit_loop_end_static_emitter::jit_loop_end_static_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_end_emitter(h, isa, expr) { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "Expected LoopEndStatic expr"); - work_amount = static_cast(loop_end->get_work_amount()); - is_incremented = loop_end->get_is_incremented(); - ptr_increments = loop_end->get_ptr_increments(); - finalization_offsets = loop_end->get_finalization_offsets(); - data_sizes = loop_end->get_element_type_sizes(); - evaluate_once = loop_end->get_evaluate_once(); -} - -void jit_loop_end_static_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - const auto io_size = num_inputs + num_outputs; +void jit_loop_end_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { +const auto io_size = num_inputs + num_outputs; 
OV_CPU_JIT_EMITTER_ASSERT(out.size() == 0, "Invalid number of out arguments: expected ", 0, " got ", out.size()); OV_CPU_JIT_EMITTER_ASSERT(in.size() == io_size + 1, "Invalid number of in arguments: expected ", io_size + 1, " got ", in.size()); + OV_CPU_JIT_EMITTER_ASSERT(is_incremented.size() == io_size, "Invalid is_incremented size: expected ", io_size, " got ", is_incremented.size()); OV_CPU_JIT_EMITTER_ASSERT(ptr_increments.size() == io_size, "Invalid ptr_increments size: expected ", io_size, " got ", ptr_increments.size()); OV_CPU_JIT_EMITTER_ASSERT(finalization_offsets.size() == io_size, "Invalid finalization_offsets size: expected: ", io_size, " got ", finalization_offsets.size()); OV_CPU_JIT_EMITTER_ASSERT(data_sizes.size() == io_size, "Invalid data_sizes size: expected: ", io_size, " got ", data_sizes.size()); + OV_CPU_JIT_EMITTER_ASSERT(loop_begin_label != nullptr, "has not inited begin label!"); } -void jit_loop_end_static_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { +void jit_loop_end_emitter::emit_code(const std::vector &in, const std::vector &out, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { validate_arguments(in, out); emit_impl(in, out); } -void jit_loop_end_static_emitter::emit_impl(const std::vector& in, const std::vector& out) const { +void jit_loop_end_emitter::emit_impl(const std::vector& in, const std::vector& out) const { std::vector data_ptr_reg_idxs; data_ptr_reg_idxs.reserve(num_inputs + num_outputs); std::copy(in.begin(), in.end() - 1, std::back_inserter(data_ptr_reg_idxs)); @@ -201,68 +138,6 @@ void jit_loop_end_static_emitter::emit_impl(const std::vector& in, const } } -jit_loop_end_dynamic_emitter::jit_loop_end_dynamic_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_end_emitter(h, isa, expr), 
loop_end_label{new Xbyak_aarch64::Label()} { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "Expected LoopEndDynamic expr"); - loop_id = loop_end->get_id(); - - const auto begin_expr = get_loop_begin_expr(expr); - const auto& loop_begin_emitter = std::dynamic_pointer_cast(begin_expr->get_emitter()); - OV_CPU_JIT_EMITTER_ASSERT(loop_begin_emitter, "LoopBeginDynamic expected jit_loop_begin_dynamic_emitter"); - loop_begin_emitter->set_loop_end_label(loop_end_label); -} - -void jit_loop_end_dynamic_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "Has not inited labels!"); - // Note: there must be additional input argument for runtime parameters - const auto io_size = num_inputs + num_outputs; - OV_CPU_JIT_EMITTER_ASSERT(in.size() == io_size + 1, "Invalid number of in arguments: expected ", io_size + 1, " got ", in.size()); - OV_CPU_JIT_EMITTER_ASSERT(out.size() == 0, "Invalid number of out arguments: expected ", 0, " got ", out.size()); -} - -void jit_loop_end_dynamic_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { - validate_arguments(in, out); - jit_emitter::emit_code(in, out); -} - -void jit_loop_end_dynamic_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - XReg reg_runtime_params = XReg(Operand::X0); // defined by jit_kernel_emitter - XReg reg_work_amount = XReg(in.back()); - XReg reg_increments = XReg(aux_gpr_idxs[0]); - XReg reg_aux = XReg(aux_gpr_idxs[1]); - const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); - - std::vector data_ptr_regs = transform_idxs_to_regs(std::vector(in.begin(), in.end() - 1)); - - // todo: Note that we can pre-save reg_loop_args_ptr in jit_loop_begin_dynamic_emitter and pass it here like work_amount_reg - // this 
would save us one dereferencing here and in finalization offsets - h->ldr(reg_increments, ptr(reg_runtime_params, static_cast(GET_OFF(loop_args)))); - h->ldr(reg_increments, ptr(reg_increments, static_cast(id_offset + GET_OFF_LOOP_ARGS(m_ptr_increments)))); - for (size_t idx = 0; idx < data_ptr_regs.size(); idx++) { - if (is_incremented[idx]) { - h->ldr(reg_aux, ptr(reg_increments, static_cast(idx * sizeof(int64_t)))); - h->add(data_ptr_regs[idx], data_ptr_regs[idx], reg_aux); - } - } - h->sub_imm(reg_work_amount, reg_work_amount, wa_increment, h->X_TMP_0); - h->cmp(reg_work_amount, wa_increment); - h->b(GE, *loop_begin_label); - - h->ldr(reg_increments, ptr(reg_runtime_params, static_cast(GET_OFF(loop_args)))); - h->ldr(reg_increments, ptr(reg_increments, static_cast(id_offset + GET_OFF_LOOP_ARGS(m_finalization_offsets)))); - for (size_t idx = 0; idx < data_ptr_regs.size(); idx++) { - if (is_incremented[idx]) { - h->ldr(reg_aux, ptr(reg_increments, static_cast(idx * sizeof(int64_t)))); - h->add(data_ptr_regs[idx], data_ptr_regs[idx], reg_aux); - } - } - - h->L(*loop_end_label); -} - /* ============================================================== */ } // namespace aarch64 diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.hpp index af75ac1eb41b02..6ec87835821df2 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_loop_emitters.hpp @@ -21,49 +21,19 @@ class jit_loop_begin_emitter: public jit_emitter { size_t get_inputs_count() const override { return 0; } - std::shared_ptr get_begin_label() { return loop_begin_label; } - -protected: - static std::shared_ptr get_loop_end(const ov::snippets::lowered::ExpressionPtr& expr); - - std::shared_ptr loop_begin_label; - int64_t wa_increment = 0; -}; - -class jit_loop_begin_static_emitter: public jit_loop_begin_emitter { 
-public: - jit_loop_begin_static_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; -protected: - void validate_arguments(const std::vector &in, const std::vector &out) const override; - void emit_impl(const std::vector& in, const std::vector& out) const override; - bool evaluate_once = false; - size_t work_amount = 0; -}; - -class jit_loop_begin_dynamic_emitter: public jit_loop_begin_emitter { -public: - jit_loop_begin_dynamic_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; - - void set_loop_end_label(const std::shared_ptr& label) { loop_end_label = label; } + std::shared_ptr get_begin_label() { return loop_begin_label; } protected: void validate_arguments(const std::vector &in, const std::vector &out) const override; void emit_impl(const std::vector& in, const std::vector& out) const override; - // For Loop arguments - size_t get_aux_gprs_count() const override { return 1; } - - std::shared_ptr loop_end_label; - size_t loop_id; + std::shared_ptr loop_begin_label; + size_t work_amount = 0; + int64_t wa_increment = 0; + bool evaluate_once = false; }; /* ============================================================== */ @@ -77,21 +47,6 @@ class jit_loop_end_emitter: public jit_emitter { size_t get_inputs_count() const override { return 0; } -protected: - static ov::snippets::lowered::ExpressionPtr get_loop_begin_expr(const ov::snippets::lowered::ExpressionPtr& expr); - - std::shared_ptr loop_begin_label; - size_t num_inputs = 0; - size_t 
num_outputs = 0; - int64_t wa_increment = 0; - std::vector is_incremented = {}; -}; - -class jit_loop_end_static_emitter: public jit_loop_end_emitter { -public: - jit_loop_end_static_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; @@ -99,7 +54,13 @@ class jit_loop_end_static_emitter: public jit_loop_end_emitter { void validate_arguments(const std::vector &in, const std::vector &out) const override; void emit_impl(const std::vector& in, const std::vector& out) const override; + static ov::snippets::lowered::ExpressionPtr get_loop_begin_expr(const ov::snippets::lowered::ExpressionPtr& expr); + + std::shared_ptr loop_begin_label; + size_t num_inputs = 0; + size_t num_outputs = 0; size_t work_amount = 0; + int64_t wa_increment = 0; std::vector is_incremented = {}; std::vector ptr_increments = {}; std::vector finalization_offsets = {}; @@ -107,25 +68,6 @@ class jit_loop_end_static_emitter: public jit_loop_end_emitter { bool evaluate_once = false; }; -class jit_loop_end_dynamic_emitter: public jit_loop_end_emitter { -public: - jit_loop_end_dynamic_emitter(dnnl::impl::cpu::aarch64::jit_generator* h, dnnl::impl::cpu::aarch64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; - -protected: - void validate_arguments(const std::vector &in, const std::vector &out) const override; - void emit_impl(const std::vector& in, const std::vector& out) const override; - - // For Loop arguments - size_t get_aux_gprs_count() const override { return 2; } - - std::shared_ptr loop_end_label; - size_t loop_id; -}; - /* 
============================================================== */ } // namespace aarch64 diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 323bae34806923..a1fde3bf28f3bf 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -239,10 +239,8 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_kernel_static_emitter); jitters[snippets::op::KernelDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_kernel_dynamic_emitter); - jitters[snippets::op::LoopBeginStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_begin_static_emitter); - jitters[snippets::op::LoopBeginDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_begin_dynamic_emitter); - jitters[snippets::op::LoopEndStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_static_emitter); - jitters[snippets::op::LoopEndDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_dynamic_emitter); + jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_begin_emitter); + jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_emitter); // Note: jit_brgemm_emitter supports runtime recompilation, so its constructor takes additional arguments jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_emitter, kernel_executor_table, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp index ee6225271f65e8..566e495e88dfac 100644 --- 
a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp @@ -5,6 +5,7 @@ #include "jit_loop_emitters.hpp" #include "emitters/snippets/jit_snippets_call_args.hpp" +#include "snippets/utils.hpp" using namespace Xbyak; using namespace dnnl::impl; @@ -13,89 +14,57 @@ using namespace dnnl::impl::cpu::x64; namespace ov { namespace intel_cpu { -inline static void transform_idxs_to_regs(const std::vector& idxs, std::vector& regs) { - regs.resize(idxs.size()); - std::transform(idxs.begin(), idxs.end(), regs.begin(), [](size_t idx){ return Xbyak::Reg64(static_cast(idx)); }); -} - /* ================== jit_loop_begin_emitter ====================== */ jit_loop_begin_emitter::jit_loop_begin_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const ov::snippets::lowered::ExpressionPtr& expr) - : jit_emitter(h, isa), loop_begin_label{new Xbyak::Label()} { - in_out_type_ = emitter_in_out_map::gpr_to_gpr; -} - -std::shared_ptr jit_loop_begin_emitter::get_loop_end(const ov::snippets::lowered::ExpressionPtr& expr) { - OV_CPU_JIT_EMITTER_ASSERT(expr->get_output_port_connectors().size() == 1, "has invalid LoopBegin expression configuration"); - const auto& consumers = expr->get_output_port_connector(0)->get_consumers(); - OV_CPU_JIT_EMITTER_ASSERT(consumers.size() == 1, "has invalid LoopBegin expression configuration"); - const auto loop_end = ov::as_type_ptr(consumers.cbegin()->get_expr()->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "has invalid LoopBegin expression configuration"); - return loop_end; -} - -jit_loop_begin_static_emitter::jit_loop_begin_static_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_begin_emitter(h, isa, expr) { - OV_CPU_JIT_EMITTER_ASSERT(ov::is_type(expr->get_node()), - "expects LoopBeginStatic expression"); - const 
auto loop_end = ov::as_type_ptr(get_loop_end(expr)); + : jit_emitter(h, isa), loop_begin_label{new Xbyak::Label()}, loop_end_label(nullptr) { + const auto loop_begin = ov::as_type_ptr(expr->get_node()); + OV_CPU_JIT_EMITTER_ASSERT(loop_begin, "expects LoopBegin expression"); + const auto loop_end = loop_begin->get_loop_end(); work_amount = loop_end->get_work_amount(); wa_increment = loop_end->get_increment(); evaluate_once = loop_end->get_evaluate_once(); + loop_id = loop_end->get_id(); + is_work_amount_dynamic = ov::snippets::utils::is_dynamic_value(work_amount); + in_out_type_ = emitter_in_out_map::gpr_to_gpr; } -void jit_loop_begin_static_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - OV_CPU_JIT_EMITTER_ASSERT(in.empty(), "Invalid inputs size: expected 0 got " + std::to_string(in.size())); - // Note: the only expected output is work amount register (communicated to jit_loop_end_emitter) - OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); -} - -void jit_loop_begin_static_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - Xbyak::Reg64 reg_work_amount = Xbyak::Reg64(static_cast(out.back())); - if (!evaluate_once) { - h->mov(reg_work_amount, work_amount); - } - h->L(*loop_begin_label); -} - -void jit_loop_begin_static_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { - validate_arguments(in, out); - emit_impl(in, out); -} - -jit_loop_begin_dynamic_emitter::jit_loop_begin_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_begin_emitter(h, isa, expr), loop_end_label(nullptr) { - OV_CPU_JIT_EMITTER_ASSERT(ov::is_type(expr->get_node()), "expects LoopBeginDynamic expression"); - const auto loop_end = get_loop_end(expr); - wa_increment = 
loop_end->get_increment(); - loop_id = loop_end->get_id(); +size_t jit_loop_begin_emitter::aux_gprs_count() const { + // We should have aux GPR to store Loop arguments from `runtime_args` + // where we will take all needed information about the current loop: work amount + return is_work_amount_dynamic ? 1 : 0; } -void jit_loop_begin_dynamic_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - // Note: the only expected input is the reg_runtime_params_idx +void jit_loop_begin_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { OV_CPU_JIT_EMITTER_ASSERT(in.empty(), "Invalid inputs size: expected 0 got " + std::to_string(in.size())); // Note: the only expected output is work amount register (communicated to jit_loop_end_emitter) OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); - OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "has not inited labels!"); + OV_CPU_JIT_EMITTER_ASSERT(loop_begin_label != nullptr && loop_end_label != nullptr, "has not inited labels!"); + OV_CPU_JIT_EMITTER_ASSERT(implication(is_work_amount_dynamic, !evaluate_once), "with dynamic work_amount cannot evaluate once!"); } -void jit_loop_begin_dynamic_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { +void jit_loop_begin_emitter::emit_code(const std::vector &in, const std::vector &out, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { validate_arguments(in, out); - jit_emitter::emit_code(in, out); -} - -void jit_loop_begin_dynamic_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - Xbyak::Reg64 reg_runtime_params = abi_param1; // defined by jit_kernel_emitter - Xbyak::Reg64 reg_work_amount = Xbyak::Reg64(static_cast(out.back())); - Xbyak::Reg64 reg_loop_args_ptr = 
Xbyak::Reg64(static_cast(aux_gpr_idxs[0])); - const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); - h->mov(reg_loop_args_ptr, h->ptr[reg_runtime_params + GET_OFF(loop_args)]); - h->mov(reg_work_amount, h->ptr[reg_loop_args_ptr + id_offset + GET_OFF_LOOP_ARGS(m_work_amount)]); + jit_emitter::emit_code(in, out, pool_vec_idxs, pool_gpr_idxs); +} + +void jit_loop_begin_emitter::emit_impl(const std::vector& in, const std::vector& out) const { + // If the loop evaulate once, we can skip loop begin code emission + if (evaluate_once) + return; + + Reg64 reg_work_amount = Reg64(static_cast(out.back())); + if (is_work_amount_dynamic) { + Reg64 reg_runtime_params = abi_param1; // defined by jit_kernel_emitter + Reg64 reg_loop_args_ptr = Reg64(static_cast(aux_gpr_idxs[0])); + const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); + h->mov(reg_loop_args_ptr, h->ptr[reg_runtime_params + GET_OFF(loop_args)]); + h->mov(reg_work_amount, h->ptr[reg_loop_args_ptr + id_offset + GET_OFF_LOOP_ARGS(m_work_amount)]); + } else { + h->mov(reg_work_amount, work_amount); + } // if wa < increment, skip the loop h->cmp(reg_work_amount, wa_increment); @@ -110,19 +79,31 @@ void jit_loop_begin_dynamic_emitter::emit_impl(const std::vector& in, co jit_loop_end_emitter::jit_loop_end_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const ov::snippets::lowered::ExpressionPtr& expr) - : jit_emitter(h, isa), loop_begin_label{nullptr} { + : jit_emitter(h, isa), loop_begin_label{nullptr}, loop_end_label{new Xbyak::Label()} { in_out_type_ = emitter_in_out_map::gpr_to_gpr; const auto loop_end = ov::as_type_ptr(expr->get_node()); OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "expected LoopEnd expr"); - // Note that 1 edge connects LoopBegin and LoopEnd num_inputs = loop_end->get_input_num(); num_outputs = loop_end->get_output_num(); + work_amount = loop_end->get_work_amount(); wa_increment = loop_end->get_increment(); 
is_incremented = loop_end->get_is_incremented(); + ptr_increments = loop_end->get_ptr_increments(); + finalization_offsets = loop_end->get_finalization_offsets(); + data_sizes = loop_end->get_element_type_sizes(); + evaluate_once = loop_end->get_evaluate_once(); + loop_id = loop_end->get_id(); + + are_ptr_increments_dynamic = + std::any_of(ptr_increments.cbegin(), ptr_increments.cend(), ov::snippets::utils::is_dynamic_value); + are_final_offsets_dynamic = + std::any_of(finalization_offsets.cbegin(), finalization_offsets.cend(), ov::snippets::utils::is_dynamic_value); + are_ptr_shifts_dynamic = are_ptr_increments_dynamic || are_final_offsets_dynamic; const auto begin_expr = get_loop_begin_expr(expr); const auto& loop_begin_emitter = std::dynamic_pointer_cast(begin_expr->get_emitter()); OV_CPU_JIT_EMITTER_ASSERT(loop_begin_emitter, "LoopBegin expected jit_loop_begin_emitter"); + loop_begin_emitter->set_loop_end_label(loop_end_label); loop_begin_label = loop_begin_emitter->get_begin_label(); } @@ -133,116 +114,69 @@ ov::snippets::lowered::ExpressionPtr jit_loop_end_emitter::get_loop_begin_expr(c return begin_expr; } -jit_loop_end_static_emitter::jit_loop_end_static_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_end_emitter(h, isa, expr) { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "expected LoopEndStatic expr"); - work_amount = static_cast(loop_end->get_work_amount()); - is_incremented = loop_end->get_is_incremented(); - ptr_increments = loop_end->get_ptr_increments(); - finalization_offsets = loop_end->get_finalization_offsets(); - data_sizes = loop_end->get_element_type_sizes(); - evaluate_once = loop_end->get_evaluate_once(); -} - -void jit_loop_end_static_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { +void jit_loop_end_emitter::validate_arguments(const 
std::vector &in, const std::vector &out) const { const auto io_size = num_inputs + num_outputs; OV_CPU_JIT_EMITTER_ASSERT(out.size() == 0, "Invalid number of out arguments: expected ", 0, " got ", out.size()); OV_CPU_JIT_EMITTER_ASSERT(in.size() == io_size + 1, "Invalid number of in arguments: expected ", io_size + 1, " got ", in.size()); + OV_CPU_JIT_EMITTER_ASSERT(is_incremented.size() == io_size, "Invalid is_incremented size: expected ", io_size, " got ", is_incremented.size()); OV_CPU_JIT_EMITTER_ASSERT(ptr_increments.size() == io_size, "Invalid ptr_increments size: expected ", io_size, " got ", ptr_increments.size()); OV_CPU_JIT_EMITTER_ASSERT(finalization_offsets.size() == io_size, "Invalid finalization_offsets size: expected: ", io_size, " got ", finalization_offsets.size()); OV_CPU_JIT_EMITTER_ASSERT(data_sizes.size() == io_size, "Invalid data_sizes size: expected: ", io_size, " got ", data_sizes.size()); + OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "has not inited labels!"); + OV_CPU_JIT_EMITTER_ASSERT(implication(are_ptr_shifts_dynamic, !evaluate_once), "with dynamic data pointer shifts cannot evaluate once!"); } -void jit_loop_end_static_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { +void jit_loop_end_emitter::emit_code(const std::vector &in, const std::vector &out, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { validate_arguments(in, out); - emit_impl(in, out); + jit_emitter::emit_code(in, out, pool_vec_idxs, pool_gpr_idxs); +} + +size_t jit_loop_end_emitter::aux_gprs_count() const { + // We should have aux GPR to store Loop arguments from `runtime_args` + // where we will take all needed information about the current loop: data pointer shifts + return are_ptr_shifts_dynamic ? 
1 : 0; } -void jit_loop_end_static_emitter::emit_impl(const std::vector& in, const std::vector& out) const { +void jit_loop_end_emitter::emit_impl(const std::vector& in, const std::vector& out) const { std::vector data_ptr_reg_idxs; // the last input is actually a work_amount reg - data_ptr_reg_idxs.reserve(num_inputs - 1); + data_ptr_reg_idxs.reserve(num_inputs + num_outputs); std::copy(in.begin(), in.end() - 1, std::back_inserter(data_ptr_reg_idxs)); - Reg64 reg_work_amount = Reg64(in.back()); - if (!evaluate_once) { + const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); + Reg64 reg_increments = are_ptr_shifts_dynamic ? Reg64(static_cast(aux_gpr_idxs[0])) : Reg64(); + + auto apply_increments = [&](bool use_runtime_args, size_t field_offset, const std::vector& increments, size_t scale) { + if (use_runtime_args) { + Reg64 reg_runtime_params = abi_param1; /* defined by jit_kernel_emitter */ + h->mov(reg_increments, h->ptr[reg_runtime_params + GET_OFF(loop_args)]); + h->mov(reg_increments, h->ptr[reg_increments + id_offset + field_offset]); + } for (size_t idx = 0; idx < data_ptr_reg_idxs.size(); idx++) { - if (!is_incremented[idx] || ptr_increments[idx] == 0) - continue; - Reg64 data_reg = Reg64(static_cast(data_ptr_reg_idxs[idx])); - h->add(data_reg, ptr_increments[idx] * wa_increment * data_sizes[idx]); + const auto& increment = increments[idx]; + if (is_incremented[idx] && increment != 0) { + if (ov::snippets::utils::is_dynamic_value(increment)) { + OV_CPU_JIT_EMITTER_ASSERT(use_runtime_args, "Loop argument structure cannot be pushed to aux GPR"); + h->add(Reg64(static_cast(data_ptr_reg_idxs[idx])), h->ptr[reg_increments + idx * sizeof(int64_t)]); + } else { + h->add(Reg64(static_cast(data_ptr_reg_idxs[idx])), increment * scale * data_sizes[idx]); + } + } } - h->sub(reg_work_amount, wa_increment); - h->cmp(reg_work_amount, wa_increment); - h->jge(*loop_begin_label); - } - - for (size_t idx = 0; idx < data_ptr_reg_idxs.size(); idx++) { - 
if (!is_incremented[idx] || finalization_offsets[idx] == 0) - continue; - Reg64 data_reg = Reg64(static_cast(data_ptr_reg_idxs[idx])); - h->add(data_reg, finalization_offsets[idx] * data_sizes[idx]); - } -} - -jit_loop_end_dynamic_emitter::jit_loop_end_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr) - : jit_loop_end_emitter(h, isa, expr), loop_end_label{new Xbyak::Label()} { - const auto loop_end = ov::as_type_ptr(expr->get_node()); - OV_CPU_JIT_EMITTER_ASSERT(loop_end != nullptr, "expected LoopEndDynamic expr"); - loop_id = loop_end->get_id(); + }; - const auto begin_expr = get_loop_begin_expr(expr); - const auto& loop_begin_emitter = std::dynamic_pointer_cast(begin_expr->get_emitter()); - OV_CPU_JIT_EMITTER_ASSERT(loop_begin_emitter, "LoopBeginDynamic expected jit_loop_begin_dynamic_emitter"); - loop_begin_emitter->set_loop_end_label(loop_end_label); -} - -void jit_loop_end_dynamic_emitter::validate_arguments(const std::vector &in, const std::vector &out) const { - OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "has not inited labels!"); - // Note: there must be additional input argument for runtime parameters - const auto io_size = num_inputs + num_outputs; - OV_CPU_JIT_EMITTER_ASSERT(in.size() == io_size + 1, "Invalid number of in arguments: expected ", io_size + 1, " got ", in.size()); - OV_CPU_JIT_EMITTER_ASSERT(out.size() == 0, "Invalid number of out arguments: expected ", 0, " got ", out.size()); -} - -void jit_loop_end_dynamic_emitter::emit_code(const std::vector &in, const std::vector &out, - const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { - validate_arguments(in, out); - jit_emitter::emit_code(in, out); -} - -void jit_loop_end_dynamic_emitter::emit_impl(const std::vector& in, const std::vector& out) const { - Xbyak::Reg64 reg_runtime_params = abi_param1; // defined by jit_kernel_emitter - 
Xbyak::Reg64 reg_work_amount = Xbyak::Reg64(static_cast(in[in.size() - 1])); - Xbyak::Reg64 reg_increments = Xbyak::Reg64(static_cast(aux_gpr_idxs[0])); - const auto id_offset = loop_id * sizeof(jit_snippets_call_args::loop_args_t); - - std::vector data_ptr_regs; - transform_idxs_to_regs(std::vector(in.begin(), in.end() - 1), data_ptr_regs); + if (!evaluate_once) { + apply_increments(are_ptr_increments_dynamic, GET_OFF_LOOP_ARGS(m_ptr_increments), ptr_increments, wa_increment); - // todo: Note that we can pre-save reg_loop_args_ptr in jit_loop_begin_dynamic_emitter and pass it here like work_amount_reg - // this would save us one dereferencing here and in finalization offsets - h->mov(reg_increments, h->ptr[reg_runtime_params + GET_OFF(loop_args)]); - h->mov(reg_increments, h->ptr[reg_increments + id_offset + GET_OFF_LOOP_ARGS(m_ptr_increments)]); - for (size_t idx = 0; idx < data_ptr_regs.size(); idx++) { - if (is_incremented[idx]) - h->add(data_ptr_regs[idx], h->ptr[reg_increments + idx * sizeof(int64_t)]); + Reg64 reg_work_amount = Reg64(in.back()); + h->sub(reg_work_amount, wa_increment); + h->cmp(reg_work_amount, wa_increment); + h->jge(*loop_begin_label, Xbyak::CodeGenerator::T_NEAR); } - h->sub(reg_work_amount, wa_increment); - h->cmp(reg_work_amount, wa_increment); - h->jge(*loop_begin_label, Xbyak::CodeGenerator::T_NEAR); - h->mov(reg_increments, h->ptr[reg_runtime_params + GET_OFF(loop_args)]); - h->mov(reg_increments, h->ptr[reg_increments + id_offset + GET_OFF_LOOP_ARGS(m_finalization_offsets)]); - for (size_t idx = 0; idx < data_ptr_regs.size(); idx++) { - if (is_incremented[idx]) - h->add(data_ptr_regs[idx], h->ptr[reg_increments + idx * sizeof(int64_t)]); - } + apply_increments(are_final_offsets_dynamic, GET_OFF_LOOP_ARGS(m_finalization_offsets), finalization_offsets, 1); h->L(*loop_end_label); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.hpp 
index 1f1013dfca77c0..0af5ac4662140a 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.hpp @@ -7,6 +7,7 @@ #include "emitters/plugin/x64/jit_emitter.hpp" #include "snippets/op/loop.hpp" +#include "snippets/utils.hpp" namespace ov { namespace intel_cpu { @@ -20,51 +21,28 @@ class jit_loop_begin_emitter: public jit_emitter { size_t get_inputs_num() const override { return 0; } - std::shared_ptr get_begin_label() { return loop_begin_label; } - -protected: - static std::shared_ptr get_loop_end(const ov::snippets::lowered::ExpressionPtr& expr); - - std::shared_ptr loop_begin_label; - int64_t wa_increment = 0; -}; - -class jit_loop_begin_static_emitter: public jit_loop_begin_emitter { -public: - jit_loop_begin_static_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; -protected: - void validate_arguments(const std::vector &in, const std::vector &out) const override; - void emit_impl(const std::vector& in, const std::vector& out) const override; - - bool evaluate_once = false; - size_t work_amount = 0; -}; - -class jit_loop_begin_dynamic_emitter: public jit_loop_begin_emitter { -public: - jit_loop_begin_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; + const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; void set_loop_end_label(const std::shared_ptr& label) { loop_end_label = label; } + std::shared_ptr 
get_begin_label() { return loop_begin_label; } protected: void validate_arguments(const std::vector &in, const std::vector &out) const override; void emit_impl(const std::vector& in, const std::vector& out) const override; - // For Loop arguments - size_t aux_gprs_count() const override { return 1; } + size_t aux_gprs_count() const override; - std::shared_ptr loop_end_label; - size_t loop_id; + std::shared_ptr loop_begin_label = nullptr; + std::shared_ptr loop_end_label = nullptr; + size_t work_amount = 0; + size_t wa_increment = 0; + size_t loop_id = 0; + bool evaluate_once = false; + bool is_work_amount_dynamic = false; }; + /* ============================================================== */ /* ================== jit_loop_end_emitter ====================== */ @@ -76,21 +54,6 @@ class jit_loop_end_emitter: public jit_emitter { size_t get_inputs_num() const override { return 0; } -protected: - static ov::snippets::lowered::ExpressionPtr get_loop_begin_expr(const ov::snippets::lowered::ExpressionPtr& expr); - - std::shared_ptr loop_begin_label; - size_t num_inputs = 0; - size_t num_outputs = 0; - int64_t wa_increment = 0; - std::vector is_incremented = {}; -}; - -class jit_loop_end_static_emitter: public jit_loop_end_emitter { -public: - jit_loop_end_static_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; @@ -98,31 +61,25 @@ class jit_loop_end_static_emitter: public jit_loop_end_emitter { void validate_arguments(const std::vector &in, const std::vector &out) const override; void emit_impl(const std::vector& in, const std::vector& out) const override; + size_t aux_gprs_count() const override; + + static ov::snippets::lowered::ExpressionPtr get_loop_begin_expr(const ov::snippets::lowered::ExpressionPtr& expr); + + 
std::shared_ptr loop_begin_label = nullptr; + std::shared_ptr loop_end_label = nullptr; + size_t num_inputs = 0; + size_t num_outputs = 0; size_t work_amount = 0; + size_t wa_increment = 0; std::vector is_incremented = {}; std::vector ptr_increments = {}; std::vector finalization_offsets = {}; std::vector data_sizes = {}; + size_t loop_id = 0; bool evaluate_once = false; -}; - -class jit_loop_end_dynamic_emitter: public jit_loop_end_emitter { -public: - jit_loop_end_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, - const ov::snippets::lowered::ExpressionPtr& expr); - - void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, - const std::vector &pool_vec_idxs = {}, const std::vector &pool_gpr_idxs = {}) const override; - -protected: - void validate_arguments(const std::vector &in, const std::vector &out) const override; - void emit_impl(const std::vector& in, const std::vector& out) const override; - - // For Loop arguments - size_t aux_gprs_count() const override { return 1; } - - std::shared_ptr loop_end_label; - size_t loop_id; + bool are_ptr_increments_dynamic = false; + bool are_final_offsets_dynamic = false; + bool are_ptr_shifts_dynamic = false; }; /* ============================================================== */ diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 3cf2c08cd4b067..9e496ba5cd8ae5 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -159,10 +159,8 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::snippets::op::IntermediateMemoryBuffer) \ OP_EXTENSION(ov::snippets::op::Load) \ OP_EXTENSION(ov::snippets::op::LoadReshape) \ - OP_EXTENSION(ov::snippets::op::LoopBeginStatic) \ - OP_EXTENSION(ov::snippets::op::LoopBeginDynamic) \ - OP_EXTENSION(ov::snippets::op::LoopEndStatic) \ - OP_EXTENSION(ov::snippets::op::LoopEndDynamic) \ + 
OP_EXTENSION(ov::snippets::op::LoopBegin) \ + OP_EXTENSION(ov::snippets::op::LoopEnd) \ OP_EXTENSION(ov::snippets::op::NewMemoryBuffer) \ OP_EXTENSION(ov::snippets::op::Nop) \ OP_EXTENSION(ov::snippets::op::PowerStatic) \ diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index b3f23d2c6cc824..f1764dc174d0cc 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1359,7 +1359,11 @@ void Graph::InferDynamic(SyncInferRequest* request) { if (request) request->throw_if_canceled(); - ExecuteNode(node, stream); + try { + ExecuteNode(node, stream); + } catch (const std::exception& exp) { + OPENVINO_THROW(node, exp.what()); + } } } } diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index bd2f52adf04cc7..1305895ed37330 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -548,12 +548,16 @@ void Node::updateShapes() { getTypeStr(), " with name: ", getName()); - if (needShapeInfer()) { - auto result = shapeInfer(); - if (ShapeInferStatus::success == result.status) { - redefineOutputMemory(result.dims); + try { + if (needShapeInfer()) { + auto result = shapeInfer(); + if (ShapeInferStatus::success == result.status) { + redefineOutputMemory(result.dims); + } + } + } catch (const std::exception& exp) { + THROW_CPU_NODE_ERR(exp.what()); } - } } void Node::updateDynamicParams() { @@ -562,18 +566,18 @@ void Node::updateDynamicParams() { getTypeStr(), " with name: ", getName()); - if (isExecutable()) { - if (needPrepareParams()) { - OPENVINO_ASSERT(inputShapesDefined(), - "Can't prepare params for ", - getTypeStr(), - " node with name: ", - getName(), - " since the input shapes are not defined."); - DEBUG_LOG(" prepareParams() on #", getExecIndex(), " ", getTypeStr(), " ", algToString(getAlgorithm()), - " ", getName(), " ", getOriginalLayers()); - prepareParams(); + try { + if (isExecutable()) { + if (needPrepareParams()) { + 
OPENVINO_ASSERT(inputShapesDefined(), + "Input shapes are not defined."); + DEBUG_LOG(" prepareParams() on #", getExecIndex(), " ", getTypeStr(), " ", algToString(getAlgorithm()), + " ", getName(), " ", getOriginalLayers()); + prepareParams(); + } } + } catch (const std::exception& e) { + THROW_CPU_NODE_ERR(e.what()); } } @@ -1597,34 +1601,29 @@ std::vector Node::shapeInferGeneric(const std::vector& shapes } return std::move(result.dims); - } catch (const std::runtime_error& exp) { + } catch (const std::exception& exp) { OPENVINO_THROW("Shape inference of ", getTypeStr(), " node with name ", getName(), " failed: ", exp.what()); } } IShapeInfer::Result Node::shapeInfer() const { - try { - std::vector> input_shapes; - auto input_value_port_mask = shapeInference->get_port_mask(); - - input_shapes.reserve(inputShapes.size()); - for (size_t port = 0; port < inputShapes.size(); ++port) - input_shapes.emplace_back(std::ref(getParentEdgeAt(port)->getMemory().getStaticDims())); - - std::unordered_map input_values; - if (input_value_port_mask) { - for (size_t port = 0; port < inputShapes.size(); ++port) { - if (input_value_port_mask & (1 << port)) { - input_values[port] = getSrcMemoryAtPort(port); - } + std::vector> input_shapes; + auto input_value_port_mask = shapeInference->get_port_mask(); + + input_shapes.reserve(inputShapes.size()); + for (size_t port = 0; port < inputShapes.size(); ++port) + input_shapes.emplace_back(std::ref(getParentEdgeAt(port)->getMemory().getStaticDims())); + + std::unordered_map input_values; + if (input_value_port_mask) { + for (size_t port = 0; port < inputShapes.size(); ++port) { + if (input_value_port_mask & (1 << port)) { + input_values[port] = getSrcMemoryAtPort(port); } } - - return shapeInference->infer(input_shapes, input_values); - } - catch (const std::runtime_error& exp) { - OPENVINO_THROW("Shape inference of ", getTypeStr() , " node with name ", getName(), " failed: ", exp.what()); } + + return shapeInference->infer(input_shapes, 
input_values); } void Node::updateLastInputDims() { @@ -1942,5 +1941,16 @@ void Node::resolveInPlaceDirection() { } } +#ifndef CPU_DEBUG_CAPS +std::ostream& operator<<(std::ostream& out, const Node& node) { + return out << "Node " << node.getName() << + " of type " << node.getTypeStr() << "\n"; +} + +std::ostream& operator<<(std::ostream& out, const Node* node) { + return operator<<(out, (*node)); +} +#endif + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index f92ec31a0b1833..d442c0280ab03c 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -833,6 +833,12 @@ class Node { CPU_DEBUG_CAP_ENABLE(friend class Verbose); }; +#ifndef CPU_DEBUG_CAPS +std::ostream& operator<<(std::ostream&, const Node&); + +std::ostream& operator<<(std::ostream&, const Node*); +#endif + template constexpr uint64_t PortMask(T... rest) { return util::bit::mask(rest...); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp index 42e69764587eb9..b2e740fbad7fe0 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.cpp @@ -125,23 +125,11 @@ CausalMaskPreprocess::CausalMaskPreprocess() { makePattern({ShapeOf_49034, {1}, 0}, {{"batch_dims", 0}}); // tensor_array auto ScatterUpdate_93502 = makePattern({{0, 0, 0, 0}, {3}, Gather_41642, {0}}); // tensor_array - auto SliceAssign_201_Slice = makePattern( - {SliceAssign_201_Reshape, {0, 0, 0, 0}, ScatterUpdate_93502, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + auto SliceAssign_201_Slice = 
GenSlice2(SliceAssign_201_Reshape, {0, 0, 0, 0}, ScatterUpdate_93502, {1, 1, 1, 1}, 3); // tensor_array auto SliceAssign_201_Reshape_1 = makePattern({SliceAssign_201_Slice, {-1, 1}}, {{"special_zero", false}}); // tensor_array - auto causal_mask_boolean_1 = - makePattern({mul_Multiply_1, {0, 0, 0, 0}, ScatterUpdate_93502, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + auto causal_mask_boolean_1 = GenSlice2(mul_Multiply_1, {0, 0, 0, 0}, ScatterUpdate_93502, {1, 1, 1, 1}, 3); // tensor_array auto Constant_107278 = makeConst(ov::element::f32, ov::Shape({ 1, @@ -187,13 +175,7 @@ CausalMaskPreprocess::CausalMaskPreprocess() { {{"special_zero", true}}); // tensor_array auto ScatterUpdate_93554 = makePattern({{0, 0, 0, 0}, {3}, kvLen, {0}}); // tensor_array - auto slice_Slice_14 = makePattern( - {SliceAssign_201_Reshape_3, {0, 0, 0, 0}, ScatterUpdate_93554, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); // tensor_array + auto slice_Slice_14 = GenSlice2(SliceAssign_201_Reshape_3, {0, 0, 0, 0}, ScatterUpdate_93554, {1, 1, 1, 1}, 3); // tensor_array auto index_Gather = makePattern({slice_Slice_14, cache_positions, 2}, {{"batch_dims", 0}}, nullptr); // tensor_array diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index c900bd5a85b3ce..906c5b012a8911 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -521,24 +521,26 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // 2. GroupNormalizationDecomposition produce MVN, and MVN have a conditional pass MVN6Decomposition. 
If call MVN6Decomposition again after // snippets pipeline as well, where MVN is decomposed to simple ops, these simple ops will not tokenized into subgraph again. // CVS-134277 to fully enable GN as snippets to disable this GroupNormalizationDecomposition entirly. - if (node->is_dynamic() || !one_of(inferencePrecision, element::f32, element::undefined)) - return false; - const auto group_norm = ov::as_type_ptr(node); - if (!group_norm || !implication(inferencePrecision == element::undefined, group_norm->get_element_type() == element::f32)) - return false; - const auto num_groups = static_cast(group_norm->get_num_groups()); - const auto shape = group_norm->get_input_partial_shape(0).to_shape(); - size_t snippets_work_amount = shape[0] * num_groups; - size_t concurrency = parallel_get_max_threads(); - if (concurrency > snippets_work_amount) - return false; - size_t spatial_dim = 1; - for (size_t i = 2; i < shape.size(); ++i) - spatial_dim = spatial_dim * shape[i]; - size_t snippets_tensor_size = spatial_dim * shape[1] / num_groups * node->get_element_type().size(); - size_t cache_size_l1 = dnnl::utils::get_cache_size(1, true); - if (snippets_tensor_size > cache_size_l1) { + if (node->is_dynamic() || !one_of(inferencePrecision, element::f32, element::undefined) || snippetsMode == Config::SnippetsMode::Disable) return false; + if (snippetsMode != Config::SnippetsMode::IgnoreCallback) { + const auto group_norm = ov::as_type_ptr(node); + if (!group_norm || !implication(inferencePrecision == element::undefined, group_norm->get_element_type() == element::f32)) + return false; + const auto num_groups = static_cast(group_norm->get_num_groups()); + const auto shape = group_norm->get_input_partial_shape(0).to_shape(); + size_t snippets_work_amount = shape[0] * num_groups; + size_t concurrency = parallel_get_max_threads(); + if (concurrency > snippets_work_amount) + return false; + size_t spatial_dim = 1; + for (size_t i = 2; i < shape.size(); ++i) + spatial_dim = spatial_dim 
* shape[i]; + size_t snippets_tensor_size = spatial_dim * shape[1] / num_groups * node->get_element_type().size(); + size_t cache_size_l1 = dnnl::utils::get_cache_size(1, true); + if (snippets_tensor_size > cache_size_l1) { + return false; + } } return true; @@ -782,7 +784,6 @@ void Transformations::PostLpt() { // Execute before snippets. Otherwise FQ will be converted to Subgraph CPU_REGISTER_PASS_X64(postLPTPassManager, ConvertFqRnnToQuantizedRnn); - CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::EliminateStridedSlice); CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::RoPEFusion); CPU_REGISTER_PASS_X64(postLPTPassManager, CausalMaskPreprocessFusion); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp index c3d1f77d1f90e1..47d7d3072b7337 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp @@ -185,6 +185,12 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface kernel, stride, dilation; std::vector padBegin, padEnd; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp index a7efec643ab494..a505a010a20910 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp @@ -325,8 +325,14 @@ TEST_F(RoPECPUTestChatGLM, smoke_CompareWithRefs) { CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); } -class RoPECPUTestQwen7b : public SubgraphBaseTest { +class RoPECPUTestQwen7b : public SubgraphBaseTest, public testing::WithParamInterface { public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + 
const bool specialReshape = obj.param; + std::ostringstream result; + result << "specialReshape=" << specialReshape << std::endl; + return result.str(); + } void generate_inputs(const std::vector& targetInputStaticShapes) override { const auto& funcInputs = function->inputs(); @@ -346,7 +352,7 @@ class RoPECPUTestQwen7b : public SubgraphBaseTest { } protected: - std::shared_ptr buildROPE_QWen7b() { + std::shared_ptr buildROPE_QWen7b(bool specialReshape) { auto input = std::make_shared(ov::element::f32, PartialShape{-1, -1, 4096 + 4096 + 4096}); auto cos_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128}); @@ -401,8 +407,13 @@ class RoPECPUTestQwen7b : public SubgraphBaseTest { makeOP({floor_divide_Floor, {-1}}, {{"special_zero", false}}); auto ListConstruct_493_Concat = makeOP({Gather_239390, {2}, ListConstruct_493_Reshape_3}, {{"axis", 0}}); - auto reshape_Reshape = - makeOP({slice_Slice_470, ListConstruct_493_Concat}, {{"special_zero", false}}); + std::shared_ptr reshape_Reshape = nullptr; + if (specialReshape) { + reshape_Reshape = makeOP({slice_Slice_470, {0, 0, 32, 2, 64}}, {{"special_zero", true}}); + } else { + reshape_Reshape = + makeOP({slice_Slice_470, ListConstruct_493_Concat}, {{"special_zero", false}}); + } auto ListUnpack_496_Split = makeOP({reshape_Reshape, -2}, {{"num_splits", 2}}); auto ListUnpack_496_Squeeze_0 = makeOP({ListUnpack_496_Split->output(1), -2}); auto Constant_296840_compressed = makeConst(element::f16, @@ -444,19 +455,25 @@ class RoPECPUTestQwen7b : public SubgraphBaseTest { } void SetUp() override { targetDevice = ov::test::utils::DEVICE_CPU; + const bool specialReshape = this->GetParam(); const int batch = 2; const int seq_length = 7; InputShape inpShape = {{batch, -1, 4096 + 4096 + 4096}, {{batch, seq_length, 4096 + 4096 + 4096}}}; init_input_shapes({inpShape}); - function = buildROPE_QWen7b(); + function = buildROPE_QWen7b(specialReshape); } }; -TEST_F(RoPECPUTestQwen7b, smoke_CompareWithRefs) { 
+TEST_P(RoPECPUTestQwen7b, smoke_CompareWithRefs) { run(); CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); } +INSTANTIATE_TEST_SUITE_P(smoke_RoPECPUTestQwen7b, + RoPECPUTestQwen7b, + ::testing::Values(true, false), + RoPECPUTestQwen7b::getTestCaseName); + class RoPECPUTestGPTJ : public SubgraphBaseTest, public testing::WithParamInterface { public: static std::string getTestCaseName(const testing::TestParamInfo& obj) { diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/group_normalization.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/group_normalization.cpp new file mode 100644 index 00000000000000..cbba412d66cb6a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/group_normalization.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/group_normalization.hpp" +#include "common_test_utils/test_constants.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +// snippets ignore_callback is set in setup, so these tests will always run as snippets +const std::vector inputShape = { + {3, 8}, + {3, 8, 1}, + {3, 8, 7}, + {3, 8, 16}, + {3, 8, 21}, + {1, 4, 8, 8}, + {1, 8, 1, 22}, + {1, 16, 1, 33}, + {1, 4, 1, 1, 34}, + {1, 8, 1, 8, 2, 2}, + {1, 8, 1, 8, 2, 2, 2} +}; + +const std::vector numGroups = { + 2, 4, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_GroupNormalization, GroupNormalization, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShape)), + ::testing::ValuesIn(numGroups), // num_group + ::testing::Values(0.0001), // eps + ::testing::Values(1), // expected node number + ::testing::Values(1), // expected subgraph number + ::testing::Values(ov::test::utils::DEVICE_CPU)), + GroupNormalization::getTestCaseName); + +} // namespace + +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at 
end of file diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index 373e65b660c0ba..b0cd612cd3a378 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit 373e65b660c0ba274631cf30c422f10606de1618 +Subproject commit b0cd612cd3a378fb2dd73a84efddfca1df2a22db diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index 89a550421f577e..b11fb675f76196 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -130,17 +130,17 @@ struct loop_impl : typed_primitive_impl { } body_network->set_shape_predictor(outer_network.get_shape_predictor()); - OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id"); + OPENVINO_ASSERT(!instance.get_num_iterations_id().empty(), "loop operation should have num_iteration_id"); // shortcut of execution_condition memory in body network memory::ptr body_execution_condition_mem = nullptr; - if (!primitive->body_execution_condition_id.empty()) { - body_execution_condition_mem = body_network->get_primitive(primitive->body_execution_condition_id)->output_memory_ptr(); + if (!instance.get_condition_id().empty()) { + body_execution_condition_mem = body_network->get_primitive(instance.get_condition_id())->output_memory_ptr(); } // shortcut of current_iteration memory in body network - if (!primitive->body_current_iteration_id.empty()) { - memory::ptr body_current_iteration_mem = body_network->get_primitive(primitive->body_current_iteration_id)->output_memory_ptr(); + if (!instance.get_current_iteration_id().empty()) { + memory::ptr body_current_iteration_mem = body_network->get_primitive(instance.get_current_iteration_id())->output_memory_ptr(); write_scalar_value(body_current_iteration_mem, body_network->get_stream(), 0); } @@ -149,11 +149,11 @@ struct 
loop_impl : typed_primitive_impl { // read trip_count from outer network int64_t trip_count = -1; - if (!primitive->trip_count_id.empty()) { - memory::ptr trip_count_mem = outer_network.get_primitive(primitive->trip_count_id)->output_memory_ptr(); + if (!instance.get_trip_count_id().empty()) { + memory::ptr trip_count_mem = outer_network.get_primitive(instance.get_trip_count_id())->output_memory_ptr(); trip_count = read_scalar_value(std::move(trip_count_mem), stream); } else { - OPENVINO_ASSERT(!primitive->body_execution_condition_id.empty() + OPENVINO_ASSERT(!instance.get_condition_id().empty() || num_iterations > 0 || primitive->max_num_iterations > 0, "num_iterations should be positive when trip_count_id is not existed"); // If trip_count_id is not existed, the original ngraph operation is TensorIterator. @@ -166,11 +166,11 @@ struct loop_impl : typed_primitive_impl { // read initial execution condition from outer network int64_t execution_condition = 1; - if (!primitive->first_execution_condition_id.empty()) { + if (!instance.get_initial_execution_id().empty()) { // Wait for completion of the execution_condition of outer_network - if (outer_network.has_event(primitive->first_execution_condition_id)) - outer_network.get_primitive_event(primitive->first_execution_condition_id)->wait(); - memory::ptr first_execution_condition_mem = outer_network.get_primitive(primitive->first_execution_condition_id)->output_memory_ptr(); + if (outer_network.has_event(instance.get_initial_execution_id())) + outer_network.get_primitive_event(instance.get_initial_execution_id())->wait(); + memory::ptr first_execution_condition_mem = outer_network.get_primitive(instance.get_initial_execution_id())->output_memory_ptr(); execution_condition = read_scalar_value(first_execution_condition_mem, stream); } GPU_DEBUG_LOG << "execution_condition: " << execution_condition << std::endl; @@ -178,7 +178,7 @@ struct loop_impl : typed_primitive_impl { // When execution_condition is false or 
trip_count is zero, return execute_impl without any body_network execution. if (!execution_condition || trip_count == 0) { // Update num_iterations (actual number of iterations) - memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); + memory::ptr num_actual_iterations_mem = outer_network.get_primitive(instance.get_num_iterations_id())->output_memory_ptr(); write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx); instance.update_output_layout(); @@ -255,7 +255,7 @@ struct loop_impl : typed_primitive_impl { // execution condition is the result of body network execution if (body_execution_condition_mem != nullptr) { - auto execution_id = primitive->body_execution_condition_id; + auto execution_id = instance.get_condition_id(); if (body_network->has_event(execution_id)) { auto ev = body_network->get_primitive_event(execution_id); if (ev) ev->wait(); @@ -275,9 +275,9 @@ struct loop_impl : typed_primitive_impl { // Update actual num iteration // update num_iterations (actual number of iterations) - memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); + memory::ptr num_actual_iterations_mem = outer_network.get_primitive(instance.get_num_iterations_id())->output_memory_ptr(); write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx); - GPU_DEBUG_LOG << "current_iteration_idx(" << primitive->num_iteration_id << ", " + GPU_DEBUG_LOG << "current_iteration_idx(" << instance.get_num_iterations_id() << ", " << num_actual_iterations_mem << ") : " << current_iteration_idx << std::endl; if (is_dynamic) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kv_cache.cpp index 4c463d5d6b7c1e..4801be6622d50c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kv_cache.cpp @@ -149,14 +149,15 @@ 
struct kv_cache_impl : multi_stage_primitive { execute_stage(events, instance, res_events, concat_stage); auto impl_param = *instance.get_impl_params(); - auto kv_shape = impl_param.input_layouts[0].get_partial_shape(); - if (desc->indirect && kv_shape[desc->gather_axis].get_length() > 1) { + auto kv_in_shape = impl_param.input_layouts[0].get_partial_shape(); + auto kv_out_shape = impl_param.output_layouts[0].get_partial_shape(); + if (desc->indirect && ((kv_out_shape[desc->gather_axis].get_length() > 1) || + (kv_in_shape[desc->concat_axis].get_length() == 0))) { const auto bt_alloc_type = engine.get_preferred_memory_allocation_type(false); - - auto beam_table_state = dynamic_cast(variable).get_beam_table_state(); + auto beam_table_state = + dynamic_cast(variable).get_beam_table_state(); auto bt_layout = instance.get_impl_params()->output_layouts[1]; auto bt_shape = bt_layout.get_shape(); - std::swap(beam_table_prev, beam_table_new); if (!beam_table_new || beam_table_new->count() < ov::shape_size(bt_shape)) { diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index 216b7601ec8d4d..a9ec2262342bec 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -22,6 +22,12 @@ struct typed_program_node : public typed_program_node_base { private: using parent = typed_program_node_base; + primitive_id trip_count_id; + primitive_id initial_execution_id; + primitive_id current_iteration_id; + primitive_id execution_condition_id; + primitive_id num_iterations_id; + std::vector& input_primitive_maps; std::vector& output_primitive_maps; std::vector& back_edges; @@ -31,21 +37,32 @@ struct typed_program_node : public typed_program_node_base { parent(prim, prog), input_primitive_maps(prim->input_primitive_maps), output_primitive_maps(prim->output_primitive_maps), - back_edges(prim->back_edges) {} + back_edges(prim->back_edges) { + 
set_primitive_ids(prim); + } program::ptr get_body_program() const { return get_primitive()->body_program; } - const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } - const primitive_id& get_initial_execution_id() const { return get_primitive()->first_execution_condition_id; } - const primitive_id& get_current_iteration_id() const { return get_primitive()->body_current_iteration_id; } - const primitive_id& get_execution_condition_id() const { return get_primitive()->body_execution_condition_id; } - const primitive_id& get_num_iterations_id() const { return get_primitive()->num_iteration_id; } + const primitive_id& get_trip_count_id() const { return trip_count_id; } + const primitive_id& get_initial_execution_id() const { return initial_execution_id; } + const primitive_id& get_current_iteration_id() const { return current_iteration_id; } + const primitive_id& get_execution_condition_id() const { return execution_condition_id; } + const primitive_id& get_num_iterations_id() const { return num_iterations_id; } + const int32_t get_max_num_iteration() const { return get_primitive()->max_num_iterations; } const std::vector& get_input_primitive_maps() const { return input_primitive_maps; } const std::vector& get_output_primitive_maps() const { return output_primitive_maps; } const std::vector& get_back_edges() const { return back_edges;} + void set_primitive_ids(std::shared_ptr prim) { + trip_count_id = prim->trip_count_id; + initial_execution_id = prim->first_execution_condition_id; + current_iteration_id = prim->body_current_iteration_id; + execution_condition_id = prim->body_execution_condition_id; + num_iterations_id = prim->num_iteration_id; + } + void update_primitive_map(const primitive_id& prevID, const primitive_id& newID, bool external_id = true) { if (external_id) { for (auto& pm : input_primitive_maps) { @@ -78,6 +95,18 @@ struct typed_program_node : public typed_program_node_base { } } } + + // Update ids + if 
(get_trip_count_id() == prevID) + trip_count_id = newID; + if (get_initial_execution_id() == prevID) + initial_execution_id = newID; + if (get_current_iteration_id() == prevID) + current_iteration_id = newID; + if (get_execution_condition_id() == prevID) + execution_condition_id = newID; + if (get_num_iterations_id() == prevID) + num_iterations_id = newID; } // current_iteration is necessary to calculate output layout in dynamic shape @@ -329,6 +358,12 @@ class typed_primitive_inst : public typed_primitive_inst_base { std::vector preprocess_memory_for_body_network(int64_t current_iteration_idx); std::vector postprocess_memory_for_body_network(int64_t current_iteration_idx); + primitive_id get_trip_count_id() { return _trip_count_id; } + primitive_id get_initial_execution_id() { return _initial_execution_id; } + primitive_id get_current_iteration_id() { return _current_iteration_id; } + primitive_id get_condition_id() { return _condition_id; } + primitive_id get_num_iterations_id() { return _num_iterations_id; } + private: network::ptr body_network; memory::ptr get_external_memory(const primitive_id& external_id, size_t mem_idx = 0) const; diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 9b310dd967a638..d5a36011b81344 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -33,7 +33,11 @@ std::vector kv_cache_inst::calc_output_layouts(kv_cache_node const& /*no op.set_concat_axis(desc->concat_axis); op.set_gather_axis(desc->gather_axis); - std::vector input_shapes = {impl_param.get_input_layout(0).get(), impl_param.get_input_layout(1).get()}; + std::vector input_shapes = {impl_param.get_input_layout(0).get(), + impl_param.get_input_layout(1).get()}; + if (desc->num_outputs > 1) + input_shapes.push_back(impl_param.get_input_layout(2).get()); + std::vector output_shapes = shape_infer(&op, input_shapes); const std::map ports_map = {{0, 0}, {1, 2}}; diff 
--git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6a71cbc8981587..a9d04d3fc765b6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1032,6 +1032,15 @@ void primitive_inst::do_runtime_in_place_kv_cache() { auto& past_layout = _impl_params->input_layouts[0]; auto& present_layout = _impl_params->output_layouts[0]; const auto& sequence_axis = desc->concat_axis; + const auto& gather_axis = desc->gather_axis; + + const auto& prev_batch_size = past_layout.get_shape()[gather_axis]; + const auto& beam_size = present_layout.get_shape()[gather_axis]; + if (prev_batch_size != beam_size) { + // If the previous batch size is not same as beam size, need explicit concat + _impl_params->_can_be_optimized = false; + return; + } auto sequence_axis_legacy = kv_cache_inst::get_sequence_axis_legacy(sequence_axis, past_layout.get_partial_shape().size()); if (present_layout.data_padding.get_dynamic_pad_dims().sizes()[sequence_axis_legacy] != 1) @@ -1067,6 +1076,7 @@ void primitive_inst::do_runtime_skip_gather() { return; GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_gather] " << id() << " : check optimizability" << std::endl; + auto input_shape = _impl_params->get_input_layout(0).get_shape(); auto axis = _impl_params->typed_desc()->axis; auto idx_id = get_node().get_dependency(1).id(); diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index cdd21875dd96a2..08447c6bdd2d0f 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -228,7 +228,11 @@ std::vector reshape_inst::calc_output_layouts(reshape_node const& node, output_format = node.get_preferred_output_fmt(); } - return { layout {output_shapes[0], input_layout.data_type, format::adjust_to_rank(output_format, output_shapes[0].size()), out_pad} }; + auto new_out_pad = out_pad; + if (new_out_pad 
== padding()) + new_out_pad = impl_param.get_output_layout(0).data_padding; + + return { layout {output_shapes[0], input_layout.data_type, format::adjust_to_rank(output_format, output_shapes[0].size()), new_out_pad} }; } template std::vector reshape_inst::calc_output_layouts(reshape_node const& node, const kernel_impl_params& impl_param); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 8c6ff2b2d6ecd9..7e3805787707fb 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -34,12 +34,12 @@ namespace { inline bool can_use_usm_host(const cldnn::engine& engine) { auto can_use_usm = engine.use_unified_shared_memory(); - // WA: Disable USM host memory for infer request`s tensors for PVC as - // it has performance issues in case of host <-> device data transfers inside kernels - // Use unsupported SIMD8 as unique attribute of PVC - auto supported_simd_sizes = engine.get_device_info().supported_simd_sizes; - if (std::find(supported_simd_sizes.begin(), supported_simd_sizes.end(), 8) == supported_simd_sizes.end()) + if (engine.get_device_info().gfx_ver.major == 12 && engine.get_device_info().gfx_ver.minor == 60) { + // WA: Disable USM host memory for infer request`s tensors for PVC as + // it has performance issues in case of host <-> device data transfers inside kernels + GPU_DEBUG_TRACE << "Do not use usm_host for performance issue" << std::endl; can_use_usm = false; + } return can_use_usm; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp index 46c227044a2ee5..0636eeb4bd110e 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/kv_cache.cpp @@ -3,6 +3,7 @@ // #include "intel_gpu/op/kv_cache.hpp" +#include "gather_shape_inference.hpp" 
#include "concat_shape_inference.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/validation_util.hpp" @@ -55,6 +56,8 @@ bool KVCache::visit_attributes(ov::AttributeVisitor& visitor) { void KVCache::validate_and_infer_types() { auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type; std::vector input_shapes = {m_variable->get_info().data_shape, get_input_partial_shape(1)}; + if (get_output_size() == 2) + input_shapes.push_back(get_input_partial_shape(2)); auto shapes = shape_infer(this, input_shapes); set_output_type(0, output_type, shapes[0]); if (m_indirect) { @@ -85,16 +88,25 @@ std::shared_ptr KVCache::clone_with_new_inputs(const ov::OutputVector& new std::vector shape_infer(const KVCache* op, std::vector input_shapes) { ov::op::v0::Concat concat; concat.set_axis(op->get_concat_axis()); - std::vector concat_shapes = {input_shapes[0], input_shapes[1]}; - auto out_shapes = ov::op::v0::shape_infer(&concat, concat_shapes); + std::vector out_shapes; if (op->get_output_size() == 2) { + ov::op::v8::Gather gather; int64_t gather_axis = ov::util::normalize(op->get_gather_axis(), input_shapes[0].size()); + auto gather_axis_tensor = ov::Tensor(ov::element::i64, ov::Shape{1}, static_cast(&gather_axis)); + std::unordered_map gather_axis_data = {{2, gather_axis_tensor}}; + std::vector gather_inputs = {input_shapes[0], input_shapes[2], ov::PartialShape{1}}; + auto gather_out_shapes = ov::op::shape_infer(&gather, gather_inputs, ov::make_tensor_accessor(gather_axis_data)); + std::vector concat_shapes = {gather_out_shapes[0], input_shapes[1]}; + out_shapes = ov::op::v0::shape_infer(&concat, concat_shapes); int64_t concat_axis = ov::util::normalize(op->get_concat_axis(), input_shapes[0].size()); ov::PartialShape beam_table_shape(std::vector(out_shapes[0].size(), 1)); - beam_table_shape[gather_axis] = input_shapes[0][gather_axis]; + beam_table_shape[gather_axis] = out_shapes[0][gather_axis]; 
beam_table_shape[concat_axis] = out_shapes[0][concat_axis]; out_shapes.push_back(beam_table_shape); + } else { + std::vector concat_shapes = {input_shapes[0], input_shapes[1]}; + out_shapes = ov::op::v0::shape_infer(&concat, concat_shapes); } return out_shapes; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 0047a244dd07c7..0c1041b742c0fb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -321,10 +321,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const auto& value_ps = sdpa->get_input_partial_shape(2); // Known limitations: - // - SDPA impl could be slower in non-LLM scenarios than decomposed version - if (func->get_variables().size() == 0) - return false; - // - The data type of SDPA should be fp16 if (sdpa->get_output_element_type(0) != ov::element::f16) return false; @@ -347,7 +343,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // - The head size should be divisible by 16 const auto optimal_subgroup_size = 16; if (query_ps[query_ps.size() - 1].is_dynamic() || - query_ps[query_ps.size() - 1].get_length() > 256 || + query_ps[query_ps.size() - 1].get_length() != 128 || query_ps[query_ps.size() - 1].get_length() % optimal_subgroup_size != 0) { return false; } diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 66b8d3e70cab1f..b0edfe39c90181 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -50,7 +50,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_priority, 
ov::hint::Priority::MEDIUM), - std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, false), + std::make_tuple(ov::intel_gpu::hint::enable_sdpa_optimization, true), std::make_tuple(ov::intel_gpu::enable_loop_unrolling, true), std::make_tuple(ov::intel_gpu::disable_winograd_convolution, false), std::make_tuple(ov::internal::exclusive_async_requests, false), diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp index 3b97cde5cfe636..15203e9c5f26bf 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp @@ -106,21 +106,9 @@ void ScaledAttnLayerGPUTest::SetUp() { } } - // Add artificial read/value operations to the model to trigger the enabling of the SDPA operation - auto read_key = std::make_shared(inputParams.at(1), "v0"); - auto assign_key = std::make_shared(read_key, "v0"); - - auto read_value = std::make_shared(inputParams.at(2), "v0"); - auto assign_value = std::make_shared(read_value, "v0"); - ov::OutputVector inputs; for (size_t i = 0; i < inputParams.size(); i++) { - if (i == 1) - inputs.push_back(read_key); - else if (i == 2) - inputs.push_back(read_value); - else - inputs.push_back(inputParams[i]); + inputs.push_back(inputParams[i]); } auto sdp = std::make_shared(inputs, is_causal); @@ -128,7 +116,7 @@ void ScaledAttnLayerGPUTest::SetUp() { auto output = std::make_shared(sdp->output(0)); - function = std::make_shared(ov::OutputVector{output}, ov::SinkVector{assign_key, assign_value}, inputParams, "sdpa_model"); + function = std::make_shared(ov::OutputVector{output}, inputParams, "sdpa_model"); functionRefs = function->clone(); ov::pass::Manager manager; @@ -137,11 +125,8 @@ void ScaledAttnLayerGPUTest::SetUp() { 
manager.register_pass(); manager.run_passes(functionRefs); - // Enable SDPA - configuration.insert(ov::intel_gpu::hint::enable_sdpa_optimization(true)); - auto it = std::find_if(inputShapes[1].second.begin(), inputShapes[1].second.end(), [&](const ov::Shape& shape){ - return shape[2] >= 384; + return shape[2] >= 384 || shape[3] >= 128; }); bool has_long_seq = it != inputShapes[1].second.end(); @@ -190,12 +175,12 @@ const std::vector> shapes{ // normal case, shapes of q,k,v are same { // q shape - {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 64}, - {ov::Shape{1, 8, 100, 64}, ov::Shape{1, 8, 1, 64}, ov::Shape{2, 8, 10, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 128}, + {ov::Shape{1, 8, 100, 128}, ov::Shape{1, 8, 1, 128}, ov::Shape{2, 8, 10, 128}}} }, // kv shape - {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 64}, - {ov::Shape{1, 8, 100, 64}, ov::Shape{1, 8, 1, 64}, ov::Shape{2, 8, 10, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 128}, + {ov::Shape{1, 8, 100, 128}, ov::Shape{1, 8, 1, 128}, ov::Shape{2, 8, 10, 128}}} }, // attn shape: [B, 1, -1, L0+L1] {ov::test::InputShape{ov::PartialShape{-1, 1, -1, -1}, @@ -204,12 +189,12 @@ const std::vector> shapes{ }, { // q shape - {ov::test::InputShape{ov::PartialShape{-1, 5, -1, 64}, - {ov::Shape{2, 5, 100, 64}, ov::Shape{2, 5, 1, 64}, ov::Shape{2, 5, 384, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 5, -1, 128}, + {ov::Shape{2, 5, 100, 128}, ov::Shape{2, 5, 1, 128}, ov::Shape{2, 5, 384, 128}}} }, // kv shape - {ov::test::InputShape{ov::PartialShape{-1, 5, -1, 64}, - {ov::Shape{2, 5, 100, 64}, ov::Shape{2, 5, 1, 64}, ov::Shape{2, 5, 384, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 5, -1, 128}, + {ov::Shape{2, 5, 100, 128}, ov::Shape{2, 5, 1, 128}, ov::Shape{2, 5, 384, 128}}} }, // attn shape: [B, 1, -1, L0+L1] {ov::test::InputShape{ov::PartialShape{-1, 1, -1, -1}, @@ -219,12 +204,12 @@ const std::vector> shapes{ // heads number of kv is 1, attn mask: [B, H, L1, L0+L1] { // q 
shape - {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 64}, - {ov::Shape{1, 8, 100, 64}, ov::Shape{1, 8, 1, 64}, ov::Shape{2, 8, 10, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 8, -1, 128}, + {ov::Shape{1, 8, 100, 128}, ov::Shape{1, 8, 1, 128}, ov::Shape{2, 8, 10, 128}}} }, // kv shape - {ov::test::InputShape{ov::PartialShape{-1, 1, -1, 64}, - {ov::Shape{1, 1, 100, 64}, ov::Shape{1, 1, 1, 64}, ov::Shape{2, 1, 10, 64}}} + {ov::test::InputShape{ov::PartialShape{-1, 1, -1, 128}, + {ov::Shape{1, 1, 100, 128}, ov::Shape{1, 1, 1, 128}, ov::Shape{2, 1, 10, 128}}} }, // attn shape {ov::test::InputShape{ov::PartialShape{-1, 8, -1, -1}, diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp index e5461ca96d7f7f..77477648fd4860 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -259,7 +259,8 @@ class KVCacheTests: public ::testing::Test { ov::element::Type model_element_type = ov::element::f16, size_t num_iter = 10, size_t num_groups = 1, - bool set_state_on_each_iter = false) { + bool set_state_on_each_iter = false, + int32_t initial_batch = -1) { #if defined(ANDROID) GTEST_SKIP(); #endif @@ -320,22 +321,11 @@ class KVCacheTests: public ::testing::Test { auto beam_idx_shape = ov::Shape{batch}; - auto beam_idx_data_0 = ov::Tensor(ov::element::i32, beam_idx_shape); - auto beam_idx_data_1 = ov::Tensor(ov::element::i32, beam_idx_shape); - for (size_t i = 0; i < batch; i++) { - beam_idx_data_0.data()[i] = i; - beam_idx_data_1.data()[i] = batch - i - 1; - } - - std::vector beam_idx_data_array = { - beam_idx_data_0, - beam_idx_data_1, - }; - - auto get_ref_results = [&ref_model, fuse_cache_reorder, &beam_idx_shape](const ov::Tensor& kv_cache, - const ov::Tensor& new_token_data, - const ov::Tensor& matmul_data, - const ov::Tensor& beam_idx_data) 
{ + auto get_ref_results = [&ref_model, fuse_cache_reorder](const ov::Tensor& kv_cache, + const ov::Tensor& new_token_data, + const ov::Tensor& matmul_data, + const ov::Tensor& beam_idx_data, + const ov::Shape& beam_idx_shape) { auto input0 = ref_model->get_parameters().at(0); auto input1 = ref_model->get_parameters().at(1); auto input2 = ref_model->get_parameters().at(2); @@ -387,9 +377,11 @@ class KVCacheTests: public ::testing::Test { ov::Tensor ref_kv_cache; size_t cache_size = 0; { - const ov::Shape new_token_size_initial = {batch, context_size, n_heads / num_groups, n_features}; - const ov::Shape kv_cache_size_initial = {batch, n_heads / num_groups, cache_size, n_features}; - const ov::Shape matmul_in_size_initial = {batch, n_heads, context_size, context_size}; + // first infer + size_t init_batch = initial_batch == -1 ? batch : static_cast(initial_batch); + const ov::Shape new_token_size_initial = {init_batch, context_size, n_heads / num_groups, n_features}; + const ov::Shape kv_cache_size_initial = {init_batch, n_heads / num_groups, cache_size, n_features}; + const ov::Shape matmul_in_size_initial = {init_batch, n_heads, context_size, context_size}; auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size_initial); auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_initial); @@ -400,13 +392,19 @@ class KVCacheTests: public ::testing::Test { new_token_data.copy_to(new_token_input); matmul_data.copy_to(matmul_input); + auto init_beam_idx_shape = ov::Shape{init_batch}; + auto init_beam_idx_data_0 = ov::Tensor(ov::element::i32, init_beam_idx_shape); + for (size_t i = 0; i < init_batch; i++) { + init_beam_idx_data_0.data()[i] = 0; + } + if (fuse_cache_reorder) { - infer_request.set_tensor(input2, beam_idx_data_array[0]); + infer_request.set_tensor(input2, init_beam_idx_data_0); } ref_kv_cache = ov::Tensor(element_type, kv_cache_size_initial); - auto ref_results = get_ref_results(ref_kv_cache, 
new_token_data, matmul_data, beam_idx_data_array[0]); + auto ref_results = get_ref_results(ref_kv_cache, new_token_data, matmul_data, init_beam_idx_data_0, init_beam_idx_shape); ref_kv_cache = ref_results[0]; infer_request.infer(); @@ -416,6 +414,22 @@ class KVCacheTests: public ::testing::Test { cache_size += context_size; } + auto beam_idx_data_0 = ov::Tensor(ov::element::i32, beam_idx_shape); + auto beam_idx_data_1 = ov::Tensor(ov::element::i32, beam_idx_shape); + auto beam_idx_data_2 = ov::Tensor(ov::element::i32, beam_idx_shape); + auto beam_idx_data_init = ov::Tensor(ov::element::i32, beam_idx_shape); + for (size_t i = 0; i < batch; i++) { + beam_idx_data_0.data()[i] = i; + beam_idx_data_1.data()[i] = batch - i - 1; + beam_idx_data_2.data()[i] = 0; + } + + std::vector beam_idx_data_array = { + beam_idx_data_0, + beam_idx_data_1, + beam_idx_data_2, + }; + const size_t input_tokens = 1; const ov::Shape new_token_size = {batch, input_tokens, n_heads / num_groups, n_features}; size_t context_length = cache_size + input_tokens; @@ -423,11 +437,11 @@ class KVCacheTests: public ::testing::Test { ov::Shape matmul_in_size_loop = {batch, n_heads, input_tokens, context_length}; auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size); auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_loop); + size_t beam_idx_array_idx = i == 0 ? 
2 : i % 2; if (fuse_cache_reorder) { - infer_request.set_tensor(input2, beam_idx_data_array[i % beam_idx_data_array.size()]); + infer_request.set_tensor(input2, beam_idx_data_array[beam_idx_array_idx]); } - - auto ref_results = get_ref_results(ref_kv_cache, new_token_data, matmul_data, beam_idx_data_array[i % beam_idx_data_array.size()]); + auto ref_results = get_ref_results(ref_kv_cache, new_token_data, matmul_data, beam_idx_data_array[beam_idx_array_idx], beam_idx_shape); ref_kv_cache = ref_results[0]; new_token_input.set_shape(new_token_data.get_shape()); @@ -499,6 +513,15 @@ TEST_F(KVCacheTests, smoke_multipleIterations_stateful_gather_with_initializer_b this->test_smoke_multipleIterations_stateful(false, true, true, 3); } +TEST_F(KVCacheTests, smoke_multipleIterations_stateful_gather_with_initializer_batch_1_3) { + this->test_smoke_multipleIterations_stateful(false, true, true, 3, 2, ov::element::f16, 10, 1, false, 1); +} + +TEST_F(KVCacheTests, smoke_multipleIterations_stateful_gather_with_initializer_batch_1_5) { + this->test_smoke_multipleIterations_stateful(false, true, true, 5, 2, ov::element::f16, 10, 1, false, 1); +} + + TEST_F(KVCacheTests, smoke_multipleIterations_stateful_same_shape_after_reset) { this->test_smoke_multipleIterations_stateful(false, false, false, 1, 2, ov::element::f16, 0); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp index dff05c614bb4fb..0687bbfaef9391 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp @@ -3,7 +3,7 @@ // #include "shared_test_classes/base/ov_subgraph.hpp" -#include "shared_test_classes/base/utils/generate_inputs.hpp" +#include "shared_test_classes/base/utils/ranges.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/constant.hpp" @@ -232,23 +232,22 @@ class 
DynamicTensorIteratorTest : public testing::WithParamInterface& targetInputStaticShapes) override { inputs.clear(); ov::Shape default_shape{batch_size, 1, hidden_size}; - auto inputMap = ov::test::utils::getInputMap(); + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(function); auto itTargetShape = targetInputStaticShapes.begin(); for (const auto ¶m : function->get_parameters()) { std::shared_ptr inputNode = param; for (size_t i = 0; i < param->get_output_size(); i++) { for (const auto &node : param->get_output_target_inputs(i)) { std::shared_ptr nodePtr = node.get_node()->shared_from_this(); - auto it = inputMap.find(nodePtr->get_type_info()); - ASSERT_NE(it, inputMap.end()); for (size_t port = 0; port < nodePtr->get_input_size(); ++port) { if (itTargetShape != targetInputStaticShapes.end()) { if (nodePtr->get_input_node_ptr(port)->shared_from_this() == inputNode->shared_from_this()) { - inputs.insert({param, it->second(nodePtr, port, param->get_element_type(), *itTargetShape)}); + inputs.insert({param, modelRange.generate_input(nodePtr, port, *itTargetShape)}); break; } } else { - inputs.insert({param, it->second(nodePtr, port, param->get_element_type(), default_shape)}); + inputs.insert({param, modelRange.generate_input(nodePtr, port, default_shape)}); } } } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 26ca489c5a8115..c1f914c12ad0d4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -787,3 +787,179 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) { std::vector(), 2, 3); } + +static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape body_input_layout, + std::vector whole_layouts, + std::vector> input_data_list, + std::vector expected_output_data, + size_t axis, + size_t exit_value, + bool is_caching_test = false) { 
+ auto& engine = get_test_engine(); + + auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx }; + + ov::PartialShape sliced_input_shape = body_input_layout; + sliced_input_shape[axis] = 1; + auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx }; + + auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx }; + + auto e_initial_condition_mem = engine.allocate_memory(const_layout); + auto e_num_iteration_mem = engine.allocate_memory(const_layout); + auto b_exit_value_mem = engine.allocate_memory(const_layout); + auto b_index_inc_mem = engine.allocate_memory(const_layout); + + // initialize input buffers + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + set_values(e_num_iteration_mem, {0}); + + primitive_id body_current_iteration_id = "b_index"; + primitive_id body_execution_condition_id = "b_cond_exit_value"; + + cldnn::topology body( + input_layout(body_current_iteration_id, const_layout), + input_layout("b_add_data", sliced_input_layout), + input_layout("b_mul_data", sliced_input_layout), + data("b_exit_value", b_exit_value_mem), + data("b_index_inc", b_index_inc_mem), + eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum), + reorder("b_index_cast", input_info("b_index_update"), + cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true), + eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt), + eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum), + eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod)); + + primitive_id trip_count_id = ""; + primitive_id actual_iteration_count_id = "actual_iteration_count"; + primitive_id initial_mean = "initial_mean"; + + primitive_id 
initial_condition_id = "initial_condition"; + primitive_id initial_condition_id_elt = "initial_condition_elt"; + primitive_id initial_condition_id_reorder = "initial_condition_reorder"; + primitive_id initial_condition_id_reorder2 = "initial_condition_reorder2"; + int64_t num_iterations = -1; + + std::vector input_primitive_maps { + loop::io_primitive_map("input", "b_add_data", axis), + loop::io_primitive_map("input", "b_mul_data", axis), + loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) }; + std::vector output_primitive_maps { + loop::io_primitive_map(cldnn::input_info("loop", 0), cldnn::input_info("b_add", 0), axis), + loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), axis) }; + std::vector back_edges { + loop::backedge_mapping("b_index_update", body_current_iteration_id) }; + + auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); + + auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); + + + std::vector body_input_layouts; + for (size_t i = 0; i < body_input_layout.size(); i++) { + if (body_input_layout[i].is_dynamic()) + body_input_layouts.push_back(-1); + else + body_input_layouts.push_back(body_input_layout[i].get_length()); + } + set_values(const_shape, body_input_layouts); + const std::vector values_to_subtract = {0.f}; + + cldnn::topology topology( + input_layout("input_origin", b_input_layout), + input_layout(initial_condition_id, e_initial_condition_mem->get_layout()), + mutable_data(actual_iteration_count_id, e_num_iteration_mem), + + reorder(initial_condition_id_reorder, input_info(initial_condition_id), cldnn::format::any, data_types::f32, values_to_subtract), + reorder(initial_condition_id_reorder2, input_info(initial_condition_id_reorder), cldnn::format::any, data_types::i32), // should be fused to test updating input id of loop + + shape_of("shape_of_input", 
input_info("input_origin"), data_types::i32), + reduce("reduced_shape", input_info("shape_of_input"), reduce_mode::prod, {0}, true), + reshape("reshape1", input_info("input_origin"), input_info("reduced_shape"), false, ov::PartialShape::dynamic(1)), + data("const", const_shape), + reshape("input", input_info("reshape1"), input_info("const"), false, ov::PartialShape::dynamic(4)), + + loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id_reorder2), input_info("input") }, body_program, + trip_count_id, initial_condition_id_reorder2, actual_iteration_count_id, + input_primitive_maps, output_primitive_maps, back_edges, + num_iterations, body_current_iteration_id, body_execution_condition_id, 2), + eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + + for (size_t i = 0 ; i < whole_layouts.size(); i++) { + auto whole_layout = whole_layouts[i]; + auto input_data = input_data_list[i]; + + // initialize input buffers + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + set_values(e_num_iteration_mem, {0}); + + auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx }; + auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y + auto expected_output_layout = whole_layout; + set_values(e_input_mem, input_data); + network->set_input_data("input_origin", e_input_mem); + + network->set_input_data(initial_condition_id, e_initial_condition_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), 1); + + auto expected_num_iterations = (exit_value + 1); + expected_output_layout[axis] = expected_num_iterations; + auto e_output_layout = cldnn::layout{ 
expected_output_layout, data_types::f32, format::bfyx }; + + auto num_iter_mem = network->get_output_memory(actual_iteration_count_id); + if (num_iter_mem != nullptr) { + mem_lock num_iter_ptr{ num_iter_mem, get_test_stream() }; + ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations); + } + + std::vector expected(input_data.size()); + if (expected_output_data.size() == 0) { + size_t unit = 1; + for (size_t k = axis; k < whole_layout.size(); k++) { + unit *= whole_layout[k].get_length(); + } + + for (size_t j = 0; j < input_data.size(); j++) { + auto val = static_cast((j % unit) / 4) + 1; + expected[j] = static_cast(input_data[j] + val) + static_cast(input_data[j] * val); + } + } else { + expected = expected_output_data; + } + + auto output_mem = outputs.begin()->second.get_memory(); + auto output_layout = output_mem->get_layout(); + ASSERT_EQ(output_layout.batch(), e_output_layout.batch()); + ASSERT_EQ(output_layout.feature(), e_output_layout.feature()); + ASSERT_EQ(output_layout.spatial(0), e_output_layout.spatial(0)); + ASSERT_EQ(output_layout.spatial(1), e_output_layout.spatial(1)); + // value check + { + mem_lock output_ptr{ output_mem, get_test_stream() }; + for (size_t i = 0, iend = output_layout.count(); i < iend; ++i) { + ASSERT_FLOAT_EQ(output_ptr[i], expected.at(i)); + } + } + } +} + + +TEST(loop_gpu, support_loop_w_dynamic_input_update_primitive_id) { + test_loop_gpu_wo_trip_count_update_primitive_id( + { 1, -1, 4, 4 }, + {{ 1, 1, 4, 4 }}, // axis value should be iter_num = (exit_value + 1) + {input_data_4_4, input_data_2_4_4}, + std::vector(), + 2, 3); +} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp index 7ccd5966c46f65..75244f1dba8164 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp @@ -1688,3 +1688,76 @@ TEST(reshape_gpu_f32, 
followed_by_convolution_dynamic) { } } } + +TEST(reshape_gpu_f32, followed_by_convolution_dynamic_w_pad) { + auto& engine = get_test_engine(); + + ov::Shape in0_shape = { 1, 1, 4, 5 }; + auto in0_dyn_layout = layout{ov::PartialShape::dynamic(in0_shape.size()), data_types::f32, format::bfyx}; + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); + set_values(weights, { + 1.0f, 2.0f, 1.0f, + 2.0f, 1.0f, 2.0f + }); + + topology topology( + input_layout("input", in0_dyn_layout), + shape_of("shape_of_input", input_info("input"), data_types::i32), + reshape("reshape", input_info("input"), input_info("shape_of_input"), false, ov::PartialShape::dynamic(4), + cldnn::reshape::reshape_mode::base, padding({0, 0, 1, 1}, {0, 0, 2, 2})), + data("weights", weights), + pooling("pooling", input_info("weights"), pooling_mode::max, ov::Shape{3, 3}, { 1, 1 }, {0, 0}, {0, 0}, tensor(3, 3, 1, 1), data_types::f32), + convolution("conv", input_info("reshape"), "pooling", "", 1, { 1, 1 }, {1, 1}, {2, 2}, {0, 0}, false) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::allow_static_input_reorder(true)); + + network network(engine, topology, config); + + // execute + { + auto input0 = engine.allocate_memory({ in0_shape, data_types::f32, format::bfyx }); + set_values(input0, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, + 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f + }); + network.set_input_data("input", input0); + + auto outputs = network.execute(); + + // check 'conv' + auto output_memory = outputs.at("conv").get_memory(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + int y_size = output_layout.spatial(1); + int x_size = output_layout.spatial(0); + int f_size = output_layout.feature(); + int b_size = output_layout.batch(); + + 
ASSERT_EQ(output_layout.format, format::bfyx); + ASSERT_EQ(y_size, 6); + ASSERT_EQ(x_size, 7); + ASSERT_EQ(f_size, 1); + ASSERT_EQ(b_size, 1); + + VVF output_vec = { + { 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 2, 4, 6, 8, 10 }, + { 0, 0, 4, 4, 6, 8, 12 }, + { 0, 0, 6, 6, 6, 10, 2 }, + { 0, 0, 2, 2, 2, 2, 2 } + }; + + for (int y = 0; y < y_size; ++y) { + for (int x = 0; x < x_size; ++x) { + ASSERT_EQ(output_vec[y][x], output_ptr[y * x_size + x]); + } + } + } +} diff --git a/src/plugins/intel_gpu/tests/unit/transformations/indirect_kv_cache_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/indirect_kv_cache_test.cpp index 15f3bffb0ff9ca..b23c3064a568d0 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/indirect_kv_cache_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/indirect_kv_cache_test.cpp @@ -105,7 +105,7 @@ TEST_F(TransformationTestsF, IndirectKVCache3) { { auto variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f32, "v0"}); auto new_token_param = std::make_shared(ov::element::f32, ov::PartialShape{1, 32, -1, 80}); - auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{1}); + auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{32}); auto past = std::make_shared(variable); auto axis = std::make_shared(ov::element::i64, ov::Shape{}, 1); auto gather_past = std::make_shared(past, beam_idx, axis); @@ -120,7 +120,7 @@ TEST_F(TransformationTestsF, IndirectKVCache3) { { auto variable = std::make_shared(ov::op::util::VariableInfo{{1, 32, -1, 80}, ov::element::f32, "v0"}); auto parameter = std::make_shared(ov::element::f32, ov::PartialShape{1, 32, -1, 80}); - auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{1}); + auto beam_idx = std::make_shared(ov::element::i32, ov::PartialShape{32}); auto past = std::make_shared(variable); auto kv_cache = std::make_shared(past, parameter, beam_idx, variable, 2, 1, ov::element::f32); auto gemm_in 
= std::make_shared(ov::element::f32, ov::PartialShape{1, 32, -1, -1}); diff --git a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp index 6f559aebe843e6..f01eff26a23131 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp @@ -94,7 +94,7 @@ struct NpuInferProfiling final { ze_device_handle_t _device_handle; ov::log::Level _loglevel; Logger _logger; - ze_device_properties_t _dev_properties; + ze_device_properties_t _dev_properties = {}; int64_t _npu_infer_stats_min_cc = LLONG_MAX; int64_t _npu_infer_stats_max_cc = 0; int64_t _npu_infer_stats_accu_cc = 0; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/single_op.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/single_op.cpp index ec36cca504aac7..a00e9afcb41b43 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/single_op.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op/single_op.cpp @@ -41,7 +41,8 @@ bool SingleOpMatcher::match_inputs(const std::shared_ptr &node, if (node->get_input_size() != ref->get_input_size()) { return false; } - const std::vector &ignored_ports = get_config(node)->ignored_ports; + const auto &cfg = get_config(node); + const std::vector &ignored_ports = cfg->ignored_ports; for (size_t port_id = 0; port_id < node->get_input_size(); ++port_id) { if (std::find(ignored_ports.begin(), ignored_ports.end(), port_id) != ignored_ports.end()) { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp index a35b4f108795aa..8a45575f0fe22a 100644 --- 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp @@ -213,7 +213,7 @@ RepeatPatternExtractor::get_patterns_by_nodes(const std::vector& start_o }); // exclude not repeated pattern - while (potential_patterns.rbegin()->size() < 2 && !potential_patterns.empty()) { + while (!potential_patterns.empty() && potential_patterns.rbegin()->size() < 2) { potential_patterns.pop_back(); } patterns = potential_patterns; diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp index 048dd35e2a02b6..458689d4333a6b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp @@ -146,7 +146,7 @@ void ReadIRTest::SetUp() { // auto next_node = param->get_default_output().get_node_shared_ptr(); auto next_node = param->get_default_output().get_target_inputs().begin()->get_node()->shared_from_this(); auto it = inputMap.find(next_node->get_type_info()); - auto tensor = it->second(next_node, function->get_parameter_index(param), param->get_element_type(), param->get_shape()); + auto tensor = it->second(next_node, function->get_parameter_index(param), param->get_element_type(), param->get_shape(), nullptr); auto const_node = std::make_shared(tensor); const_node->set_friendly_name(param->get_friendly_name()); ov::replace_node(param, const_node); diff --git a/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp b/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp new file mode 100644 index 00000000000000..7f37032c8303f5 --- /dev/null +++ 
b/src/tests/functional/plugin/shared/include/snippets/group_normalization.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + InputShape, // Input 0 Shape + size_t, // numGroup + float, // epsilon + size_t, // Expected num nodes + size_t, // Expected num subgraphs + std::string // Target Device +> GroupNormalizationParams; + +class GroupNormalization : public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + InputShape ExtractScaleShiftShape(const InputShape& shape); +}; + +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index 02a83e4eed658e..ea7ea3c77e8e32 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -362,7 +362,7 @@ TEST_P(CompileModelLoadFromFileTestBase, CanCreateCacheDirAndDumpBinariesUnicode ov::util::string_to_wstring(cache_path_mb + ov::util::FileTraits::file_separator + m_weightsName); try { - ov::test::utils::createDirectory(cache_path_mb); + ov::test::utils::createDirectory(cache_path_w); // Copy IR files into unicode folder for read_model test ov::test::utils::copyFile(m_modelName, model_xml_path_w); diff --git a/src/tests/functional/plugin/shared/src/snippets/group_normalization.cpp b/src/tests/functional/plugin/shared/src/snippets/group_normalization.cpp new file mode 100644 index 00000000000000..aa157be0e7d308 --- /dev/null +++ 
b/src/tests/functional/plugin/shared/src/snippets/group_normalization.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/common_utils.hpp" +#include "snippets/group_normalization.hpp" +#include "subgraph_group_normalization.hpp" +#include "functional_test_utils/skip_tests_config.hpp" + +namespace ov { +namespace test { +namespace snippets { + +std::string GroupNormalization::getTestCaseName(testing::TestParamInfo obj) { + InputShape inputShapes; + size_t numGroup; + float eps; + std::string targetDevice; + size_t num_nodes, num_subgraphs; + std::tie(inputShapes, numGroup, eps, num_nodes, num_subgraphs, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << ov::test::utils::partialShape2str({inputShapes.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShapes.second) { + result << "(" << ov::test::utils::vec2str(shape) << ")_"; + } + result << "numGroup=" << numGroup << "_"; + result << "epsilon=" << eps << "_"; + result << "#N=" << num_nodes << "_"; + result << "#S=" << num_subgraphs << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void GroupNormalization::SetUp() { + InputShape inputShape; + size_t numGroup; + float eps; + std::tie(inputShape, numGroup, eps, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + + InputShape scaleShiftShape = ExtractScaleShiftShape(inputShape); + + init_input_shapes({inputShape, scaleShiftShape, scaleShiftShape}); + + auto f = ov::test::snippets::GroupNormalizationFunction(inputDynamicShapes, numGroup, eps); + function = f.getOriginal(); + + if (!configuration.count("SNIPPETS_MODE")) { + configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); + } + + abs_threshold = 1e-5; +} + +InputShape GroupNormalization::ExtractScaleShiftShape(const InputShape& shape) { + std::vector biasShape; + std::transform(shape.second.cbegin(), shape.second.cend(), 
std::back_inserter(biasShape), + [](const ov::Shape& s)->ov::Shape { + OPENVINO_ASSERT(s.size() >= 2, "First input rank for group normalization op should be greater than 1"); + return {s[1]}; + }); + InputShape biasInputShape { + shape.first.is_dynamic() ? ov::PartialShape{shape.first[1]} : shape.first, + std::move(biasShape) + }; + return biasInputShape; +} + +TEST_P(GroupNormalization, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp index b857e027ffc698..d2930be59d5eac 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp @@ -19,7 +19,8 @@ using InputsMap = std::map& node, size_t port, const ov::element::Type& elemType, - const ov::Shape& targetShape)>>; + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData)>>; InputsMap getInputMap(); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp index 7e415dc282dccc..5e7c51a8146666 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp @@ -4,152 +4,253 @@ #pragma once +#include + #include #include #include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/type_ranges.hpp" #include "openvino/core/node.hpp" - -#include "openvino/op/proposal.hpp" -#include "openvino/op/power.hpp" -#include "openvino/op/mod.hpp" -#include 
"openvino/op/floor_mod.hpp" -#include "openvino/op/divide.hpp" -#include "openvino/op/erf.hpp" -#include "openvino/op/non_max_suppression.hpp" -#include "openvino/op/reduce_l1.hpp" -#include "openvino/op/reduce_l2.hpp" -#include "openvino/op/reduce_sum.hpp" -#include "openvino/op/reduce_prod.hpp" -#include "openvino/op/reduce_mean.hpp" -#include "openvino/op/maximum.hpp" -#include "openvino/op/minimum.hpp" -#include "openvino/op/reduce_max.hpp" -#include "openvino/op/reduce_min.hpp" -#include "openvino/op/dft.hpp" -#include "openvino/op/idft.hpp" -#include "openvino/op/rdft.hpp" -#include "openvino/op/irdft.hpp" -#include "openvino/op/logical_and.hpp" -#include "openvino/op/logical_or.hpp" -#include "openvino/op/logical_xor.hpp" -#include "openvino/op/logical_not.hpp" -#include "openvino/op/abs.hpp" -#include "openvino/op/acos.hpp" -#include "openvino/op/acosh.hpp" -#include "openvino/op/asin.hpp" -#include "openvino/op/asinh.hpp" -#include "openvino/op/atan.hpp" -#include "openvino/op/atanh.hpp" -#include "openvino/op/ceiling.hpp" -#include "openvino/op/clamp.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/cos.hpp" -#include "openvino/op/cosh.hpp" -#include "openvino/op/elu.hpp" -#include "openvino/op/erf.hpp" -#include "openvino/op/exp.hpp" -#include "openvino/op/floor.hpp" -#include "openvino/op/gelu.hpp" -#include "openvino/op/hard_sigmoid.hpp" -#include "openvino/op/hsigmoid.hpp" -#include "openvino/op/hswish.hpp" -#include "openvino/op/log.hpp" -#include "openvino/op/mish.hpp" -#include "openvino/op/negative.hpp" -#include "openvino/op/parameter.hpp" -#include "openvino/op/prelu.hpp" -#include "openvino/op/relu.hpp" -#include "openvino/op/round.hpp" -#include "openvino/op/selu.hpp" -#include "openvino/op/sigmoid.hpp" -#include "openvino/op/sign.hpp" -#include "openvino/op/sin.hpp" -#include "openvino/op/sinh.hpp" -#include "openvino/op/softplus.hpp" -#include "openvino/op/softsign.hpp" -#include "openvino/op/sqrt.hpp" -#include 
"openvino/op/swish.hpp" -#include "openvino/op/tan.hpp" -#include "openvino/op/tanh.hpp" -#include "openvino/op/max_pool.hpp" +#include "openvino/op/ops.hpp" +#include "ov_ops/augru_cell.hpp" +#include "ov_ops/augru_sequence.hpp" namespace ov { namespace test { namespace utils { -static std::map>> inputRanges = { - // NodeTypeInfo: {IntRanges{}, RealRanges{}} (Ranges are used by generate) - { ov::op::v0::Erf::get_type_info_static(), {{{-3, 6}}, {{-3, 6, 10}}} }, - { ov::op::v1::Divide::get_type_info_static(), {{{101, 100}}, {{2, 2, 128}}} }, - { ov::op::v1::FloorMod::get_type_info_static(), {{{2, 4}}, {{2, 2, 128}}} }, - { ov::op::v1::Mod::get_type_info_static(), {{{2, 4}}, {{2, 2, 128}}} }, - { ov::op::v1::ReduceMax::get_type_info_static(), {{{0, 5}}, {{-5, 5, 1000}}} }, - { ov::op::v1::ReduceMean::get_type_info_static(), {{{0, 5, 1000}}, {{0, 5, 1000}}} }, - { ov::op::v1::ReduceMin::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v1::ReduceProd::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v1::ReduceSum::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v1::ReduceSum::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v1::ReduceSum::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v1::Power::get_type_info_static(), {{{2, 4}}, {{2, 2, 128}}} }, - { ov::op::v4::Proposal::get_type_info_static(), {{{0, 1, 1000, 8234231}}, {{0, 1, 1000, 8234231}}} }, - { ov::op::v4::ReduceL1::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v4::ReduceL2::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, - { ov::op::v7::DFT::get_type_info_static(), {{{0, 1}}, {{0, 1, 1000000}}} }, - { ov::op::v9::RDFT::get_type_info_static(), {{{0, 1}}, {{0, 1, 1000000}}} }, - { ov::op::v1::LogicalAnd::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, - { ov::op::v1::LogicalOr::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, - { ov::op::v1::LogicalNot::get_type_info_static(), {{{0, 2}}, {{0, 
2, 1}}} }, - { ov::op::v1::LogicalXor::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, - { ov::op::v7::IDFT::get_type_info_static(), {{{0, 1}}, {{0, 1, 1000000}}} }, - { ov::op::v9::IRDFT::get_type_info_static(), {{{0, 1}}, {{0, 1, 1000000}}} }, - { ov::op::v0::Sigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Tanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Relu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::PRelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Exp::get_type_info_static(), {{{0, 15}}, {{-10, 20, 32768}}} }, - { ov::op::v0::Log::get_type_info_static(), {{{0, 15}}, {{1, 20, 32768}}} }, - { ov::op::v0::Sign::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Abs::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Clamp::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Negative::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Acos::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v3::Acosh::get_type_info_static(), {{{1, 15}}, {{1, 200, 32768}}} }, - { ov::op::v0::Asin::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v3::Asinh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Atan::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v3::Atanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Cos::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Cosh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Floor::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Sin::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Sinh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Sqrt::get_type_info_static(), {{{0, 15}}, {{1, 20, 32768}}} }, - 
{ ov::op::v0::Tan::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Elu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Erf::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::HardSigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Selu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Sigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Tanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Relu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Exp::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Log::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Sign::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Abs::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Gelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v0::Ceiling::get_type_info_static(), {{{0, 15}}, {{-1000, 2000, 32768}}} }, - { ov::op::v4::Mish::get_type_info_static(), {{{0, 15}}, {{-10, 60, 32768}}} }, - { ov::op::v4::HSwish::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v4::SoftPlus::get_type_info_static(), {{{0, 15}}, {{-100, 200, 32768}}} }, - { ov::op::v4::Swish::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v5::HSigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v5::Round::get_type_info_static(), {{{0, 15}}, {{-10, 20, 4}}} }, - { ov::op::v7::Gelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, - { ov::op::v8::MaxPool::get_type_info_static(), {{{0, 10, 1, 1}}, {{0, 10, 1, 1}}} }, - { ov::op::v9::SoftSign::get_type_info_static(), {{{0, 15}}, {{-100, 200, 32768}}} }, +// NOTE: Default ranges are collected by data type and have resolution 1(for real types too) +// to set up correct ranges and resolutions, 
please, configure range for Op in inputRanges structure +struct Range { + std::vector int_port_ranges; + std::vector real_port_ranges; + + Range(const std::vector& int_ranges = {}, + const std::vector& real_ranges = {}) + : int_port_ranges(int_ranges), + real_port_ranges(real_ranges) { + size_t max_known_port = std::max(real_port_ranges.size(), int_port_ranges.size()); + max_known_port = std::max(static_cast(max_known_port), 1); + for (size_t port = 0; port < max_known_port; port++) { + std::map type_map; + for (auto& type : ov::element::Type::get_known_types()) { + ov::test::utils::InputGenerateData new_range = rangeByType.get_range(*type); + if (type->is_real() && port < real_port_ranges.size()) { + new_range.correct_range(real_port_ranges.at(port)); + new_range.input_attribute = real_port_ranges.at(port).input_attribute; + } else if (type->is_integral() && port < int_port_ranges.size()) { + new_range.correct_range(int_port_ranges.at(port)); + new_range.input_attribute = int_port_ranges.at(port).input_attribute; + } + type_map[*type] = new_range; + } + data.push_back(type_map); + } + } + + std::vector> data; + + ov::test::utils::InputGenerateData get_data(size_t port, ov::element::Type type) { + if (port < data.size()) { + return data.at(port).at(type); + } else { + return data.at(0).at(type); + } + } +}; + +static std::map inputRanges = { + {ov::op::v0::Erf::get_type_info_static(), Range({{-3, 6}}, {{-3, 6, 10}})}, + {ov::op::v1::Divide::get_type_info_static(), Range({{101, 100}}, {{2, 2, 128}})}, + {ov::op::v1::FloorMod::get_type_info_static(), Range({{2, 4}}, {{2, 2, 128}})}, + {ov::op::v1::Mod::get_type_info_static(), Range({{2, 4}}, {{2, 2, 128}})}, + {ov::op::v1::ReduceMax::get_type_info_static(), Range({{0, 5}}, {{-5, 5, 1000}})}, + {ov::op::v1::ReduceMean::get_type_info_static(), Range({{0, 5, 1000}}, {{0, 5, 1000}})}, + {ov::op::v1::ReduceMin::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v1::ReduceProd::get_type_info_static(), 
Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v1::ReduceSum::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v1::ReduceSum::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v1::ReduceSum::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v1::Power::get_type_info_static(), Range({{2, 4}}, {{2, 2, 128}})}, + {ov::op::v4::Proposal::get_type_info_static(), Range({{0, 255, 1, 8234231}}, {{0, 1, 1000, 8234231}})}, + {ov::op::v4::ReduceL1::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v4::ReduceL2::get_type_info_static(), Range({{0, 5}}, {{0, 5, 1000}})}, + {ov::op::v7::DFT::get_type_info_static(), Range({{0, 1}}, {{0, 1, 1000000}})}, + {ov::op::v9::RDFT::get_type_info_static(), Range({{0, 1}}, {{0, 1, 1000000}})}, + {ov::op::v1::LogicalAnd::get_type_info_static(), Range({{0, 2}}, {{0, 2, 1}})}, + {ov::op::v1::LogicalOr::get_type_info_static(), Range({{0, 2}}, {{0, 2, 1}})}, + {ov::op::v1::LogicalNot::get_type_info_static(), Range({{0, 2}}, {{0, 2, 1}})}, + {ov::op::v1::LogicalXor::get_type_info_static(), Range({{0, 2}}, {{0, 2, 1}})}, + {ov::op::v7::IDFT::get_type_info_static(), Range({{0, 1}}, {{0, 1, 1000000}})}, + {ov::op::v9::IRDFT::get_type_info_static(), Range({{0, 1}}, {{0, 1, 1000000}})}, + {ov::op::v0::Sigmoid::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Tanh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Relu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::PRelu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Exp::get_type_info_static(), Range({{0, 15}}, {{-10, 20, 32768}})}, + {ov::op::v0::Log::get_type_info_static(), Range({{0, 15}}, {{1, 20, 32768}})}, + {ov::op::v0::Sign::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Abs::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Clamp::get_type_info_static(), 
Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Negative::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Acos::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v3::Acosh::get_type_info_static(), Range({{1, 15}}, {{1, 200, 32768}})}, + {ov::op::v0::Asin::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v3::Asinh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Atan::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v3::Atanh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Cos::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Cosh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Floor::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Sin::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Sinh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Sqrt::get_type_info_static(), Range({{0, 15}}, {{1, 20, 32768}})}, + {ov::op::v0::Tan::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Elu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Erf::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::HardSigmoid::get_type_info_static(), + Range({{0, 15}}, {{-1, 2, 32768}, {0.2, 0, 1, 1, true}, {0.5, 0, 1, 1, true}})}, + {ov::op::v0::Selu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Sigmoid::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Tanh::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Relu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Exp::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Log::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + 
{ov::op::v0::Sign::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Abs::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Gelu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v0::Ceiling::get_type_info_static(), Range({{0, 15}}, {{-1000, 2000, 32768}})}, + {ov::op::v4::Mish::get_type_info_static(), Range({{0, 15}}, {{-10, 60, 32768}})}, + {ov::op::v4::HSwish::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v4::SoftPlus::get_type_info_static(), Range({{0, 15}}, {{-100, 200, 32768}})}, + {ov::op::v4::Swish::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v5::HSigmoid::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v5::Round::get_type_info_static(), Range({{0, 15}}, {{-10, 20, 4}})}, + {ov::op::v7::Gelu::get_type_info_static(), Range({{0, 15}}, {{-1, 2, 32768}})}, + {ov::op::v8::MaxPool::get_type_info_static(), Range({{0, 10, 1, 1}}, {{0, 10, 1, 1}})}, + {ov::op::v1::MaxPool::get_type_info_static(), Range({{0, 10, 1, 1}}, {{0, 10, 1, 1}})}, + {ov::op::v1::AvgPool::get_type_info_static(), Range({{0, 10, 1, 1}}, {{0, 10, 1, 1}})}, + {ov::op::v9::SoftSign::get_type_info_static(), Range({{0, 15}}, {{-100, 200, 32768}})}, + // new temp + {ov::op::v1::Convolution::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::ConvolutionBackpropData::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::GroupConvolution::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::GroupConvolutionBackpropData::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v12::ScatterElementsUpdate::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v3::ScatterUpdate::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::Unsqueeze::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::RegionYolo::get_type_info_static(), Range({{0, 
15}}, {{0, 8, 32}})}, + {ov::op::v0::MatMul::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v11::Interpolate::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v4::Interpolate::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::LRN::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Pad::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v3::Broadcast::get_type_info_static(), Range({{0, 200}, {0, 10, 1, 1, true}, {0, 10, 1, 1, true}}, {{0, 2000, 32768}})}, + {ov::op::v5::NonMaxSuppression::get_type_info_static(), + Range({{0, 15}, {0, 1, 1000, 1, true}}, {{0, 8, 32}, {0, 1, 1000, 1, true}})}, + {ov::op::v9::NonMaxSuppression::get_type_info_static(), + Range({{0, 15}, {0, 1, 1000, 1, true}}, {{0, 8, 32}, {0, 1, 1000, 1, true}})}, + {ov::op::v8::MatrixNms::get_type_info_static(), + Range({{0, 15}, {0, 1, 1000, 1, true}}, {{0, 8, 32}, {0, 1, 1000, 1, true}})}, + {ov::op::v6::ExperimentalDetectronGenerateProposalsSingleImage::get_type_info_static(), + Range({{1, 0, 1, 1}}, {{1, 0, 1, 1}})}, + {ov::op::v6::ExperimentalDetectronPriorGridGenerator::get_type_info_static(), + Range({{0, 0, 1}}, {{-100, 200, 2, 1}, {0, 0, 1, 1, true}, {0, 0, 1, 1, true}})}, + {ov::op::v8::DeformableConvolution::get_type_info_static(), + Range({{0, 15}, {0, 2, 10, 1, true}, {0, 1, 20, 1, true}}, + {{0, 8, 32}, {0, 2, 10, 1, true}, {0, 1, 20, 1, true}})}, + {ov::op::v5::GRUSequence::get_type_info_static(), Range({{0, 15}, {0, 15}, {0, 10, 1, 1, true}}, {{0, 8, 32}})}, + {ov::op::v5::BatchNormInference::get_type_info_static(), Range({{0, 3}}, {{0, 3, 1}})}, + {ov::op::v5::RNNSequence::get_type_info_static(), + Range({{0, 15}, {0, 15}, {0, 10, 1, 1, true}}, {{0, 8, 32}, {0, 8, 32}, {0, 10, 1, 1, true}})}, + {ov::op::v1::LogicalAnd::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + {ov::op::v1::LogicalNot::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + 
{ov::op::v1::LogicalOr::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + {ov::op::v1::LogicalXor::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + {ov::op::v1::ReduceLogicalAnd::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + {ov::op::v1::ReduceLogicalOr::get_type_info_static(), Range({{0, 2}}, {{0, 2}})}, + {ov::op::v1::Reshape::get_type_info_static(), Range({{-1000, 2000}, {0, 256, 1, 1, true}}, {{-100, 200, 32768}})}, + {ov::op::v3::TopK::get_type_info_static(), Range({{-1000, 2000}, {0, 1000, 1, 1, true}}, {{-1000, 2000, 32768}})}, + {ov::op::v11::TopK::get_type_info_static(), Range({{-1000, 2000}, {0, 1000, 1, 1, true}}, {{-1000, 2000, 32768}})}, + {ov::op::v4::Range::get_type_info_static(), + Range({{0, 15}, {1, 1000, 1, 1, true}}, {{-1000, 2000, 32768}, {1, 1000, 1, 1, true}})}, + {ov::op::v3::ROIAlign::get_type_info_static(), + Range({{0, 15}, {0, 1000, 1, 1, true}, {0, 1000, 1, 1, true}}, + {{-1000, 2000, 32768}, {0, 1000, 1, 1, true}, {0, 1000, 1, 1, true}})}, + {ov::op::v9::ROIAlign::get_type_info_static(), + Range({{0, 15}, {0, 1000, 1, 1, true}, {0, 1000, 1, 1, true}}, + {{-1000, 2000, 32768}, {0, 1000, 1, 1, true}, {0, 1000, 1, 1, true}})}, + {ov::op::v0::Convert::get_type_info_static(), Range({{0, 1000}}, {{-100, 200, 32768}})}, + {ov::op::v0::FakeQuantize::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::FakeQuantize::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Select::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Multiply::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::StridedSlice::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v5::LSTMSequence::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::VariadicSplit::get_type_info_static(), Range({{0, 10}}, {{0, 8, 32}})}, + {ov::op::v1::Subtract::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + 
{ov::op::v1::SpaceToBatch::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::GatherND::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::Gather::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::DepthToSpace::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v7::Einsum::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::RandomUniform::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v9::Eye::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::CumSum::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::MVN::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v6::MVN::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v3::GRUCell::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v5::GRUSequence::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::If::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::TensorIterator::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v12::GroupNormalization::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::ReverseSequence::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::GatherTree::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::DeformablePSROIPooling::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Softmax::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::Softmax::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::PSROIPooling::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::internal::AUGRUSequence::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::internal::AUGRUCell::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v7::Roll::get_type_info_static(), Range({{0, 15}}, {{0, 
8, 32}})}, + {ov::op::v4::LSTMCell::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v13::ScaledDotProductAttention::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Transpose::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v5::Loop::get_type_info_static(), Range({{1, 10, 1, 1, true}, {0, 2, 1, 1, true}, {0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::SquaredDifference::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v4::CTCLoss::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v9::GridSample::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v13::Multinomial::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v3::EmbeddingBagOffsetsSum::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v15::EmbeddingBagOffsets::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v9::GenerateProposals::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::ROIPooling::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v8::Slice::get_type_info_static(), + Range({{0, 15}, {0, 15, 1, 1, true}, {0, 15, 1, 1, true}, {1, 5, 1, 1, true}, {0, 15, 1, 1, true}}, {{0, 8, 32}})}, + {ov::op::v3::EmbeddingBagPackedSum::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v3::EmbeddingSegmentsSum::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v15::EmbeddingBagPacked::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v0::GRN::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::Add::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v15::ROIAlignRotated::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, + {ov::op::v1::BatchToSpace::get_type_info_static(), Range({{0, 15}}, {{0, 8, 32}})}, +}; + +class ModelRange { + // 
key for map calculated in get_range_id and contains [Parameter Name]_[parameter type] + std::map> node_ranges; + +public: + void find_mode_ranges(const std::shared_ptr& function); + std::string get_range_id(const std::shared_ptr& node); + ov::Tensor generate_input(std::shared_ptr node, size_t port, const ov::Shape& targetShape); + + const std::shared_ptr get_range_for_param( + const std::shared_ptr& node); }; -} // namespace utils -} // namespace test -} // namespace ov +} // namespace utils +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/tensor_iterator.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/tensor_iterator.hpp index 41749ca218e4a3..f02da2a92511b2 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/tensor_iterator.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/tensor_iterator.hpp @@ -34,6 +34,7 @@ class TensorIteratorTest : public testing::WithParamInterface& targetInputStaticShapes) override; }; } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 638cf8a396c151..2df908bbba2273 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -29,10 +29,11 @@ #include "functional_test_utils/crash_handler.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" -#include "shared_test_classes/base/utils/generate_inputs.hpp" #include "shared_test_classes/base/utils/compare_results.hpp" #include "shared_test_classes/base/utils/calculate_thresholds.hpp" +#include "shared_test_classes/base/utils/ranges.hpp" + namespace ov { namespace test { @@ -311,18 +312,18 @@ void 
SubgraphBaseTest::generate_inputs(const std::vector& targetInputStaticShapes) { inputs.clear(); - auto inputMap = utils::getInputMap(); + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(function); + auto itTargetShape = targetInputStaticShapes.begin(); for (const auto ¶m : function->get_parameters()) { std::shared_ptr inputNode = param; for (size_t i = 0; i < param->get_output_size(); i++) { for (const auto &node : param->get_output_target_inputs(i)) { std::shared_ptr nodePtr = node.get_node()->shared_from_this(); - auto it = inputMap.find(nodePtr->get_type_info()); - ASSERT_NE(it, inputMap.end()); for (size_t port = 0; port < nodePtr->get_input_size(); ++port) { if (nodePtr->get_input_node_ptr(port)->shared_from_this() == inputNode->shared_from_this()) { - inputs.insert({param, it->second(nodePtr, port, param->get_element_type(), *itTargetShape)}); + inputs.insert({param, modelRange.generate_input(nodePtr, port, *itTargetShape)}); break; } } diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index 39dfc9dcac5f75..171266db31a4b9 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -54,24 +54,24 @@ static inline void set_real_number_generation_data(InputGenerateData& inGenData) } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { InputGenerateData inGenData; + if (inGenRangeData) { + inGenData = *inGenRangeData.get(); + } else { + if (elemType.is_real()) { + set_real_number_generation_data(inGenData); + } - if (elemType.is_real()) { - set_real_number_generation_data(inGenData); - } - - const size_t 
inNodeCnt = node->get_input_size(); - auto it = inputRanges.find(node->get_type_info()); - if (it != inputRanges.end()) { - const auto& ranges = it->second; - if (ranges.size() != 2) { - throw std::runtime_error("Incorrect size of ranges. It should be 2 (real and int cases)"); + const size_t inNodeCnt = node->get_input_size(); + auto it = inputRanges.find(node->get_type_info()); + if (it != inputRanges.end()) { + auto ranges = it->second; + inGenData = ranges.get_data(port, elemType); } - const auto& range = ranges.at(elemType.is_real()); - inGenData = range.size() < inNodeCnt ? range.front() : range.at(port); } return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); } @@ -88,35 +88,11 @@ ov::Tensor generate(const ov::element::Type& elemType, } } // namespace Activation -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - switch (port) { - case 1: { - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 0.2; - in_data.range = 0; - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_data); - } - case 2: { - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 0.5; - in_data.range = 0; - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_data); - } - default: { - return Activation::generate(elemType, targetShape); - } - } - - return Activation::generate(elemType, targetShape); -} - ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { switch (port) { case 1: { auto name = node->input(1).get_node()->get_friendly_name(); @@ -141,9 +117,10 @@ ov::Tensor generate(const std::shared_ptr& node, } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& 
elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { switch (port) { case 1: { std::vector alpha(node->get_input_shape(1).size(), 1.6732f); @@ -160,9 +137,10 @@ ov::Tensor generate(const std::shared_ptr& node, } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { InputGenerateData inGenData; inGenData.start_from = 0; inGenData.range = 1; @@ -222,9 +200,10 @@ bool get_fq_scalar_range(const std::shared_ptr &node, } // namespace ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { int seed = 1; size_t constDataSize = ov::shape_size(targetShape); std::vector inputLowData, inputHighData, outputLowData, outputHighData; @@ -288,9 +267,10 @@ ov::Tensor generate(const std::shared_ptr& node, } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { const auto &inputShape = node->get_input_shape(0); if (port == 1) { auto tensor = ov::Tensor(elemType, targetShape); @@ -305,13 +285,14 @@ ov::Tensor generate(const std::shared_ptr& node, node->get_mode()); return tensor; } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& 
elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 1) { const auto &inputShape = node->get_input_shape(0); auto tensor = ov::Tensor(elemType, targetShape); @@ -345,14 +326,15 @@ ov::Tensor generate(const std::shared_ptr& node, #undef CASE return tensor; } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto &shape = node->get_input_shape(0); auto maxBeamIndx = shape.at(2) - 1; @@ -371,62 +353,11 @@ ov::Tensor generate(const std::shared_ptr& node, } } -namespace LogicalOp { -ov::Tensor generate(const ov::element::Type& elemType, - const ov::Shape& targetShape) { - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 0; - in_data.range = 2; - return create_and_fill_tensor(elemType, targetShape, in_data); -} -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return LogicalOp::generate(elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return LogicalOp::generate(elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return LogicalOp::generate(elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return 
LogicalOp::generate(elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return LogicalOp::generate(elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return LogicalOp::generate(elemType, targetShape); -} - ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { switch (port) { case 0: { auto data_size = shape_size(targetShape); @@ -441,14 +372,15 @@ ov::Tensor generate(const std::shared_ptr& node, return create_and_fill_tensor_unique_sequence(elemType, targetShape, 0, 10, 8234231); } default: - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { switch (port) { case 1: { if (node->get_sampling_ratio() != 0) { @@ -472,14 +404,47 @@ ov::Tensor generate(const std::shared_ptr& node, return ov::test::utils::create_tensor(elemType, targetShape, roiIdxVector); } default: - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); + } +} + +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { + switch (port) { + case 
1: { + if (node->get_sampling_ratio() != 0) { + const auto &inputShape = node->get_input_shape(0); + std::vector blobData(node->get_shape()[0] * 4); + ov::test::ROIAlignLayerTest::fillCoordTensor(blobData, + inputShape[2], + inputShape[3], + node->get_spatial_scale(), + node->get_sampling_ratio(), + node->get_pooled_h(), + node->get_pooled_w()); + return ov::test::utils::create_tensor(ov::element::f32, targetShape, blobData); + } else { + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + } + } + case 2: { + std::vector roiIdxVector(node->get_shape()[0]); + ov::test::ROIAlignLayerTest::fillIdxTensor(roiIdxVector, node->get_shape()[0]); + return ov::test::utils::create_tensor(elemType, targetShape, roiIdxVector); + } + default: + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 1) { return ov::test::utils::create_and_fill_tensor_normal_distribution(elemType, targetShape, 0.0f, 0.2f, 7235346); } else if (port == 2) { @@ -493,33 +458,14 @@ ov::Tensor generate(const std::shared_ptr& node, return tensor; } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return ov::test::utils::create_and_fill_tensor_consistently(elemType, targetShape, 3, 0, 1); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - if (port == 2) { - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 0; - in_data.range = 10; // max_seq_len - return 
ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_data); - } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 2) { ov::test::utils::InputGenerateData in_data; in_data.start_from = 0; @@ -532,13 +478,14 @@ ov::Tensor generate(const std::shared_ptr& node, in_data.range = node->input(0).get_shape()[1]; // seq_len return ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_data); } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 2) { ov::Tensor tensor = ov::Tensor(elemType, targetShape); @@ -570,13 +517,14 @@ ov::Tensor generate(const std::shared_ptr& nod OPENVINO_THROW("Unsupported element type for segment_ids: ", elemType); } } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 6) { ov::Tensor tensor = ov::Tensor(elemType, targetShape); @@ 
-594,13 +542,14 @@ ov::Tensor generate(const std::shared_ptr& node } return tensor; } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 5) { ov::Tensor tensor = ov::Tensor(elemType, targetShape); @@ -618,7 +567,7 @@ ov::Tensor generate(const std::shared_ptr& node, } return tensor; } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } template @@ -640,9 +589,10 @@ ov::Tensor generate_unique_possibilities(const ov::Shape &targetShape) { } ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 1) { switch (elemType) { case element::Type_t::f16: @@ -653,26 +603,14 @@ ov::Tensor generate(const std::shared_ptr(node), port, elemType, targetShape); -} - -ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - if (port == 2) { - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 0; - in_data.range = 10; // max_seq_len - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_data); - } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } ov::Tensor generate(const std::shared_ptr& node, - size_t 
port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto axis = node->get_axis(); axis = axis < 0 ? targetShape.size() + axis : axis; unsigned datasetSize = std::accumulate(targetShape.begin() + axis, targetShape.end(), 1, @@ -681,14 +619,14 @@ ov::Tensor generate(const std::shared_ptr& node, // to avoid NaN values in Softmax results for fp16 precision if (datasetSize >= 2048 && static_cast(elemType) == ov::element::Type_t::f16) return ov::test::utils::create_and_fill_tensor_normal_distribution(elemType, targetShape, -5.f, 0.5f, 7235346); - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 1) { ov::Tensor tensor(elemType, targetShape); auto data_input_shape = node->input(0).get_shape(); @@ -703,14 +641,14 @@ ov::Tensor generate(const ov::test::utils::fill_tensor_random(tensor, 1.8, -0.9); return tensor; } - return generate(std::static_pointer_cast(node), port, elemType, targetShape); + return generate(std::static_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { // when fill indices if (port == 1) { auto srcShape = 
node->get_input_shape(0); @@ -787,15 +725,30 @@ ov::Tensor generate(const return tensor; } else { return generate(std::dynamic_pointer_cast(node), port, elemType, - targetShape); + targetShape, inGenRangeData); + } +} + +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { + if (port == 1) { + auto rank = node->get_output_partial_shape(0).rank(); + double max_len = rank.get_max_length(); + InputGenerateData inGenData = InputGenerateData(1, max_len - 1); + return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); } + + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto tensor = ov::Tensor{elemType, targetShape}; size_t size = tensor.get_size(); int start = - static_cast(size / 2); @@ -820,28 +773,6 @@ ov::Tensor generate(const return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - InputGenerateData in_gen_data; - if (elemType.is_real()) { - set_real_number_generation_data(in_gen_data); - } - - if (1 == port) { - in_gen_data.range = 2; - in_gen_data.start_from = 0; - in_gen_data.resolution = 10; - } else if (2 == port) { - in_gen_data.range = 1; - in_gen_data.start_from = 0; - in_gen_data.resolution = 20; - } - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, in_gen_data); -} - namespace comparison { void fill_tensor(ov::Tensor& tensor) { auto data_ptr = static_cast(tensor.data()); @@ -867,21 +798,21 @@ void fill_tensor(ov::Tensor& 
tensor) { } } // namespace comparison -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { ov::Tensor tensor(elemType, targetShape); comparison::fill_tensor(tensor); return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { ov::Tensor tensor{elemType, targetShape}; comparison::fill_tensor(tensor); return tensor; @@ -911,11 +842,11 @@ void fill_tensor(ov::Tensor& tensor) { } } // namespace is_inf -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto tensor = ov::Tensor(elemType, targetShape); if (elemType == ov::element::f16) { is_inf::fill_tensor(tensor); @@ -942,74 +873,75 @@ void fill_tensor(ov::Tensor& tensor, ov::preprocess::ColorFormat format) { } // namespace color_conversion ov::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto b_dim = static_cast(targetShape[1] * 2 / (3 * targetShape[2])); if (node->inputs().size() > 1 || b_dim < 2) - return generate(std::static_pointer_cast(node), port, elemType, targetShape); + return 
generate(std::static_pointer_cast(node), port, elemType, targetShape, inGenRangeData); ov::Tensor tensor(elemType, targetShape); color_conversion::fill_tensor(tensor, ov::preprocess::ColorFormat::I420_SINGLE_PLANE); return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto b_dim = static_cast(targetShape[1] * 2 / (3 * targetShape[2])); if (node->inputs().size() > 1 || b_dim < 2) - return generate(std::static_pointer_cast(node), port, elemType, targetShape); + return generate(std::static_pointer_cast(node), port, elemType, targetShape, inGenRangeData); ov::Tensor tensor(elemType, targetShape); color_conversion::fill_tensor(tensor, ov::preprocess::ColorFormat::I420_SINGLE_PLANE); return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto b_dim = static_cast(targetShape[1] * 2 / (3 * targetShape[2])); if (node->inputs().size() > 1 || b_dim < 2) - return generate(std::static_pointer_cast(node), port, elemType, targetShape); + return generate(std::static_pointer_cast(node), port, elemType, targetShape, inGenRangeData); ov::Tensor tensor(elemType, targetShape); color_conversion::fill_tensor(tensor, ov::preprocess::ColorFormat::NV12_SINGLE_PLANE); return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& 
targetShape, + std::shared_ptr inGenRangeData = nullptr) { auto b_dim = static_cast(targetShape[1] * 2 / (3 * targetShape[2])); if (node->inputs().size() > 1 || b_dim < 2) - return generate(std::static_pointer_cast(node), port, elemType, targetShape); + return generate(std::static_pointer_cast(node), port, elemType, targetShape, inGenRangeData); ov::Tensor tensor(elemType, targetShape); color_conversion::fill_tensor(tensor, ov::preprocess::ColorFormat::NV12_SINGLE_PLANE); return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 0) { InputGenerateData inGenData(-5, 10, 7, 222); return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { InputGenerateData inGenData(1, 0, 1, 1); auto tensor = ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); @@ -1039,71 +971,31 @@ ov::Tensor generate(const return tensor; } -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - InputGenerateData inGenData(1, 0, 1, 1); - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); -} - -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const 
ov::element::Type& elemType, - const ov::Shape& targetShape) { - InputGenerateData inGenData(0, 0, 1, 1); - if (0 == port) { - inGenData.start_from = -100; - inGenData.range = 200; - inGenData.resolution = 2; - } - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); -} - -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { +ov::Tensor generate(const std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData = nullptr) { if (port == 1) { - InputGenerateData inGenData(0, 1, 1000, 1); + auto seq_axis = node->get_sequence_axis(); + // range - [start_data, start_data + range) + InputGenerateData inGenData(1, seq_axis + 1); return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); -} - -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - if (port == 1) { - InputGenerateData inGenData(0, 1, 1000, 1); - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); - } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); -} - -ov::Tensor generate(const - std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - if (port == 1) { - InputGenerateData inGenData(0, 1, 1000, 1); - return ov::test::utils::create_and_fill_tensor(elemType, targetShape, inGenData); - } - return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); + return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape, inGenRangeData); } template ov::Tensor generateInput(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return 
generate(ov::as_type_ptr(node), port, elemType, targetShape); + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape, + std::shared_ptr inGenRangeData) { + if (inGenRangeData) { + return generate(ov::as_type_ptr(node), port, elemType, targetShape, inGenRangeData); + } else { + return generate(ov::as_type_ptr(node), port, elemType, targetShape); + } } } // namespace diff --git a/src/tests/functional/shared_test_classes/src/base/utils/ranges.cpp b/src/tests/functional/shared_test_classes/src/base/utils/ranges.cpp new file mode 100644 index 00000000000000..48c6f70db877cd --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/base/utils/ranges.cpp @@ -0,0 +1,102 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/utils/ranges.hpp" + +#include +#include +#include + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "gtest/gtest.h" +#include "openvino/core/model.hpp" +#include "openvino/core/node.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/element_type_traits.hpp" +#include "openvino/op/util/op_types.hpp" +#include "shared_test_classes/base/utils/generate_inputs.hpp" + +namespace ov { +namespace test { +namespace utils { + +const std::shared_ptr ModelRange::get_range_for_param( + const std::shared_ptr& node) { + return node_ranges.at(get_range_id(node)); +} + +std::string ModelRange::get_range_id(const std::shared_ptr& node) { + return node->get_name() + '_' + node->get_element_type().to_string(); +} + +ov::Tensor ModelRange::generate_input(std::shared_ptr node, size_t port, const ov::Shape& targetShape) { + auto inputMap = ov::test::utils::getInputMap(); + auto it = inputMap.find(node->get_type_info()); + if (it == inputMap.end()) { + throw std::runtime_error("Couln't find Operation in inputMap: " + std::string(node->get_type_name())); + } + + std::string range_id = 
get_range_id(node->get_input_node_shared_ptr(port)); + return it->second(node, port, node->get_input_element_type(port), targetShape, node_ranges[range_id]); +} + +void ModelRange::find_mode_ranges(const std::shared_ptr& model) { + for (auto param : model->get_parameters()) { + std::shared_ptr data = + std::make_shared(ov::test::utils::rangeByType.get_range(param->get_element_type())); + + bool range_corrected = true; + std::queue> queue; + queue.push(param); + try { + while (!queue.empty()) { + auto node = queue.front(); + queue.pop(); + + for (auto& output : node->outputs()) { + for (auto& out_target_input : output.get_target_inputs()) { + queue.push(out_target_input.get_node()->shared_from_this()); + auto it = ov::test::utils::inputRanges.find(out_target_input.get_node()->get_type_info()); + ov::test::utils::InputGenerateData range; + if (it != ov::test::utils::inputRanges.end()) { + auto ranges = it->second; + range = ranges.get_data(out_target_input.get_index(), out_target_input.get_element_type()); + } else { + range = ov::test::utils::rangeByType.get_range(out_target_input.get_element_type()); + } + range_corrected = data->correct_range(range); + if (!range_corrected) { + throw std::runtime_error("WARNING: range correction is failed for " + + node->get_friendly_name() + + ", it looks like we can not find intersection for ranges any " + "more, so last founded intersection will be used"); + } else if (range.input_attribute) { + throw std::runtime_error( + "WARNING: parameter " + node->get_friendly_name() + + " is input attribute, propagation is finished and it's range will be used"); + } + } + } + } + } catch (const std::exception& ex) { + (void)ex; +#ifndef NDEBUG + std::cout << ex.what() << std::endl; +#endif + } +// #ifndef NDEBUG + std::cout << "RANGE FOR PARAMETER: " << param->get_friendly_name() + << " start from: " << std::to_string(data->start_from) << " range: " << std::to_string(data->range) + << " resolution: " << std::to_string(data->resolution) << 
" seed: " << std::to_string(data->seed) + << std::endl; +// #endif + + std::string range_id = get_range_id(param); + node_ranges[range_id] = data; + } +} + +} // namespace utils +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/dft.cpp b/src/tests/functional/shared_test_classes/src/single_op/dft.cpp index cc9553bcc10d45..830bc47fe03485 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/dft.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/dft.cpp @@ -43,12 +43,8 @@ void DFTLayerTest::generate_inputs(const std::vector& targetInputStat } if (it != ov::test::utils::inputRanges.end()) { - const auto& ranges = it->second; - if (ranges.size() != 2) { - throw std::runtime_error("Incorrect size of ranges. It should be 2 (real and int cases)"); - } - const auto& range = ranges.at(elemType.is_real()); - inGenData = range.size() < inNodeCnt ? range.front() : range.at(0); + ov::test::utils::Range ranges = it->second; + inGenData = ranges.get_data(0, elemType); } inputs.clear(); diff --git a/src/tests/functional/shared_test_classes/src/single_op/gru_sequence.cpp b/src/tests/functional/shared_test_classes/src/single_op/gru_sequence.cpp index fe4f387aadb575..3b57bb8c56602b 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/gru_sequence.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/gru_sequence.cpp @@ -82,7 +82,6 @@ void GRUSequenceTest::SetUp() { ov::ParameterVector params{std::make_shared(inType, inputDynamicShapes[0]), std::make_shared(inType, inputDynamicShapes[1])}; - const auto& w_shape = ov::Shape{num_directions, 3 * hidden_size, input_size}; const auto& r_shape = ov::Shape{num_directions, 3 * hidden_size, hidden_size}; const auto& b_shape = ov::Shape{num_directions, (linear_before_reset ? 
4 : 3) * hidden_size}; diff --git a/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp b/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp index b2da53017dfca6..b437c075bdfcfe 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp @@ -42,12 +42,8 @@ void RDFTLayerTest::generate_inputs(const std::vector& targetInputSta } if (it != ov::test::utils::inputRanges.end()) { - const auto& ranges = it->second; - if (ranges.size() != 2) { - throw std::runtime_error("Incorrect size of ranges. It should be 2 (real and int cases)"); - } - const auto& range = ranges.at(elemType.is_real()); - inGenData = range.size() < inNodeCnt ? range.front() : range.at(0); + ov::test::utils::Range ranges = it->second; + inGenData = ranges.get_data(0, elemType); } inputs.clear(); diff --git a/src/tests/functional/shared_test_classes/src/single_op/tensor_iterator.cpp b/src/tests/functional/shared_test_classes/src/single_op/tensor_iterator.cpp index de359a3f8c0752..32f48dea3b7ec1 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/tensor_iterator.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/tensor_iterator.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "shared_test_classes/single_op/tensor_iterator.hpp" #include "openvino/pass/manager.hpp" @@ -236,5 +237,21 @@ void TensorIteratorTest::SetUp() { m.run_passes(function); } } + +void TensorIteratorTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::test::utils::InputGenerateData in_data; + in_data.start_from = 0; + in_data.range = 8; + in_data.resolution = funcInput.get_element_type().is_real() ? 
32 : 1; + ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], in_data); + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } +} + } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp b/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp index 13baf0feb56dce..69138c3bedf3c5 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp @@ -44,6 +44,10 @@ void PermConvPermConcat::SetUp() { std::tie(element_type, targetDevice, input_shape, kernel_shape, output_channels, additional_config) = this->GetParam(); + if (element_type == ov::element::f32) { + abs_threshold = 1e-6; + } + configuration.insert(additional_config.begin(), additional_config.end()); const std::size_t input_dim = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); diff --git a/src/tests/ov_helpers/ov_lpt_models/src/fuse_fake_quantize.cpp b/src/tests/ov_helpers/ov_lpt_models/src/fuse_fake_quantize.cpp index 2472bdfff9164e..50b1e33aeb6009 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/fuse_fake_quantize.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/fuse_fake_quantize.cpp @@ -49,15 +49,16 @@ std::shared_ptr FuseFakeQuantizeFunction::getOriginal( namespace { std::shared_ptr make_convolution( const ov::PartialShape& inputShape, - const ov::element::Type precisionBefore, + const ov::element::Type precisionData, + const ov::element::Type precisionWeights, const std::shared_ptr& parent, const size_t index) { const ov::Shape shape = inputShape.to_shape(); const ov::Shape weightsShape({ shape[1], shape[1], 1ull, 1ull }); - auto weightsConstant = std::make_shared(ov::element::f32, weightsShape, std::vector(9, 1.f)); + auto weightsConstant = std::make_shared(precisionWeights, 
weightsShape, std::vector(9, 1.f)); auto weights = makeFakeQuantize( weightsConstant, - precisionBefore, + precisionData, FakeQuantizeOnData( 255, ov::Shape({ shape[1], 1ull, 1ull, 1ull }), @@ -65,7 +66,7 @@ std::shared_ptr make_convolution( { 1.28f, 1.28f, 1.28f }, { -1.27f, -1.27f, -1.27f }, { 1.28f, 1.28f, 1.28f }, - precisionBefore)); + precisionData)); auto convolution = std::make_shared( parent, @@ -160,8 +161,8 @@ std::shared_ptr FuseFakeQuantizeFunction::get( } ov::ResultVector results{ - std::make_shared(make_convolution(inputShape, precisionBefore, parent, 0)), - std::make_shared(make_convolution(inputShape, precisionBefore, parent, 1)) + std::make_shared(make_convolution(inputShape, precisionBefore, precisionBefore, parent, 0)), + std::make_shared(make_convolution(inputShape, precisionBefore, precisionBefore, parent, 1)) }; return std::make_shared(results, ov::ParameterVector{ input }, "FuseFakeQuantizeFunction"); } diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_group_normalization.hpp b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_group_normalization.hpp new file mode 100644 index 00000000000000..742c44a55019c1 --- /dev/null +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_group_normalization.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +/* Graph with example shape propagation: + * Graph between Reshape[1,2,1,8] and Reshape[1,2,2,4] is a MVN + * Parameter[1,4,2,2], group_num is 2 + * | + * Reshape[1,2,1,8] + * | | + * | ReduceSum[1,2,1,1] Scalar + * | | / + * | Multiply[1,2,1,1] + * | / + * Subtract[1,2,1,8] + * | | + * | PowerStatic[1,2,1,8] + * | | + * | ReduceSum[1,2,1,1] + * | | + * | FMA(Multiply+Add)[1,2,1,1] + * | | + * | sqrt[1,2,1,1] + * | | + * | PowerStatic[1,2,1,1] + * | / + * Multiply[1,2,1,8] Parameter[4] Parameter[4] + * 
| | | + * Reshape[1,2,2,4] Reshape[1,2,2,1] Reshape[1,2,2,1] + * \ | / + * \ | / + * FMA(Multiply+Add)[1,2,2,4] + * | + * Reshape[1,4,2,2] + * | + * Result[1,4,2,2] + */ +class GroupNormalizationFunction : public SnippetsFunctionBase { +public: + explicit GroupNormalizationFunction(const std::vector& inputShapes, const size_t& numGroup, const float& eps) + : SnippetsFunctionBase(inputShapes), num_groups(numGroup), epsilon(eps) { + OPENVINO_ASSERT(input_shapes.size() == 3, "Got invalid number of input shapes"); + } + +protected: + std::shared_ptr initOriginal() const override; + std::shared_ptr initReference() const override; + std::shared_ptr initLowered() const override; + +private: + size_t num_groups; + float epsilon; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_group_normalization.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_group_normalization.cpp new file mode 100644 index 00000000000000..e669200b008661 --- /dev/null +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_group_normalization.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_group_normalization.hpp" +#include + +namespace ov { +namespace test { +namespace snippets { + +std::shared_ptr GroupNormalizationFunction::initOriginal() const { + auto data = std::make_shared(precision, input_shapes[0]); + auto scale = std::make_shared(precision, input_shapes[1]); + auto shift = std::make_shared(precision, input_shapes[2]); + const auto groupNormalization = std::make_shared(data, scale, shift, num_groups, epsilon); + return std::make_shared(NodeVector{groupNormalization}, ParameterVector{data, scale, shift}); +} + +std::shared_ptr GroupNormalizationFunction::initReference() const { + auto data = std::make_shared(precision, input_shapes[0]); + auto scale = std::make_shared(precision, input_shapes[1]); + auto shift = 
std::make_shared(precision, input_shapes[2]); + auto data_ = std::make_shared(precision, input_shapes[0]); + auto scale_ = std::make_shared(precision, input_shapes[1]); + auto shift_ = std::make_shared(precision, input_shapes[2]); + const auto groupNormalization = std::make_shared(data_, scale_, shift_, num_groups, epsilon); + + auto subgraph = std::make_shared(NodeVector{data, scale, shift}, + std::make_shared(NodeVector{groupNormalization}, ParameterVector{data_, scale_, shift_})); + + return std::make_shared(NodeVector{subgraph}, ParameterVector{data, scale, shift}); +} + +std::shared_ptr GroupNormalizationFunction::initLowered() const { + auto data = std::make_shared(precision, input_shapes[0]); + auto scale = std::make_shared(precision, input_shapes[1]); + auto bias = std::make_shared(precision, input_shapes[2]); + + // reshape [N, C, spatial] to [N, group, 1, (C / group) * spatial] + const auto orig_shape = input_shapes[0].to_shape(); + size_t orig_rank = orig_shape.size(); + size_t group_rank = 4; + size_t c_in_group = orig_shape[1] / num_groups; + size_t spatial_dim = 1; + for (size_t i = 2; i < orig_rank; ++i) { + spatial_dim = spatial_dim * orig_shape[i]; + } + ov::Shape group_shape = {orig_shape[0], num_groups, 1ul, c_in_group * spatial_dim}; + std::shared_ptr reshaped_node_orig = std::make_shared(data, group_shape); + const auto reduce_sum = std::make_shared(reshaped_node_orig, group_rank - 1); + + // reduceMean + float group_size_inv = 1.0f / static_cast(group_shape[3]); + // scalar const -> scalar in data_flow_optimization. 
+ const auto group_size_inv_node = std::make_shared(element::f32, Shape{1}, group_size_inv); + const auto reduce_mean = std::make_shared(reduce_sum, group_size_inv_node); + + // x - mean + std::shared_ptr reshaped_node2 = reshaped_node_orig; + auto sub_mean = std::make_shared(reshaped_node2, reduce_mean); + // (x - mean) ^ 2 + // power -> powerStatic in data_flow_optimization + auto sqr = std::make_shared(sub_mean, 2.0f); + // reduceSum((x - mean) ^ 2) + auto sqr_reduce_sum = std::make_shared(sqr, group_rank - 1); + // reduceMean((x - mean) ^ 2) + const auto group_size_inv_node_aux = std::make_shared(element::f32, Shape{1}, group_size_inv); + auto sqr_mean = std::make_shared(sqr_reduce_sum, group_size_inv_node_aux); + // reduceMean((x - mean) ^ 2) + eps + auto eps_node = std::make_shared(element::f32, Shape{1}, epsilon); + auto eps_add = std::make_shared(sqr_mean, eps_node); + // variance = sqrt( reduceMean( (x - mean) ^ 2 ) + eps ) + auto variance = std::make_shared(eps_add); + // divide variance + const auto variance_inv = std::make_shared(variance, -1.f); + auto mvn = std::make_shared(sub_mean, variance_inv); + + // reshape mvn from [N, group, 1, (C / group) * spatial] to [N, group, C / group, spatial] + ov::Shape group_channel_shape = {orig_shape[0], num_groups, c_in_group, spatial_dim}; + const auto mvn_reshaped = std::make_shared(mvn, group_channel_shape); + + // reshape scale and bias to [1, group, C / group, 1] + ov::Shape scale_bias_shape = {1ul, num_groups, c_in_group, 1ul}; + std::shared_ptr reshape_scale = std::make_shared(scale, scale_bias_shape); + std::shared_ptr reshape_bias = std::make_shared(bias, scale_bias_shape); + + auto scaled_node = std::make_shared(mvn_reshaped, reshape_scale); + auto biased_node = std::make_shared(scaled_node, reshape_bias); + + // reshape_back [N, group, C / group, spatial] to [N, C, spatial] + const auto reshape_back_node = std::make_shared(biased_node, orig_shape); + + return 
std::make_shared(NodeVector{reshape_back_node}, ParameterVector{data, scale, bias}); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp index 970139245a91cc..44056404f2cc67 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp @@ -144,6 +144,16 @@ inline int createDirectory(const std::string& dirPath) { #endif } +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +inline int createDirectory(const std::wstring& dirPath) { +# ifdef _WIN32 + return _wmkdir(dirPath.c_str()); +# else + return mkdir(ov::util::wstring_to_string(dirPath).c_str(), mode_t(0777)); +# endif +} +#endif + inline std::vector splitStringByDelimiter(std::string paths, const std::string& delimiter = ",") { size_t delimiterPos; std::vector splitPath; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp index 053d8f6cd9a668..c971ee3f3771fc 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp @@ -33,12 +33,18 @@ struct InputGenerateData { uint32_t range = 10; int32_t resolution = 1; int32_t seed = 1; + bool input_attribute = false; - InputGenerateData(double _start_from = 0, uint32_t _range = 10, int32_t _resolution = 1, int32_t _seed = 1) + InputGenerateData(double _start_from = 0, + uint32_t _range = 10, + int32_t _resolution = 1, + int32_t _seed = 1, + bool _input_attribute = false) : start_from(_start_from), range(_range), resolution(_resolution), - seed(_seed) { + seed(_seed), + input_attribute(_input_attribute) { if 
(ConstRanges::is_defined) { auto min_orig = start_from; auto max_orig = start_from + range * resolution; @@ -50,6 +56,64 @@ struct InputGenerateData { (uint32_t)round((max_orig > max_ref || max_orig == 10 ? max_ref : max_orig - start_from) - start_from); } }; + + bool correct_range(const InputGenerateData new_range) { + bool success = true; + + double new_max = new_range.start_from + new_range.range; + double current_max = start_from + range; + + if (start_from == new_range.start_from) { + // nothing to do - -----start_curr/new+++++++++++++++range*res curr/new----------------------- + // nothing to do - -----start_curr/new+++++++++++++++range*res curr----------range*res new---- + // reduce range - -----start_curr/new+++++++++++++++range*res new-----------range*res curr--- + if (current_max > new_max) { + range = new_range.range; + resolution = new_range.resolution > resolution ? new_range.resolution : resolution; + } + } else if (start_from > new_range.start_from) { + // nothing to do - -----start_new-----start_curr++++++++++range*res curr/new------------------- + // nothing to do - -----start_new-----start_curr++++++++++range*res curr------range*res new---- + // reduce range - -----start_new-----start_curr++++++++++range*res new-------range*res curr--- + // could not find range - -----start_new---range*res new-----start_curr-----range*res curr--- + if (start_from > new_max) { + success = false; +#ifndef NDEBUG + std::cout << " FAIL TO FIND RANGE: current->start_from > new_range->start_from + new_range->range " + << " current->start_from: " << std::to_string(start_from) + << " new_range->start_from: " << std::to_string(new_range.start_from) + << " new_range max: " << std::to_string(new_max) << std::endl; +#endif + } else if (current_max > new_max) { + range = (uint32_t)round(new_max - start_from); + resolution = new_range.resolution > resolution ? 
new_range.resolution : resolution; + } + } else if (start_from < new_range.start_from) { + // reset to new - -----start_curr-----start_new++++++++++range*res curr/new------------------- + // reset to new - -----start_curr-----start_new++++++++++range*res new-------range*res curr--- + // recalculate range - -----start_curr-----start_new++++++++++range*res curr------range*res new---- + // could not find range - -----start_curr---range*res curr-----start_new-----range*res new--- + if (current_max < new_range.start_from) { + success = false; +#ifndef NDEBUG + std::cout << " FAIL TO FIND RANGE: current->start_from + current->range < new_range->start_from " + << " new_range start_from: " << std::to_string(new_range.start_from) + << " current->start_from: " << std::to_string(start_from) + << " current max: " << std::to_string(current_max) << std::endl; +#endif + } else if (current_max >= new_max) { + start_from = new_range.start_from; + range = new_range.range; + resolution = new_range.resolution > resolution ? new_range.resolution : resolution; + } else { + range = (uint32_t)round(current_max - new_range.start_from); + resolution = new_range.resolution > resolution ? 
new_range.resolution : resolution; + start_from = new_range.start_from; + } + } + + return success; + }; }; // Pre-defaned eps based on mantissa bit depth diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp index 49c6d8def15c8e..c22b6d76ccbef2 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp @@ -330,7 +330,7 @@ ov::Tensor create_and_fill_tensor_consistently(const ov::element::Type element_t namespace tensor_comparation { constexpr double eps = std::numeric_limits::epsilon(); -inline double less(double a, double b) { +inline bool less(const double a, const double b) { if (std::isnan(a) || std::isnan(b)) { return false; } else if (std::isinf(b) && std::isinf(b)) { @@ -344,7 +344,7 @@ inline double less(double a, double b) { return std::fabs(a - b) > eps && a < b; } -inline double equal(double a, double b) { +inline bool equal(const double a, const double b) { if (std::isnan(a) || std::isnan(b)) { return false; } else if (std::isinf(b) && std::isinf(b)) { @@ -358,12 +358,12 @@ inline double equal(double a, double b) { return std::fabs(b - a) <= eps; } -inline double less_or_equal(double a, double b) { +inline bool less_or_equal(double a, double b) { return less(a, b) || equal(a, b); } template -inline bool is_value_suitable_for_comparation(double value1, double value2) { +inline bool is_value_suitable_for_comparation(const double value1, const double value2) { bool res = true; auto max_val1 = std::numeric_limits::max(); auto min_val1 = std::numeric_limits::lowest(); diff --git a/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt b/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt index c8bb3c3718edbe..69f9d716b35ce5 100644 --- a/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/tests/CMakeLists.txt @@ -10,7 +10,9 
@@ ov_add_test_target( DEPENDENCIES LINK_LIBRARIES common_test_utils + func_test_utils + sharedTestClasses ADD_CPPLINT LABELS OV UNIT -) +) \ No newline at end of file diff --git a/src/tests/test_utils/common_test_utils/tests/core_config.cpp b/src/tests/test_utils/common_test_utils/tests/core_config.cpp new file mode 100644 index 00000000000000..71fb033de4bcd1 --- /dev/null +++ b/src/tests/test_utils/common_test_utils/tests/core_config.cpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/file_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "openvino/util/file_util.hpp" + +namespace ov { +namespace test { + +void core_configuration(ov::test::SubgraphBaseTest* test) {} + +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/test_utils/common_test_utils/tests/generate_inputs.cpp b/src/tests/test_utils/common_test_utils/tests/generate_inputs.cpp new file mode 100644 index 00000000000000..88c6e122f30e46 --- /dev/null +++ b/src/tests/test_utils/common_test_utils/tests/generate_inputs.cpp @@ -0,0 +1,280 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/op/util/op_types.hpp" +#include "common_test_utils/type_ranges.hpp" +#include "shared_test_classes/base/utils/ranges.hpp" +#include "shared_test_classes/base/utils/generate_inputs.hpp" + +#include "openvino/op/concat.hpp" +#include "openvino/op/relu.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/floor_mod.hpp" +#include "openvino/op/reshape.hpp" + +using namespace testing; +using namespace ov::util; + +using ov::Shape; +using ov::op::v0::Parameter; +using ov::op::v0::Result; +using ov::op::v0::Concat; +using ov::op::v0::Relu; +using ov::op::v1::ReduceMean; +using ov::op::v1::FloorMod; +using 
ov::op::v1::Reshape; + +TEST(RangesTests, ranges_by_type_real) { + auto p0 = std::make_shared(ov::element::f16, Shape{3}); + auto p1 = std::make_shared(ov::element::f16, Shape{3}); + auto concat = std::make_shared(ov::OutputVector{p0, p1}, 0); + auto func = std::make_shared(concat, ov::ParameterVector{p0, p1}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + + auto real_range = modelRange.get_range_for_param(p0); + + ov::float16 lowest_tmp = std::numeric_limits::lowest(); + ov::float16 max_tmp = std::numeric_limits::max(); + double lowest = 0 - static_cast(lowest_tmp.to_bits()); + double max = max_tmp.to_bits(); + double range = max - lowest; + ASSERT_EQ(real_range->start_from, lowest); + ASSERT_EQ(real_range->range, range); + ASSERT_EQ(real_range->resolution, 1); + + for (size_t port = 0; port < concat->get_input_size(); ++port) { + ov::Tensor tensor1 = modelRange.generate_input(concat, port, Shape{3}); + auto data1 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, lowest); + ASSERT_LE(value, range); + } + } +} + +TEST(RangesTests, ranges_by_type_int) { + auto p0 = std::make_shared(ov::element::i8, Shape{3}); + auto p1 = std::make_shared(ov::element::i8, Shape{3}); + auto concat = std::make_shared(ov::OutputVector{p0, p1}, 0); + auto func = std::make_shared(concat, ov::ParameterVector{p0, p1}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + + auto int_range = modelRange.get_range_for_param(p0); + + ASSERT_EQ(int_range->start_from, std::numeric_limits::lowest()); + uint32_t range = static_cast(std::numeric_limits::max()) - static_cast(std::numeric_limits::lowest()); + ASSERT_EQ(int_range->range, range); + ASSERT_EQ(int_range->resolution, 1); + + for (size_t port = 0; port < concat->get_input_size(); ++port) { + ov::Tensor tensor1 = modelRange.generate_input(concat, port, Shape{3}); + auto data1 = tensor1.data(); + for 
(size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, std::numeric_limits::lowest()); + ASSERT_LE(value, std::numeric_limits::max()); + } + } +} + +TEST(RangesTests, intersection_real) { + auto p0 = std::make_shared(ov::element::f32, Shape{3}); + auto p1 = std::make_shared(ov::element::f32, Shape{3}); + + auto relu = std::make_shared(p0); + auto concat = std::make_shared(ov::OutputVector{p1, relu}, 0); + + auto func = std::make_shared(concat, ov::ParameterVector{p0, p1}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + auto relu_range = modelRange.get_range_for_param(p0); + + auto relu_range_ref = ov::test::utils::InputGenerateData(-1, 2, 32768); + ASSERT_EQ(relu_range->start_from, relu_range_ref.start_from); + ASSERT_EQ(relu_range->range, relu_range_ref.range); + ASSERT_EQ(relu_range->resolution, relu_range_ref.resolution); + + ov::Tensor tensor1 = modelRange.generate_input(relu, 0, Shape{3}); + auto data1 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, relu_range_ref.start_from); + ASSERT_LE(value, relu_range_ref.range); + } + + auto concat_range_ref = ov::test::utils::rangeByType.get_range(ov::element::f32); + auto concat_range = modelRange.get_range_for_param(p1); + ASSERT_EQ(concat_range->start_from, concat_range_ref.start_from); + ASSERT_EQ(concat_range->range, concat_range_ref.range); + ASSERT_EQ(concat_range->resolution, concat_range_ref.resolution); + + ov::Tensor tensor2 = modelRange.generate_input(concat, 0, Shape{3}); + auto data2 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor2.get_shape()); ++i) { + double value = data2[i]; + ASSERT_GE(value, concat_range_ref.start_from); + ASSERT_LE(value, concat_range_ref.range); + } +} + +TEST(RangesTests, intersection_integral) { + auto p0 = std::make_shared(ov::element::i32, Shape{3}); + auto p1 = std::make_shared(ov::element::i32, 
Shape{3}); + + auto relu = std::make_shared(p0); + auto concat = std::make_shared(ov::OutputVector{p1, relu}, 0); + + auto func = std::make_shared(concat, ov::ParameterVector{p0, p1}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + auto relu_range = modelRange.get_range_for_param(p0); + + auto relu_range_ref = ov::test::utils::InputGenerateData(0, 15); + ASSERT_EQ(relu_range->start_from, relu_range_ref.start_from); + ASSERT_EQ(relu_range->range, relu_range_ref.range); + ASSERT_EQ(relu_range->resolution, relu_range_ref.resolution); + + ov::Tensor tensor1 = modelRange.generate_input(relu, 0, Shape{3}); + auto data1 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, relu_range_ref.start_from); + ASSERT_LE(value, relu_range_ref.range); + } + + auto concat_range_ref = ov::test::utils::rangeByType.get_range(ov::element::f32); + auto concat_range = modelRange.get_range_for_param(p1); + ASSERT_EQ(concat_range->start_from, concat_range_ref.start_from); + ASSERT_EQ(concat_range->range, concat_range_ref.range); + ASSERT_EQ(concat_range->resolution, concat_range_ref.resolution); + + ov::Tensor tensor2 = modelRange.generate_input(concat, 0, Shape{3}); + auto data2 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor2.get_shape()); ++i) { + double value = data2[i]; + ASSERT_GE(value, concat_range_ref.start_from); + ASSERT_LE(value, concat_range_ref.range); + } +} + +TEST(RangesTests, spetial_ranges) { + auto p0 = std::make_shared(ov::element::f32, Shape{1, 2, 3}); + p0->set_friendly_name("p0"); + auto p1 = std::make_shared(ov::element::f32, Shape{1, 2, 3}); + p1->set_friendly_name("p1"); + auto p2 = std::make_shared(ov::element::i32, Shape{1}); + p2->set_friendly_name("p2"); + + auto concat = std::make_shared(ov::OutputVector{p0, p1}, 1); + concat->set_friendly_name("Concat"); + auto reshape = std::make_shared(concat, p2, true); + 
reshape->set_friendly_name("reshape"); + + auto res = std::make_shared(reshape); + + auto func = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{p0, p1, p2}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + auto real_range = modelRange.get_range_for_param(p0); + + auto main_range = ov::test::utils::InputGenerateData(-100, 200, 32768); + ASSERT_EQ(real_range->start_from, main_range.start_from); + ASSERT_EQ(real_range->range, main_range.range); + ASSERT_EQ(real_range->resolution, main_range.resolution); + + ov::Tensor tensor1 = modelRange.generate_input(concat, 0, Shape{1, 2, 3}); + auto data1 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, main_range.start_from); + ASSERT_LE(value, main_range.range); + } + + auto spetial_range_ref = ov::test::utils::InputGenerateData(0, 256, 1, 1, true); + auto spetial_range = modelRange.get_range_for_param(p2); + ASSERT_EQ(spetial_range->start_from, spetial_range_ref.start_from); + ASSERT_EQ(spetial_range->range, spetial_range_ref.range); + ASSERT_EQ(spetial_range->resolution, spetial_range_ref.resolution); + + ov::Tensor tensor2 = modelRange.generate_input(reshape, 1, Shape{1}); + auto data2 = tensor2.data(); + for (size_t i = 0; i < shape_size(tensor2.get_shape()); ++i) { + double value = data2[i]; + ASSERT_GE(value, spetial_range_ref.start_from); + ASSERT_LE(value, spetial_range_ref.range); + } +} + +TEST(RangesTests, intersection_range) { + auto p0 = std::make_shared(ov::element::f32, Shape{1, 2}); + auto p1 = std::make_shared(ov::element::f32, Shape{1, 2}); + auto p2 = std::make_shared(ov::element::i32, Shape{1}); + + auto relu = std::make_shared(p0); + auto concat = std::make_shared(ov::OutputVector{p1, relu}, 1); + auto reduce = std::make_shared(concat, p2, true); + + auto func = std::make_shared(reduce, ov::ParameterVector{p0, p1, p2}); + + ov::test::utils::ModelRange modelRange; + 
modelRange.find_mode_ranges(func); + auto real_range = modelRange.get_range_for_param(p0); + + auto intersection_range_real = ov::test::utils::InputGenerateData(0, 1, 32768); + ASSERT_EQ(real_range->start_from, intersection_range_real.start_from); + ASSERT_EQ(real_range->range, intersection_range_real.range); + ASSERT_EQ(real_range->resolution, intersection_range_real.resolution); + + ov::Tensor tensor1 = modelRange.generate_input(relu, 0, Shape{1}); + auto data1 = tensor1.data(); + for (size_t i = 0; i < shape_size(tensor1.get_shape()); ++i) { + double value = data1[i]; + ASSERT_GE(value, intersection_range_real.start_from); + ASSERT_LE(value, intersection_range_real.range); + } + + auto int_range = modelRange.get_range_for_param(p2); + auto intersection_range_int = ov::test::utils::InputGenerateData(0, 5, 1000); + ASSERT_EQ(int_range->start_from, intersection_range_int.start_from); + ASSERT_EQ(int_range->range, intersection_range_int.range); + ASSERT_EQ(int_range->resolution, intersection_range_int.resolution); +} + +TEST(RangesTests, not_intersection) { + auto p0 = std::make_shared(ov::element::f32, Shape{1, 2}); + auto p1 = std::make_shared(ov::element::f32, Shape{1, 2}); + + auto relu = std::make_shared(p0); + auto floorMod = std::make_shared(relu, p1); + + auto func = std::make_shared(floorMod, ov::ParameterVector{p0, p1}); + + ov::test::utils::ModelRange modelRange; + modelRange.find_mode_ranges(func); + + auto not_intersection_range = modelRange.get_range_for_param(p0); + auto not_intersection_range_ref = ov::test::utils::InputGenerateData(-1, 2, 32768); + ASSERT_EQ(not_intersection_range->start_from, not_intersection_range_ref.start_from); + ASSERT_EQ(not_intersection_range->range, not_intersection_range_ref.range); + ASSERT_EQ(not_intersection_range->resolution, not_intersection_range_ref.resolution); + + auto floorMod_range = modelRange.get_range_for_param(p1); + auto floorMod_range_ref = ov::test::utils::InputGenerateData(2, 2, 128); + 
ASSERT_EQ(floorMod_range->start_from, floorMod_range_ref.start_from); + ASSERT_EQ(floorMod_range->range, floorMod_range_ref.range); + ASSERT_EQ(floorMod_range->resolution, floorMod_range_ref.resolution); +} + diff --git a/src/tests/test_utils/common_test_utils/tests/skip_tests_config.cpp b/src/tests/test_utils/common_test_utils/tests/skip_tests_config.cpp new file mode 100644 index 00000000000000..dc54bec286395e --- /dev/null +++ b/src/tests/test_utils/common_test_utils/tests/skip_tests_config.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "functional_test_utils/skip_tests_config.hpp" + +#include +#include + +std::vector disabledTestPatterns() { + return std::vector{}; +} diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/github/skip_configs/CPU/expected_failures_OP.csv b/src/tests/test_utils/functional_test_utils/layer_tests_summary/github/skip_configs/CPU/expected_failures_OP.csv index 084c11cef57813..857d92d1ddd718 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/github/skip_configs/CPU/expected_failures_OP.csv +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/github/skip_configs/CPU/expected_failures_OP.csv @@ -247,4 +247,5 @@ conformance_Loop/ReadIRTest.Inference/Op=Loop.5_Type=f32_Shape=static_IR=35c61b2 conformance_subgraph/ReadIRTest.Inference/Extractor=fused_names_Shape=static_IR=7caba2dff8ab10660f66796a39d8d2a78f3e282f0629c2ecbee9b90c34e62aa0_Device=CPU_Config=(),2.1896e-06 conformance_subgraph/ReadIRTest.Inference/Extractor=fused_names_Shape=static_IR=2e06088cb191d8d26309843b1285b9ae4a1eb0722e1370875edde7fd2783851b_Device=CPU_Config=(),1.88776e-06 conformance_subgraph/ReadIRTest.Inference/Extractor=repeat_pattern_Shape=static_IR=183e5203c7008618a9cfb2680265bb3f588f80c2493bf7fac92eb258e66da2cf_Device=CPU_Config=(),1.88776e-06 
-conformance_subgraph/ReadIRTest.Inference/Extractor=repeat_pattern_Shape=static_IR=d9771ac46751569172412bbd4495eccdbac435f78a97f8fdfffa9215faa74544_Device=CPU_Config=(),1.88776e-06 \ No newline at end of file +conformance_subgraph/ReadIRTest.Inference/Extractor=repeat_pattern_Shape=static_IR=d9771ac46751569172412bbd4495eccdbac435f78a97f8fdfffa9215faa74544_Device=CPU_Config=(),1.88776e-06 +conformance_subgraph/ReadIRTest.Inference/Extractor=repeat_pattern_Shape=static_IR=9b4725171957a420a98f908742f18062fbcee198871d527ab5b4d939005ac4e6_Device=CPU_Config=(),0.00116845 \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustSaturation.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustSaturation.py new file mode 100644 index 00000000000000..c83a84e08f8043 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustSaturation.py @@ -0,0 +1,62 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + +class TestAdjustSaturation(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'images:0' in inputs_info + if self.special_case == "Black Image": + images_shape = inputs_info['images:0'] + inputs_data = {} + inputs_data['images:0'] = np.zeros(images_shape).astype(self.input_type) + elif self.special_case == "Grayscale Image": + images_shape = inputs_info['images:0'] + inputs_data = {} + inputs_data['images:0'] = np.ones(images_shape).astype(self.input_type) * np.random.rand() + else: + images_shape = inputs_info['images:0'] + inputs_data = {} + inputs_data['images:0'] = np.random.rand(*images_shape).astype(self.input_type) + + inputs_data['scale:0'] = np.random.rand() + + return inputs_data + + def create_adjust_saturation_net(self, input_shape, input_type, special_case=False): + self.special_case = special_case + self.input_type = input_type + 
tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + images = tf.compat.v1.placeholder(input_type, input_shape, 'images') + scale = tf.compat.v1.placeholder(input_type, [], 'scale') + tf.raw_ops.AdjustSaturation(images=images, scale=scale) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + # Each input is a tensor of at least 3 dimensions. + # The last dimension is interpreted as channels, and must be three. + test_data_basic = [ + dict(input_shape=[7, 7, 3], input_type=np.float32, special_case="Black Image"), + dict(input_shape=[7, 7, 3], input_type=np.float32, special_case="Grayscale Image"), + dict(input_shape=[5, 5, 3], input_type=np.float32), + dict(input_shape=[2, 3, 4, 3], input_type=np.float32), + dict(input_shape=[1, 2, 3, 3, 3], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit + @pytest.mark.nightly + def test_adjust_saturation_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + if ie_device == 'GPU': + pytest.skip("Accuracy mismatch on GPU") + self._test(*self.create_adjust_saturation_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) \ No newline at end of file diff --git a/tests/model_hub_tests/pytorch/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/pytorch/models/hf-tiny-random-models-precommit new file mode 100644 index 00000000000000..bab7d1ff0a8676 --- /dev/null +++ b/tests/model_hub_tests/pytorch/models/hf-tiny-random-models-precommit @@ -0,0 +1,58 @@ + facebook/opt-125m,https://huggingface.co/facebook/opt-125m + hf-tiny-model-private/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-CodeGenForCausalLM +hf-tiny-model-private/tiny-random-GPT2LMHeadModel,https://huggingface.co/hf-tiny-model-private/tiny-random-GPT2LMHeadModel 
+hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM +hf-tiny-model-private/tiny-random-GPTJForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-GPTJForCausalLM +hf-tiny-model-private/tiny-random-BloomForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-BloomForCausalLM +hf-tiny-model-private/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-GPTNeoForCausalLM +hf-tiny-model-private/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM +hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM +hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM +hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTJForCausalLM +hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM +hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM +hf-tiny-model-private/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM +hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM +hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM +hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM 
+hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM +hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM +hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM +hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM +hf-internal-testing/tiny-random-OPTForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-OPTForCausalLM +hf-tiny-model-private/tiny-random-BartForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-BartForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-BigBirdForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-BigBirdForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-BigBirdPegasusForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-BigBirdPegasusForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-BioGptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BioGptForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-BlenderbotSmallForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotSmallForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-ErnieForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-ErnieForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-GPTNeoXJapaneseForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXJapaneseForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-MBartForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MBartForCausalLM,xfail,not working 
+hf-tiny-model-private/tiny-random-MvpForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MvpForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-PLBartForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PLBartForCausalLM,xfail,not working +hf-tiny-model-private/tiny-random-XGLMForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-XGLMForCausalLM,xfail,not working +# PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-4bit-smashed, +# PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-8bit-smashed, +# PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-HQQ-2bit-smashed, +hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BartForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BartForCausalLM,xfail,not working +hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,xfail,not working +hf-internal-testing/tiny-random-XGLMForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-XGLMForCausalLM,xfail,not working +hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM,xfail,not working +hf-internal-testing/tiny-random-MBartForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MBartForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BigBirdPegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BigBirdPegasusForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BigBirdForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BigBirdForCausalLM,xfail,not working 
+hf-internal-testing/tiny-random-MegaForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MegaForCausalLM,xfail,not working +hf-internal-testing/tiny-random-RobertaPreLayerNormForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-RobertaPreLayerNormForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BioGptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BioGptForCausalLM,xfail,not working +hf-internal-testing/tiny-random-ProphetNetForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-ProphetNetForCausalLM,xfail,not working +hf-internal-testing/tiny-random-PLBartForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PLBartForCausalLM,xfail,not working +hf-internal-testing/tiny-random-MegatronBertForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MegatronBertForCausalLM,xfail,not working +hf-internal-testing/tiny-random-GPTNeoXJapaneseForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXJapaneseForCausalLM,xfail,not working +hf-internal-testing/tiny-random-ErnieForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-ErnieForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM,xfail,not working +hf-internal-testing/tiny-random-BlenderbotSmallForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotSmallForCausalLM,xfail,not working \ No newline at end of file diff --git a/tests/model_hub_tests/pytorch/requirements.txt b/tests/model_hub_tests/pytorch/requirements.txt index 882be0f61ec1a9..ac134fbd38c255 100644 --- a/tests/model_hub_tests/pytorch/requirements.txt +++ b/tests/model_hub_tests/pytorch/requirements.txt @@ -25,3 +25,12 @@ transformers wheel PyYAML kornia + +# use latest released version once it's available +git+https://github.com/huggingface/optimum-intel.git@main +# set 'export 
HF_HUB_ENABLE_HF_TRANSFER=1' to benefit from hf_transfer +hf_transfer + +# requirements for specific models +# - hf-tiny-model-private/tiny-random-RoFormerForCausalLM +rjieba \ No newline at end of file diff --git a/tests/model_hub_tests/pytorch/test_pa_transformation.py b/tests/model_hub_tests/pytorch/test_pa_transformation.py new file mode 100644 index 00000000000000..d077bb1fd5f8fb --- /dev/null +++ b/tests/model_hub_tests/pytorch/test_pa_transformation.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino._offline_transformations import paged_attention_transformation +from openvino._pyopenvino.op import _PagedAttentionExtension +from optimum.intel import OVModelForCausalLM +import models_hub_common.utils as utils +import pytest +import os + +def run_pa(tmp_path, model_id, model_link): + model = OVModelForCausalLM.from_pretrained(model_id, export=True) + + paged_attention_transformation(model.model) + + # Test that a _PagedAttentionExtension node appeared after the transformation. + assert any(isinstance(op, _PagedAttentionExtension) for op in model.model.get_ordered_ops()), f"The model '{model_id}' has no _PagedAttentionExtension present." + + model_inputs = model.model.inputs + for input in model_inputs: + names = list(input.get_names()) # names are stored as a set (in this case usually of 1 element) + for name in names: + if (("key_cache." in name) or ("value_cache." 
in name)): + shape = input.get_partial_shape() + # PagedAttention uses key_cache and value_cache inputs so the last 2 dimensions have to be static + assert shape[-1].is_static, f"Dimension {len(shape) - 1} of input '{name}' in '{model_id}' is not static: {shape}" + assert shape[-2].is_static, f"Dimension {len(shape) - 2} of input '{name}' in '{model_id}' is not static: {shape}" + +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) +def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link) \ No newline at end of file