diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 5a4f7795ea4a44..3783a7e8d5600a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-27384 +pr-27430 diff --git a/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile new file mode 100644 index 00000000000000..59239575be329c --- /dev/null +++ b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile @@ -0,0 +1,20 @@ +ARG REGISTRY="quay.io" +FROM openvinogithubactions.azurecr.io/quayio/pypa/manylinux2014_x86_64 + +USER root + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && /install_build_dependencies.sh + +# Install sccache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile new file mode 100644 index 00000000000000..2d5bc1c878069a --- /dev/null +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile @@ -0,0 +1,42 @@ +ARG REGISTRY="docker.io" +FROM ${REGISTRY}/library/ubuntu:22.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + gpg-agent \ + tzdata \ + # parallel gzip + pigz \ + python3 \ + python3-pip \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install docker +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] \ + https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + +RUN apt-get update && \ + apt-get install -y docker-ce docker-ce-cli containerd.io + +ENV DOCKER_BUILDKIT=1 \ No newline at end of file diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 29afb466d69a42..0de1708527739a 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -30,7 +30,7 @@ env: jobs: TensorFlow_Layer_Tests: name: TensorFlow Layer Tests - timeout-minutes: 30 + timeout-minutes: 45 runs-on: ${{ inputs.runner }} container: ${{ fromJSON(inputs.container) }} defaults: diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml new file mode 100644 index 00000000000000..ed375fb868459f --- 
/dev/null +++ b/.github/workflows/manylinux_2014.yml @@ -0,0 +1,191 @@ +name: Manylinux 2014 +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-manylinux-2014 + cancel-in-progress: true + +permissions: read-all + +env: + PIP_CACHE_PATH: /mount/caches/pip/linux + +jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: + needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_x64_docker + ov_build/manylinux2014_x86_64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Docker] + timeout-minutes: 120 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-32gb-manylinux + if: ${{ github.repository_owner == 'openvinotoolkit' }} + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64_docker }} + volumes: + - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING -e DOCKER_CONFIG -v ${{ github.workspace }}:${{ github.workspace }} + env: + CMAKE_BUILD_TYPE: 'Release' + OPENVINO_REPO: ${{ github.workspace }}/src + INSTALL_DIR: ${{ github.workspace }}/install/openvino + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + BUILD_DIR: ${{ github.workspace }}/build + DOCKER_CONFIG: "/mount/.docker" + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_CACHE_SIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: manylinux_2014 + + steps: + - name: Clone OpenVINO + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: ${{ env.OPENVINO_REPO }} + submodules: 'true' + + - name: System info + uses: ./src/.github/actions/system_info + + - name: Create docker build cache + run: | + docker volume create ov_build_cache + + - name: Build OpenVINO + run: | + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ov_build_cache:/work/build \ + -v ${{ 
env.INSTALL_DIR }}:/work/install \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + cmake -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -S /work/src -B /work/build && + cmake --build /work/build --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/install + " + + - name: Pack Artifacts + run: mkdir -p ${{ env.BUILD_DIR }} && tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz + working-directory: ${{ env.INSTALL_DIR }} + + - name: Build Python API(Python 3.9-3.13) + run: | + SUPPORTED_PYTHON_VERSIONS=("39" "310" "311" "312" "313") + for PY_VER in "${SUPPORTED_PYTHON_VERSIONS[@]}"; do + python_path=/opt/python/cp${PY_VER}-cp${PY_VER}/bin + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ${{ env.INSTALL_WHEELS_DIR }}:/work/wheels \ + -v ${{ env.PIP_CACHE_PATH }}:/work/pip_cache \ + -v ov_build_cache:/work/build \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + export PATH=${python_path}:\$PATH + PIP_VER=$(python3 -c "import pip; print(pip.__version__)") + export "PIP_CACHE_DIR=/work/pip_cache/${PIP_VER}" + python3 -m pip install -r /work/src/src/bindings/python/wheel/requirements-dev.txt && + cmake -DOpenVINODeveloperPackage_DIR=/work/build -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -S /work/src/src/bindings/python -B /work/build_py${PY_VER} && + cmake --build /work/build_py${PY_VER} --parallel $(nproc) --target ie_wheel --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build_py${PY_VER} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/wheels --component python_wheels + " + done + + # + # Upload build artifacts + # + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino wheels + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' \ No newline at end of file diff --git a/README.md b/README.md index c37f2ef42b9785..695f84f1628118 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ Check [system requirements](https://docs.openvino.ai/2024/about-openvino/system- [OpenVINO Quickstart example](https://docs.openvino.ai/2024/get-started.html) will walk you through the basics of deploying your first model. 
Learn how to optimize and deploy popular models with the [OpenVINO Notebooks](https://github.com/openvinotoolkit/openvino_notebooks)📚: -- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot.ipynb) -- [YOLOv8 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/quantizing-model-with-accuracy-control/yolov8-quantization-with-accuracy-control.ipynb) -- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb) +- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb) +- [YOLOv11 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/yolov11-optimization/yolov11-object-detection.ipynb) +- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/text-to-image-genai/text-to-image-genai.ipynb) +- [Multimodal assistant with LLaVa and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb) +- [Automatic speech recognition using Whisper and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb) Here are easy-to-follow code examples demonstrating how to run PyTorch and TensorFlow model inference using OpenVINO: diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 75d1882b8cee89..75c7ba90db7e76 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -64,7 +64,7 @@ implemented in your solutions. Click the buttons below to see the chosen benchma :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS for GenAI (incoming) + :material-regular:`bar_chart;1.4em` OVMS for GenAI diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index c55d3f44451f1c..4bf0b3a0acb19a 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -58,11 +58,11 @@ Performance Information F.A.Q. - Hugginface - Causal Decoder-only - 2048 - * - `Llama-2-7b-chat `__ + * - `Llama-2-7b-chat `__ - Meta AI - Auto regressive language - 4096 - * - `Llama-3-8b `__ + * - `Llama-3-8b `__ - Meta AI - Auto regressive language - 8192 @@ -74,7 +74,7 @@ Performance Information F.A.Q. - Huggingface - Auto regressive language - 4096 - * - `Stable-Diffusion-V1-5 `__ + * - `Stable-Diffusion-V1-5 `__ - Hugginface - Latent Diffusion Model - 77 @@ -118,7 +118,7 @@ Performance Information F.A.Q. 
- YOLO V5 Medium - object detection - 640x640 - * - `yolov8n `__ + * - `yolov8n `__ - Yolov8nano - object detection - 608x608 diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 4bd0b5d32c0f0e..6685a4325d57fe 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -943,7 +943,7 @@ Previous 2024 releases deployed in an arbitrary path without any code changes. * KServe REST API support has been extended to properly handle the string format in JSON body, just like the binary format compatible with NVIDIA Triton™. - * `A demo showcasing a full RAG algorithm `__ + * `A demo showcasing a full RAG algorithm `__ fully delegated to the model server has been added. **Neural Network Compression Framework** @@ -1000,7 +1000,7 @@ Previous 2024 releases * `RMBG background removal `__ * `AnimateAnyone: pose guided image to video generation `__ * `LLaVA-Next visual-language assistant `__ - * `TripoSR: single image 3d reconstruction `__ + * `TripoSR: single image 3d reconstruction `__ * `RAG system with OpenVINO and LangChain `__ *Known Issues* @@ -1309,7 +1309,7 @@ Discontinued in 2024 * `Accuracy Checker `__. * `Post-Training Optimization Tool `__ (POT). Neural Network Compression Framework (NNCF) should be used instead. - * A `Git patch `__ + * A `Git patch `__ for NNCF integration with `huggingface/transformers `__. The recommended approach is to use `huggingface/optimum-intel `__ for applying NNCF optimization on top of models from Hugging Face. @@ -1360,25 +1360,25 @@ Deprecated and to be removed in the future * See alternative: `PaddleOCR with OpenVINO™ `__, * See alternative: `Handwritten Text Recognition Demo `__ - * `Image In-painting with OpenVINO™ `__ + * `Image In-painting with OpenVINO™ `__ * See alternative: `Image Inpainting Python Demo `__ - * `Interactive Machine Translation with OpenVINO `__ + * `Interactive Machine Translation with OpenVINO `__ * See alternative: `Machine Translation Python* Demo `__ - * `Open Model Zoo Tools Tutorial `__ + * `Open Model Zoo Tools Tutorial `__ * No alternatives, demonstrates deprecated tools. 
- * `Super Resolution with OpenVINO™ `__ + * `Super Resolution with OpenVINO™ `__ * See alternative: `Super Resolution with PaddleGAN and OpenVINO `__ * See alternative: `Image Processing C++ Demo `__ - * `Image Colorization with OpenVINO Tutorial `__ - * `Interactive Question Answering with OpenVINO™ `__ + * `Image Colorization with OpenVINO Tutorial `__ + * `Interactive Question Answering with OpenVINO™ `__ * See alternative: `BERT Question Answering Embedding Python* Demo `__ * See alternative: `BERT Question Answering Python* Demo `__ @@ -1387,37 +1387,37 @@ Deprecated and to be removed in the future * See alternative: `Security Barrier Camera C++ Demo `__ - * `The attention center model with OpenVINO™ `_ - * `Image Generation with DeciDiffusion `_ - * `Image generation with DeepFloyd IF and OpenVINO™ `_ - * `Depth estimation using VI-depth with OpenVINO™ `_ + * `The attention center model with OpenVINO™ `_ + * `Image Generation with DeciDiffusion `_ + * `Image generation with DeepFloyd IF and OpenVINO™ `_ + * `Depth estimation using VI-depth with OpenVINO™ `_ * `Instruction following using Databricks Dolly 2.0 and OpenVINO™ `_ * See alternative: `LLM Instruction-following pipeline with OpenVINO `__ - * `Image generation with FastComposer and OpenVINO™ `__ + * `Image generation with FastComposer and OpenVINO™ `__ * `Video Subtitle Generation with OpenAI Whisper `__ * See alternative: `Automatic speech recognition using Distil-Whisper and OpenVINO `__ - * `Introduction to Performance Tricks in OpenVINO™ `__ - * `Speaker Diarization with OpenVINO™ `__ - * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ - * `Text Prediction with OpenVINO™ `__ - * `Training to Deployment with TensorFlow and OpenVINO™ `__ - * `Speech to Text with OpenVINO™ `__ - * `Convert and Optimize YOLOv7 with OpenVINO™ `__ - * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ + * `Introduction to Performance Tricks in OpenVINO™ `__ + * `Speaker Diarization with OpenVINO™ `__ + * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ + * `Text Prediction with OpenVINO™ `__ + * `Training to Deployment with TensorFlow and OpenVINO™ `__ + * `Speech to Text with OpenVINO™ `__ + * `Convert and Optimize YOLOv7 with OpenVINO™ `__ + * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ * See alternative: `Quantize Speech Recognition Models with accuracy control using NNCF PTQ API `__ - * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ - * `Video Recognition using SlowFast and OpenVINO™ `__ + * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ + * `Video Recognition using SlowFast and OpenVINO™ `__ * See alternative: `Live Action Recognition with OpenVINO™ `__ - * `Semantic Segmentation with OpenVINO™ using Segmenter `__ - * `Programming Language Classification with OpenVINO `__ + * `Semantic Segmentation with OpenVINO™ using Segmenter `__ + * `Programming Language Classification with OpenVINO `__ * `Stable Diffusion Text-to-Image Demo `__ * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ @@ -1426,10 +1426,10 @@ Deprecated and to be removed in the future * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ - * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ - * `Data Preparation for 2D Medical Imaging `__ - * `Train a Kidney Segmentation Model with MONAI and PyTorch 
Lightning `__ - * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ + * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ + * `Data Preparation for 2D Medical Imaging `__ + * `Train a Kidney Segmentation Model with MONAI and PyTorch Lightning `__ + * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ * See alternative: `Quantize a Segmentation Model and Show Live Inference `__ @@ -1458,7 +1458,7 @@ are available on request. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at -`www.intel.com `__ +`www.intel.com `__ or from the OEM or retailer. No computer system can be absolutely secure. diff --git a/docs/articles_en/documentation/legacy-features.rst b/docs/articles_en/documentation/legacy-features.rst index f859a3a4572f88..2457d28cf24c15 100644 --- a/docs/articles_en/documentation/legacy-features.rst +++ b/docs/articles_en/documentation/legacy-features.rst @@ -96,7 +96,7 @@ Discontinued: | *New solution:* API 2.0 launched in OpenVINO 2022.1 | *Old solution:* discontinued with OpenVINO 2024.0 - | `The last version supporting API 1.0 `__ + | `2023.2 is the last version supporting API 1.0 `__ .. dropdown:: Compile tool diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst index b5d3c08b39f480..fb9f41c755d4fb 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst @@ -120,7 +120,7 @@ Here are code examples of how to use these methods with different model formats: For more details on conversion, refer to the :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` - and an example `tutorial `__ + and an example `tutorial `__ on this topic. * The ``read_model()`` and ``compile_model()`` methods: @@ -592,7 +592,7 @@ to OpenVINO IR or ONNX before running inference should be considered the default OpenVINO versions of 2023 are mostly compatible with the old instructions, through a deprecated MO tool, installed with the deprecated OpenVINO Developer Tools package. - `OpenVINO 2023.0 `__ is the last + `OpenVINO 2023.0 `__ is the last release officially supporting the MO conversion process for the legacy formats. 
diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst index 711a060b7467b8..7880b261c80b81 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst @@ -14,7 +14,7 @@ Converting an ONNX Faster R-CNN Model The instructions below are applicable **only** to the Faster R-CNN model converted to the ONNX file format from the `maskrcnn-benchmark model `__: -1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). +1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). 2. Generate the Intermediate Representation of the model, by changing your current working directory to the model conversion API installation directory, and running model conversion with the following parameters: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst index 84392e92e620d2..4c10c941c7fb47 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst @@ -12,7 +12,7 @@ Converting an ONNX GPT-2 Model This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. -`Public pre-trained GPT-2 model `__ is a large +`Public pre-trained GPT-2 model `__ is a large transformer-based language model with a simple objective: predict the next word, given all of the previous words within some text. 
Downloading the Pre-Trained Base GPT-2 Model diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst index de3af8ce5175f0..f1ee885dae0b26 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst @@ -20,7 +20,7 @@ Downloading the Pre-trained QuartzNet Model To download the pre-trained model, refer to the `NeMo Speech Models Catalog `__. Here are the instructions on how to obtain QuartzNet in ONNX format. -1. Install the NeMo toolkit, using the `instructions `__. +1. Install the NeMo toolkit, using the `instructions `__. 2. Run the following code: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst index 4f33e510a40267..ad646568aed598 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst @@ -44,7 +44,7 @@ For UNIX-like systems, you can use ``wget``: The link was taken from ``setup.sh`` in the ``speech_recoginitin/rnnt`` subfolder. You will get exactly the same weights as -if you were following the `guide `__. +if you were following the `guide `__. **Step 4**. Install required Python packages: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst index 955d5418d37270..2bcb6fde9b833b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst @@ -59,7 +59,7 @@ To convert such TensorFlow model, run the `mo` script with a path to the MetaGra 3. **SavedModel format**. In this case, a model consists of a special directory with a ``.pb`` file -and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. 
+and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. To convert such TensorFlow model, run the ``mo`` script with a path to the SavedModel directory: .. code-block:: sh diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst index fc78b12640771a..3d2365f45ffe3b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst @@ -160,7 +160,7 @@ It is important to mention that sometimes it seems like transformation cannot be because the actual values of inputs or shapes are needed. In fact, manipulations of shapes or values can be implemented using operations that are added to the graph. Consider the ``extensions/front/onnx/flattenONNX_to_reshape.py`` transformation, which replaces an ONNX -`Flatten `__ operation with a sub-graph of operations performing +`Flatten `__ operation with a sub-graph of operations performing the following (when ``axis`` is not equal to 0 and 1): 1. Calculate a shape of the ``Flatten`` input tensor, using the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation. diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst index 2d5598a5eb8e9d..3959ebefb09a4a 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst @@ -580,7 +580,7 @@ Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker 1. Download the `OpenVINO™ Model Server software `__ -2. Build the `OpenVINO™ Model Server Docker images `__ +2. Build the `OpenVINO™ Model Server Docker images `__ .. 
code-block:: sh diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst index a7a81acd9ba3a7..8a5bd91f9c1b7b 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst @@ -32,9 +32,9 @@ If the results are unsatisfactory, add datasets and perform the same steps, star OpenVINO Training Extensions Components ####################################### -* `OpenVINO Training Extensions API `__ +* `OpenVINO Training Extensions API `__ * `OpenVINO Training Extensions CLI `__ -* `OpenVINO Training Extensions Algorithms `__ +* `OpenVINO Training Extensions Algorithms `__ Tutorials ######### diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index 6ba9e0a9b60f52..9451fabd6219d8 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -35,7 +35,7 @@ The goal of Low Precision Transformations (LPT) is to transform a quantized mode As result, operation input tensor precisions will be changed from original to low precision and operations can be inferred by OpenVINO™ plugin in low precision. -For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. +For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. Input model requirements ######################## diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst index 5cc1b024f158b1..f02c5414ac4369 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst @@ -11,7 +11,7 @@ Loop **Category**: *Infrastructure* **Short description**: *Loop* operation performs recurrent execution of the network, which is described in the ``body``, iterating through the data. -The operation has similar semantic to the ONNX Loop `operation `__. +The operation has similar semantic to the ONNX Loop `operation `__. **Detailed description** @@ -73,7 +73,7 @@ Loop operation description in the IR also has several special sections: ``body`` 1. The body operation getting an input from the main graph should have an entry in the ``port_map`` section of the Loop operation. These edges connect input ports of the Loop with the body ``Parameter``\ s. 2. 
Input tensors to the Loop can be sliced along a specified axis, the Loop can iterates over all sliced parts. The corresponding ``input`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to slice. Therefore, inputs to the Loop operation corresponding to ``input`` entries in the ``port_map`` without ``axis`` attribute are used "as is" (without slicing). 3. The body operation producing tensor to be used in the subsequent iterations (like in RNN models) should have a back edge described in the ``back_edges`` section of the operation. The back edge connects the respective body ``Parameter`` and ``Result`` operations. For such a case the Loop operation node provides input for the first iteration, while corresponding Loop operation output produces the tensor computed during the last iteration. -4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). +4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). 5. There is one body ``Parameter`` operation not connected through the ``port_map``. This is a "current iteration" input. The Loop operation is responsible for providing the appropriate value for each iteration. 6. Connection of nodes inside the Loop body with the main graph should be done through ``Parameter`` and ``Result`` body operations. No other ways to connect graphs are allowed. diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst index 28dbec46289f89..f58418ee923a8b 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst @@ -64,7 +64,7 @@ GRUCell * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. 
* **Range of values**: true or false * **Type**: ``boolean`` * **Default value**: false diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst index 37c70087e121ea..f9b9a5ece850ec 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst @@ -19,7 +19,7 @@ represents a sequence of GRU cells. The sequence can be connected differently de ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX GRU operator defined -`GRUCell `__ +`GRUCell `__ **Attributes** @@ -69,7 +69,7 @@ are in sync with the specification of ONNX GRU operator defined * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. * **Range of values**: True or False * **Type**: ``boolean`` * **Default value**: False diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst index c00b4c819cc66a..164033bdd2831c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst @@ -14,7 +14,7 @@ LSTMSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . +A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . 
**Attributes** diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst index fc9829dd999bda..a3dfc062de2dcd 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst @@ -14,7 +14,7 @@ RNNSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. +A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. **Attributes** diff --git a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst index dc43881780b1e6..e10a67fddadb53 100644 --- a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst +++ b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst @@ -37,7 +37,7 @@ Below are the instructions on how to install the OpenCL packages on supported Li and install the apt package `ocl-icd-libopencl1` with the OpenCl ICD loader. Alternatively, you can add the apt repository by following the - `installation guide `__. + `installation guide `__. Then install the `ocl-icd-libopencl1`, `intel-opencl-icd`, `intel-level-zero-gpu` and `level-zero` apt packages: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst index 6326513fa3cea1..cd3fd41fed03e0 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst @@ -119,7 +119,7 @@ to see if your case needs any of them. .. 
code-block:: python - from openvino import get_cmake_path + from openvino.utils import get_cmake_path cmake_path = get_cmake_path() For detailed instructions on how to use these configurations in your build setup, check out the diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst index 0ff1b95c8eb212..475f623ef86598 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst @@ -108,6 +108,6 @@ Additional Resources - `Official Yocto Project documentation `__ - `BitBake Tool `__ - `Poky `__ -- `Meta-intel `__ +- `Meta-intel `__ - `Meta-openembedded `__ - `Meta-clang `__ \ No newline at end of file diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst index 7f220111f64b98..2476a0423e30e1 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst @@ -31,8 +31,8 @@ some examples of popular Generative AI scenarios: To write such pipelines, you can follow the examples provided as part of OpenVINO: -* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ -* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ +* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ +* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ To perform inference, models must be first converted to OpenVINO IR format using Hugging Face Optimum-Intel API. diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst index d6e23b3791d001..2064aa843a93d8 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst @@ -336,7 +336,7 @@ Additional Resources * `OpenVINO Tokenizers repo `__ * `OpenVINO Tokenizers Notebook `__ -* `Text generation C++ samples that support most popular models like LLaMA 2 `__ +* `Text generation C++ samples that support most popular models like LLaMA 3 `__ * `OpenVINO GenAI Repo `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst index 19c4a013c54aae..390fe00605f2c6 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst @@ -245,6 +245,13 @@ There are several options for setting the number of inference iterations: The more iterations a model runs, the better the statistics will be for determining average latency and throughput. +Maximum inference rate +++++++++++++++++++++++ + +By default, the benchmarking app will run inference at the maximum rate based on device capabilities. +The maximum inference rate can be configured with the ``-max_irate `` option. +Tweaking this value allows better accuracy in power usage measurement by limiting the number of executions. 
+ Inputs ++++++++++++++++++++ @@ -337,7 +344,7 @@ following usage message: [Step 1/11] Parsing and validating input arguments [ INFO ] Parsing input parameters usage: benchmark_app.py [-h [HELP]] [-i PATHS_TO_INPUT [PATHS_TO_INPUT ...]] -m PATH_TO_MODEL [-d TARGET_DEVICE] - [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] + [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-max_irate MAXIMUM_INFERENCE_RATE] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] [-data_shape DATA_SHAPE] [-layout LAYOUT] [-extensions EXTENSIONS] [-c PATH_TO_CLDNN_CONFIG] [-cdir CACHE_DIR] [-lfile [LOAD_FROM_FILE]] [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION] [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] @@ -536,6 +543,9 @@ following usage message: 'none': no device performance mode will be set. Using explicit 'nstreams' or other device-specific options, please set hint to 'none' -niter Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. + -max_irate Optional. Maximum inference rate in frames per second. + If not specified, the default value is 0 and inference will run at the maximum rate, depending on the device capabilities. + Tweaking this value allows better accuracy in power usage measurement by limiting the execution. -t Optional. Time in seconds to execute topology. Input shapes diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst index 5033d24ba3785a..2a551d7aa44eb5 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst @@ -76,7 +76,7 @@ of optimization methods (`"compression"` section). :fragment: [nncf_congig] Here is a brief description of the required parameters of the Filter Pruning method. For a full description refer to the -`GitHub `__ page. +`GitHub `__ page. * ``pruning_init`` - initial pruning rate target. For example, value ``0.1`` means that at the begging of training, convolutions that can be pruned will have 10% of their filters set to zero. 
diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst index 41d43f7eea37d6..aa8e9cdabfda64 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst @@ -83,7 +83,7 @@ Accordingly, the code that loops over all available devices of the "GPU" type on Additional Resources #################### -* `OpenVINO™ Runtime API Tutorial <./../../notebooks/openvino-api-with-output.html>`__ -* `AUTO Device Tutorial <./../../notebooks/auto-device-with-output.html>`__ -* `GPU Device Tutorial <./../../notebooks/gpu-device-with-output.html>`__ -* `NPU Device Tutorial <./../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file +* `OpenVINO™ Runtime API Tutorial <../../notebooks/openvino-api-with-output.html>`__ +* `AUTO Device Tutorial <../../notebooks/auto-device-with-output.html>`__ +* `GPU Device Tutorial <../../notebooks/gpu-device-with-output.html>`__ +* `NPU Device Tutorial <../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index a440f77bc79984..46b541d84d4035 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores. 
Then the default settings for low-level performance properties on Windows and Linux are as follows: -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| Property | Windows | Linux | -+======================================+========================================================================+====================================================================+ -| ``ov::num_streams`` | 1 | 1 | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_hyper_threading`` | No | No | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| Property | Windows | Linux | ++======================================+=======================================================================+=======================================================================+ +| ``ov::num_streams`` | 1 | 1 | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_hyper_threading`` | No | No | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | 
++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ .. note:: @@ -91,6 +91,16 @@ Then the default settings for low-level performance properties on Windows and Li enabled on Linux. Such default settings are aligned with typical workloads running in the corresponding environments to guarantee better out-of-the-box (OOB) performance. +.. note:: + Starting with 5th Gen Intel Xeon Processors, the new microarchitecture enables the sub-NUMA clustering + feature. A sub-NUMA cluster (SNC) can create two or more localization domains (NUMA nodes) + within a socket through BIOS configuration. + By default, OpenVINO with the latency hint uses a single NUMA node for inference. Although this + behavior achieves the best performance for most models, there might be corner + cases that require manual tuning of the ``ov::num_streams`` and ``ov::hint::enable_hyper_threading`` parameters. + For more details, see `Sub-NUMA Clustering `__. + Throughput Hint ##################### diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 78cf0632f61b2b..b4e1c7ac15afcc 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -19,7 +19,7 @@ For an in-depth description of the GPU plugin, see: - `GPU plugin developer documentation `__ - `OpenVINO Runtime GPU plugin source files `__ -- `Accelerate Deep Learning Inference with Intel® Processor Graphics `__ +- `Start AI Development with Intel `__ The GPU plugin is a part of the Intel® Distribution of OpenVINO™ toolkit. For more information on how to configure a system to use it, see the :doc:`GPU configuration <../../../get-started/configurations/configurations-intel-gpu>`. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst index 26a09214ea462a..e45f51a37afa5e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst @@ -119,9 +119,6 @@ The hints are used on the presumption that the application queries ``ov::optimal While an application is free to create more requests if needed (for example to support asynchronous inputs population) **it is very important to at least run the** ``ov::optimal_number_of_infer_requests`` **of the inference requests in parallel**. It is recommended for efficiency, or device utilization, reasons. -Keep in mind that ``ov::hint::PerformanceMode::LATENCY`` does not necessarily imply using single inference request. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as the number of NUMA nodes in the system. -To make your application fully scalable, make sure to query the ``ov::optimal_number_of_infer_requests`` directly. - .. 
_prefer-async-api: Prefer Async API diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst index 7466d00efe5eb7..cad5633e11f85b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst @@ -85,12 +85,12 @@ Number of Streams Considerations * Select the number of streams that is **less or equal** to the number of requests that the application would be able to run simultaneously. * To avoid wasting resources, the number of streams should be enough to meet the *average* parallel slack rather than the peak load. -* Use the `ov::streams::AUTO `__ as a more portable option (that also respects the underlying hardware configuration). +* Use the `ov::streams::AUTO <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv44AUTO>`__ as a more portable option (that also respects the underlying hardware configuration). * It is very important to keep these streams busy, by running as many inference requests as possible (for example, start the newly-arrived inputs immediately): - * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests `__ of the ``ov:Compiled_Model``. + * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv432optimal_number_of_infer_requests>`__ of the ``ov::CompiledModel``. -* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams `__. +* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv417range_for_streams>`__. Batch Size Considerations +++++++++++++++++++++++++ @@ -99,7 +99,7 @@ Batch Size Considerations * Otherwise (or if the number of "available" requests fluctuates), you may need to keep several instances of the network (reshaped to the different batch size) and select the properly sized instance in the runtime accordingly. -* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size `__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. +* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv418optimal_batch_size>`__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. A Few Device-specific Details diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst index 86788b20249a3f..d00fd19c4d636d 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst @@ -139,5 +139,5 @@ sequences.
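Following up on the streams guidance in the hunk above, here is a small, hedged C++ sketch of querying ``ov::optimal_number_of_infer_requests`` and ``ov::range_for_streams`` after compiling with automatic stream selection; the model path and device name are placeholders:

```cpp
#include <openvino/openvino.hpp>
#include <cstdint>
#include <iostream>
#include <tuple>

int main() {
    ov::Core core;

    // Placeholder model; the THROUGHPUT hint plus AUTO streams lets the plugin
    // pick a stream count that matches the underlying hardware.
    ov::CompiledModel compiled = core.compile_model(
        "model.xml", "CPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
        ov::num_streams(ov::streams::AUTO));

    // Bare minimum of requests needed to saturate the device with this model.
    uint32_t n_requests = compiled.get_property(ov::optimal_number_of_infer_requests);

    // Device-wide bounds on the number of streams (per model).
    auto [min_streams, max_streams] = core.get_property("CPU", ov::range_for_streams);

    std::cout << "Run at least " << n_requests << " requests in parallel; "
              << "supported streams range: [" << min_streams << ", " << max_streams << "]\n";
    return 0;
}
```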
You can find more examples demonstrating how to work with states in other articles: -* `LLM Chatbot notebook <../../notebooks/stable-zephyr-3b-chatbot-with-output.html>`__ +* `LLaVA-NeXT Multimodal Chatbot notebook <../../notebooks/llava-next-multimodal-chatbot-with-output.html>`__ * :doc:`Serving Stateful Models with OpenVINO Model Server <../../openvino-workflow/model-server/ovms_docs_stateful_models>` diff --git a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst index 438c9ea9ec0bd3..3032add547f8a8 100644 --- a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst +++ b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst @@ -201,6 +201,6 @@ Additional Resources * Learn about the :doc:`basic steps to integrate inference in your application `. -* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. +* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. -* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. +* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index 5bdb51a596d5d8..e5bc0ca901a5aa 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -288,7 +288,7 @@ PyTorch supports ``torch.compile`` officially on Windows from version 2.3.0 onwa For PyTorch versions below 2.3.0, the ``torch.compile`` feature is not supported on Windows officially. However, it can be accessed by running the following instructions: -1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , +1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , 2. Update the file at ``/Lib/site-packages/torch/_dynamo/eval_frames.py`` 3. Find the function called ``check_if_dynamo_supported()``: @@ -374,7 +374,7 @@ The ``torch.compile`` feature is part of PyTorch 2.0, and is based on: (PEP 523) to dynamically modify Python bytecode right before it is executed (PyTorch operators that cannot be extracted to FX graph are executed in the native Python environment). It maintains the eager-mode capabilities using - `Guards `__ to ensure the + `Guards `__ to ensure the generated graphs are valid. * **AOTAutograd** - generates the backward graph corresponding to the forward graph captured by TorchDynamo. diff --git a/docs/dev/ci/github_actions/overview.md b/docs/dev/ci/github_actions/overview.md index 8daf56a3a2252f..e65c085ede30d5 100644 --- a/docs/dev/ci/github_actions/overview.md +++ b/docs/dev/ci/github_actions/overview.md @@ -11,6 +11,7 @@ detailed instructions where necessary. 
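The string-tensors hunk above mentions producing models that consume string tensors (for example, via OpenVINO tokenizers). As a hedged aside, a minimal C++ sketch of building such a tensor directly, with placeholder texts, might look like this:

```cpp
#include <openvino/openvino.hpp>
#include <string>

int main() {
    // A string tensor stores std::string elements instead of numeric data.
    ov::Tensor prompts(ov::element::string, ov::Shape{2});
    std::string* data = prompts.data<std::string>();
    data[0] = "What is OpenVINO?";        // placeholder text
    data[1] = "Summarize this article.";  // placeholder text

    // A tensor like this can be passed with set_input_tensor() to a model
    // whose inputs are strings, such as one produced with OpenVINO tokenizers.
    return 0;
}
```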
* [Required workflows](#required-workflows) * [Workflow structure](#structure-of-the-workflows) * [Workflow and job organisation](#workflows-and-jobs-organisation) + * [Security considerations](#security-considerations) * [Finding results, artifacts and logs](#finding-results-artifacts-and-logs) * [Custom actions overview](#custom-actions) * [Machines overview](#machines) @@ -205,6 +206,10 @@ Overview of the [Linux workflow's](../../../../.github/workflows/ubuntu_22.yml) * All the steps are executed in the shell specified by the `shell` key under `defaults: run:` unless a shell is specified directly in a step. +### Security considerations + +Please consult the [workflow security guidelines](security.md) before submitting a PR with GitHub Actions workflow changes. + ## Finding Results, Artifacts, and Logs ### Results diff --git a/docs/dev/ci/github_actions/security.md b/docs/dev/ci/github_actions/security.md new file mode 100644 index 00000000000000..d46cf6fd865c41 --- /dev/null +++ b/docs/dev/ci/github_actions/security.md @@ -0,0 +1,99 @@ +# Security best practices for GitHub Actions Workflows + +There are a few simple steps that we should follow to ensure our workflows are not vulnerable to common attacks. + +## Adjust `GITHUB_TOKEN` permissions + +Use the `permissions` key to make sure the `GITHUB_TOKEN` is configured with the least privileges for each job. + +Start with relatively safe permissions: + +```yaml +permissions: read-all +``` + +If you need more permissions, declare them at the job level when possible, for example: + +```yaml +jobs: + stale: + runs-on: ubuntu-latest + + # GITHUB_TOKEN will have only these permissions for + # `stale` job + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@f7176fd3007623b69d27091f9b9d4ab7995f0a06 + +``` + +See the [GitHub documentation](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/controlling-permissions-for-github_token) for more details. + +## Reduce the scope of environment variables + +Environment variables should be declared at the step level when possible (e.g., when the variable is used only in that exact step). Put variables at the job level only when they are used by several steps, and at the workflow level only when they are used by most of the steps. + +Example from [the official GitHub documentation](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables): + +```yaml +name: Greeting on variable day + +on: + workflow_dispatch + +# Workflow level variables. Avoid using these. +env: + DAY_OF_WEEK: Monday + +jobs: + greeting_job: + runs-on: ubuntu-latest + # Job level variables + env: + Greeting: Hello + steps: + - name: "Say Hello Mona it's Monday" + run: echo "$Greeting $First_Name. Today is $DAY_OF_WEEK!" + # Step level variables. Prefer this approach + env: + First_Name: Mona + +``` + +## Avoid using `pull_request_target` + +**Never** use the `pull_request_target` trigger event for workflows. If you want to use `pull_request_target`, contact a member of the OpenVINO GitHub Actions task force first. See this [GitHub blog post](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/) for background. + +## Handle secrets correctly + +**Never ever** hard-code plain-text secrets in a GitHub Actions workflow. If you need to use secrets, contact a member of the OpenVINO GitHub Actions task force first. + +## Be careful with user input + +Most GitHub context variables are propagated from user input.
That means they should be treated as untrusted and potentially malicious. There are some tactics you can use to mitigate the risk: +- Instead of using inline scripts, create an action and pass the variable as an argument +- Put the value into an environment variable for the step, and use the variable in the script + +More details are available in [this](https://securitylab.github.com/resources/github-actions-untrusted-input/) blog post. + +## Pin versions for GitHub Actions + +When using third-party actions, pin the version with a commit hash rather than a tag to shield your workflow from potential supply-chain compromise. + +For example, instead of this: + +```yaml +uses: actions/checkout@v4.2.2 +``` + +use this: + +```yaml +uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +``` + +## Further reading +Follow the general [recommendations from GitHub itself](https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions). diff --git a/docs/sphinx_setup/_static/html/modal.html b/docs/sphinx_setup/_static/html/modal.html index ac425599b821ce..38eb673824f97e 100644 --- a/docs/sphinx_setup/_static/html/modal.html +++ b/docs/sphinx_setup/_static/html/modal.html @@ -11,9 +11,6 @@

Configure Graphs

-
- Clear All -
diff --git a/docs/sphinx_setup/_static/html/modalLLM.html b/docs/sphinx_setup/_static/html/modalLLM.html index e3395a16931188..37b569d0bd4078 100644 --- a/docs/sphinx_setup/_static/html/modalLLM.html +++ b/docs/sphinx_setup/_static/html/modalLLM.html @@ -11,9 +11,6 @@

Configure Graphs

-
- Clear All -
diff --git a/docs/sphinx_setup/_static/js/graphs.js b/docs/sphinx_setup/_static/js/graphs.js index 4d621ce0780261..7171aed374dd99 100644 --- a/docs/sphinx_setup/_static/js/graphs.js +++ b/docs/sphinx_setup/_static/js/graphs.js @@ -1,11 +1,180 @@ -// =================== ADDITIONAL OUTPUT CONFIG ========================= +// =================== GENERAL OUTPUT CONFIG ========================= + +class Filter { + // param: GraphData[], networkModels[] + static FilterByNetworkModel(graphDataArr, networkModels) { + const optionMap = new Map(); + networkModels.map((model) => graphDataArr.filter((graphData => graphData.Model === model))) + .flat(1) + .forEach(item => optionMap.set(item.Platform, item)); + return Array.from(optionMap.values()); + } + // param: GraphData[], ieType + static ByIeTypes(graphDataArr, ieTypes) { + const optionMap = new Map(); + graphDataArr + .filter(graphData => ieTypes.includes(graphData.PlatformType)) + .forEach(item => optionMap.set(item.Platform, item)); + return Array.from(optionMap.values()); + } + // param: GraphData[], ieType, networkModels + static ByTypesAndModels(graphDataArr, ieTypes, models) { + return Array.from( + graphDataArr + .filter(({ PlatformType, Model }) => ieTypes.includes(PlatformType) && models.includes(Model)) + .reduce((map, item) => map.set(item.Platform, item), new Map()) + .values() + ); + } + // param: GraphData[], clientPlatforms + static ByIeKpis(graphDataArr, clientPlatforms) { + return Array.from( + graphDataArr.reduce((kpiSet, data) => { + if (clientPlatforms.some(platformName => data.Platform.includes(platformName))) { + Object.keys(data.Parameters).forEach(key => kpiSet.add(key)); + } + return kpiSet; + }, new Set()) + ); + } + // param: GraphData[] + static getParameters(graphDataArr) { + var parameters = [] + graphDataArr.filter((data) => { + for (var key in data.Parameters) { + if (!parameters.includes(Graph.capitalizeFirstLetter(key))) parameters.push(Graph.capitalizeFirstLetter(key)) + } + }) + return parameters; + } + // param: GraphData[] + static getIeTypes(graphDataArr) { + var kpis = [] + graphDataArr.filter((data) => { + for (var key in data.Parameters) { + if (!kpis.includes(Graph.capitalizeFirstLetter(key))) kpis.push(Graph.capitalizeFirstLetter(key)) + } + }) + return kpis; + } + // param: GraphData[], clientPlatforms[] + static ByClientPlatforms(graphDataArr, platformsArr) { + return graphDataArr.filter((data) => { + return platformsArr.includes(data.Platform) + }); + } +} + +class Modal { + static getPrecisionsLabels(graphDataArr) { + const kpisSet = new Set(); + graphDataArr.forEach(data => { + Object.values(data.Parameters).forEach(param => { + param.Precisions.forEach(precision => { + Object.keys(precision).forEach(key => { + kpisSet.add(key.toUpperCase()); + }); + }); + }); + }); + return Array.from(kpisSet); + } + + static getPrecisions(appConfig, labels) { + return labels.map((label) => { + var prec = appConfig.PrecisionsMap[label]; + if (prec !== undefined) { + return prec; + } + else { + return "no name"; + } + }); + } +} + +class Graph { + // functions to get unique keys + static getNetworkModels(graphDataArr) { + return Array.from(new Set(graphDataArr.map(obj => obj.Model))) + .sort((a, b) => a.localeCompare(b)); + } + static getIeTypes(graphDataArr) { + return Array.from(new Set(graphDataArr.map((obj) => obj.PlatformType))).sort((a, b) => a.localeCompare(b)); + } + + // param: GraphData[] + static getPlatformNames(graphDataArr) { + return graphDataArr.map((data) => data.Platform) + .sort((a, b) => 
a.localeCompare(b)); + } + + // param: GraphData[], engine: string, precisions: list + static getDatabyParameter(graphDataArr, engine, array) { + if (!Array.isArray(array[engine])) { + array[engine] = []; + } + array[engine].push(graphDataArr.Parameters[engine].Precisions); + return array; + } + + // this returns an object that is used to ender the chart + static getGraphConfig(engine, precisions, appConfig) { + return { + chartTitle: 'Throughput vs Latency', + iconClass: 'latency-icon', + datasets: precisions.map((precision) => appConfig.PrecisionData[engine][precision]), + unit: "None" + }; + } + + // param: GraphData[], parameterName: string, precisions: list + static getDatabyParameterOld(graphDataArr, parameterName, precisions) { + var array = []; + graphDataArr.forEach((item) => { + if (item.Parameters[parameterName] !== undefined) { + array.push(item.Parameters[parameterName].Precisions); + } + else { + var obj = {}; + precisions.forEach((prec) => { + obj[prec] = 0; + }) + array.push([obj]) + } + }) + return array; + + } + + // this returns an object that is used to ender the chart + static getGraphConfigOld(parameterName, item, precisions, appConfig) { + return { + chartTitle: Graph.capitalizeFirstLetter(parameterName), + iconClass: parameterName + '-icon', + unit: item.Parameters[parameterName]?.Unit, + datasets: precisions.map((precision) => appConfig.PrecisionData[precision]), + }; + } + static capitalizeFirstLetter(string) { + return string.charAt(0).toUpperCase() + string.slice(1); + } +} + +class ChartDisplay { + constructor(mode, numberOfCharts) { + this.mode = mode; + this.numberOfChartsInRow = numberOfCharts; + } +} $(document).ready(function () { - $('.ov-toolkit-benchmark-results').on('click', () => showModal("graph-data-ov.json")); - $('.ovms-toolkit-benchmark-results').on('click', () => showModal("graph-data-ovms.json")); - function clickBuildGraphs(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions) { - renderData(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions); + $('.ov-toolkit-benchmark-results').on('click', () => showModal("graph-data-ov.json", false)); + $('.ovms-toolkit-benchmark-results').on('click', () => showModal("graph-data-ovms.json", false)); + $('.ovms-toolkit-benchmark-llm-result').on('click', () => showModal("graph-data-ovms-genai.json", true)); + function clickBuildGraphs(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions, isLLM) { + renderData(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions, isLLM); $('.modal-footer').show(); $('#modal-display-graphs').show(); $('.edit-settings-btn').off('click').on('click', (event) => { @@ -34,16 +203,16 @@ $(document).ready(function () { $('body').css('overflow', 'auto'); } - function showModal(file) { + function showModal(file, isLLM) { $('body').css('overflow', 'hidden'); - fetch('../_static/benchmarks_files/data/'+ file) + fetch('../_static/benchmarks_files/data/' + file) .then((response) => response.json()) .then((jsonData) => { fetch('../_static/benchmarks_files/graph-config.json') .then((configResponse) => configResponse.json()) .then((appConfig) => { - renderModal(jsonData, appConfig) + renderModal(jsonData, appConfig, isLLM) }) }); } @@ -93,11 +262,12 @@ $(document).ready(function () { $('#build-graphs-btn').prop('disabled', true); } - function renderModal(graph, appConfig) { + function renderModal(graph, appConfig, isLLM) { + var modalPath = isLLM === true ? 
'../_static/html/modalLLM.html' : '../_static/html/modal.html' new Graph(graph); var networkModels = Graph.getNetworkModels(graph); var ieTypes = Graph.getIeTypes(graph); - fetch('../_static/html/modal.html').then((response) => response.text()).then((text) => { + fetch(modalPath).then((response) => response.text()).then((text) => { // generate and configure modal container var modal = $('
'); @@ -111,13 +281,13 @@ $(document).ready(function () { const models = networkModels.map((networkModel) => createCheckMark(networkModel, 'networkmodel')); modal.find('.models-column').append(models); - const selectAllModelsButton = createCheckMark('', 'networkmodel'); + const selectAllModelsButton = createCheckMark('', 'networkmodel', false , false); modal.find('.models-selectall').append(selectAllModelsButton); - const selectAllPlatformsButton = createCheckMark('', 'platform'); + const selectAllPlatformsButton = createCheckMark('', 'platform', false , false); modal.find('.platforms-selectall').append(selectAllPlatformsButton); - const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false)); + const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false , false)); modal.find('.precisions-column').append(precisions); selectAllCheckboxes(precisions); @@ -132,24 +302,20 @@ $(document).ready(function () { modal.find('#modal-display-graphs').hide(); modal.find('.ietype-column input').first().prop('checked', true); - const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false)); + const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false , true)); modal.find('.kpi-column').append(kpiLabels); $('body').prepend(modal); - preselectDefaultSettings(graph, modal, appConfig); - - //is not generic solution :( if (appConfig.DefaultSelections.platformTypes?.data?.includes('Select All')) { selectAllCheckboxes(iefilter); - }; + preselectDefaultSettings(graph, modal, appConfig); renderClientPlatforms(graph, modal); - $('.clear-all-btn').on('click', clearAll); $('#build-graphs-btn').on('click', () => { $('#modal-configure-graphs').hide(); - clickBuildGraphs(graph, appConfig, getSelectedNetworkModels(), getSelectedIeTypes(), getSelectedClientPlatforms(), getSelectedKpis(), Modal.getPrecisions(appConfig, getSelectedPrecisions())); + clickBuildGraphs(graph, appConfig, getSelectedNetworkModels(), getSelectedIeTypes(), getSelectedClientPlatforms(), getSelectedKpis(), Modal.getPrecisions(appConfig, getSelectedPrecisions()), isLLM); }); $('.modal-close').on('click', hideModal); $('.close-btn').on('click', hideModal); @@ -163,18 +329,18 @@ $(document).ready(function () { modal.find('.models-selectall input').on('click', function () { if ($(this).prop('checked')) selectAllCheckboxes(models); else deSelectAllCheckboxes(models); - + renderClientPlatforms(graph, modal) }); modal.find('.platforms-selectall input').on('click', function () { - if ($(this).prop('checked')) + if ($(this).prop('checked')) renderClientPlatforms(graph, modal) else { var enabledPlatforms = modal.find('.platforms-column .checkmark-container'); deSelectCheckbox(enabledPlatforms); }; - + }); modal.find('.models-column input').on('click', function () { @@ -223,49 +389,20 @@ $(document).ready(function () { precisions.prop('disabled', false); } - function clearAll() { - $('.modal-content-grid-container input:checkbox').each((index, object) => $(object).prop('checked', false)); - validatePrecisionSelection(); - validateSelections(); - } - function preselectDefaultSettings(graph, modal, appConfig) { - - const defaultSelections = appConfig.DefaultSelections; - - selectDefaultPlatformType(defaultSelections.platformTypes, graph, modal); - - applyPlatformFilters(defaultSelections.platformFilters, modal, graph); - - clearAllSettings(defaultSelections); - + 
selectDefaultPlatformType(appConfig.DefaultSelections.platformTypes, graph, modal); + clearAllSettings(appConfig.DefaultSelections); validateSelections(); validatePrecisionSelection(); } - function selectDefaultPlatformType(platformTypes, graph, modal) { if (!platformTypes) return; - const type = platformTypes.data[0]; $(`input[data-ietype="${type}"]`).prop('checked', true); renderClientPlatforms(graph, modal); } - - function applyPlatformFilters(platformFilters, modal, graph) { - if (!platformFilters) return; - - const filters = modal.find('.selectable-box-container').children('.selectable-box'); - filters.removeClass('selected'); - - platformFilters.data.forEach(selection => { - filters.filter(`[data-${platformFilters.name}="${selection}"]`).addClass('selected'); - }); - - renderClientPlatforms(graph, modal); - } - + function clearAllSettings(defaultSelections) { - clearAll(); Object.keys(defaultSelections).forEach(setting => { const { name, data } = defaultSelections[setting]; data.forEach(selection => { @@ -287,14 +424,15 @@ $(document).ready(function () { var platformNames = Graph.getPlatformNames(fPlatforms); $('.platforms-column .checkmark-container').remove(); - const clientPlatforms = platformNames.map((platform) => createCheckMark(platform, 'platform', true)); - + const clientPlatforms = platformNames.map((platform) => createCheckMark(platform, 'platform', true, false)); + var enabledPlatforms = filterPlatforms(graph, getSelectedIeTypes(), getSelectedNetworkModels()); enableCheckBoxes(clientPlatforms, enabledPlatforms); modal.find('.platforms-column').append(clientPlatforms); enableParmeters(graph, getSelectedClientPlatforms()); modal.find('.platforms-column input').on('click', validateSelections); + validateSelections(); } function enableParmeters(graph, clientPlatforms) { @@ -310,11 +448,12 @@ $(document).ready(function () { }) } - function createCheckMark(itemLabel, modelLabel, disabled) { + function createCheckMark(itemLabel, modelLabel, disabled, checked = false) { const item = $('