diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml index dd1078bb0d4353..a17986f35d3723 100644 --- a/.github/actions/openvino_provider/action.yml +++ b/.github/actions/openvino_provider/action.yml @@ -177,7 +177,7 @@ runs: else ov_package_url=$(curl -s ${{ inputs.nightly_package_source }} | jq -r '.${{ inputs.platform }}_${{ inputs.arch }}') fi - cd ${{ inputs.install_dir || env.GITHUB_WORKSPACE }} + cd ${{ inputs.install_dir || github.workspace }} package_basename=$(basename $ov_package_url) wget $ov_package_url --progress=bar:force:noscroll -O $package_basename package_folder=${package_basename%.*} @@ -196,7 +196,7 @@ runs: uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: ${{ steps.openvino_s3_download.outputs.ov_artifact_name }} - path: ${{ steps.openvino_s3_download.outputs.ov_package_path }} + path: ${{ github.workspace }}/${{ steps.openvino_s3_download.outputs.ov_package_path }} if-no-files-found: 'error' - name: Get wheel diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index 73426222253adb..cf628d12c29b89 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -49,7 +49,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-16-cores-arm-docker-build + runs-on: aks-linux-4-cores-8gb-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -75,7 +75,7 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.debian_10_arm }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -104,7 +104,7 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'debian_10' @@ -116,6 +116,8 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: + # Additional investigation needed why CPU functional tests are failing on v6 VM size's version, + # so leave it as it is for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} python-version: '3.7' diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 10de6867c7d0e2..66ce9461f05fe8 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -53,7 +53,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-16-cores-arm-docker-build + runs-on: aks-linux-4-cores-8gb-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -78,7 +78,7 @@ jobs: needs: [ Docker, Smart_CI ] uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": 
"-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -105,7 +105,7 @@ jobs: if: ${{ 'false' }} uses: ./.github/workflows/job_debian_packages.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-4-cores-8gb-arm' image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' Samples: @@ -113,7 +113,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).samples uses: ./.github/workflows/job_samples_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -123,7 +123,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API uses: ./.github/workflows/job_openvino_js.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-4-cores-8gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}"}' ONNX_Runtime: @@ -133,7 +133,7 @@ jobs: needs: [ Build, Smart_CI, Docker ] uses: ./.github/workflows/job_onnx_runtime.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' sccache-azure-key-prefix: 'ubuntu20_aarch64_onnxruntime' @@ -142,7 +142,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_tokenizers.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -154,7 +154,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'ubuntu_20_04' @@ -164,7 +164,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_python_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -174,7 +174,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -184,7 +184,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": 
["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -195,6 +195,8 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: + # Additional investigation needed why CPU functional tests are failing on v6 VM size's version, + # so leave it as it is for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} python-version: '3.11' @@ -207,7 +209,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' @@ -218,7 +220,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_models_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' diff --git a/.github/workflows/workflows_scans.yml b/.github/workflows/workflows_scans.yml new file mode 100644 index 00000000000000..0a293a4152b9a0 --- /dev/null +++ b/.github/workflows/workflows_scans.yml @@ -0,0 +1,38 @@ +name: GitHub Actions Workflows Scans +on: + workflow_dispatch: {} + push: + paths: + - '.github/workflows/**' + branches: + - 'master' + - 'releases/**' + pull_request: + paths: + - '.github/workflows/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: read-all + +jobs: + semgrep: + name: github_actions_workflows_scan/semgrep + runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} + + container: + image: semgrep/semgrep + + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: 'false' + sparse-checkout: .github/workflows + + - name: Semgrep scan + run: | + semgrep scan --error -j 8 --config "p/github-actions" .github/workflows/* diff --git a/docs/articles_en/assets/images/genai_main_diagram.svg b/docs/articles_en/assets/images/genai_main_diagram.svg new file mode 100644 index 00000000000000..b01cbd827acb3c --- /dev/null +++ b/docs/articles_en/assets/images/genai_main_diagram.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ce964e115f1e3942cdf381f44b4dc6d466df62c70396306a4f241fb07ea3ed +size 392244 diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 9998b3989486d2..16290b17eca323 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -11,9 +11,16 @@ Inference with OpenVINO GenAI NPU inference of LLMs -This article provides reference code and guidance on running generative AI models, -using OpenVINO GenAI. Note that the base OpenVINO version will not work with these instructions, -make sure to :doc:`install OpenVINO GenAI <../../get-started/install-openvino/install-openvino-genai>`. +OpenVINO™ GenAI is a library of pipelines and methods, extending the OpenVINO runtime to work +with generative AI models more efficiently. This article provides reference code and guidance +on its usage. 
Note that the base OpenVINO version will not work with these instructions, +make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvino/install-openvino-genai>`. + +.. image:: ../../assets/images/genai_main_diagram.svg + :align: center + :width: 500 + :alt: OpenVINO workflow diagram for convenience + | Here is sample code for several Generative AI use case scenarios. Note that these are very basic examples and may need adjustments for your specific needs, like changing the inference device. diff --git a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst index 6ab924a61ef150..259f605d46c2f7 100644 --- a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst +++ b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst @@ -247,57 +247,50 @@ OpenVINO™ provides several debug capabilities: * Model can be visualized to image from the xDot format: -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:visualize] - - .. tab-item:: C++ - :sync: cpp - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:visualize] - - -.. code-block:: sh + .. tab-set:: - `ov::pass::VisualizeTree` can be parametrized via environment variables: + .. tab-item:: Python + :sync: py - OV_VISUALIZE_TREE_OUTPUT_SHAPES=1 - visualize shapes + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:visualize] - OV_VISUALIZE_TREE_OUTPUT_TYPES=1 - visualize types + .. tab-item:: C++ + :sync: cpp - OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1 - pretty denormal values + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:visualize] - OV_VISUALIZE_TREE_RUNTIME_INFO=1 - print runtime information - OV_VISUALIZE_TREE_IO=1 - print I/O ports + ``ov::pass::VisualizeTree`` can be parametrized via environment variables: - OV_VISUALIZE_TREE_MEMBERS_NAME=1 - print member names + * ``OV_VISUALIZE_TREE_OUTPUT_SHAPES=1`` - visualize shapes + * ``OV_VISUALIZE_TREE_OUTPUT_TYPES=1`` - visualize types + * ``OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1`` - pretty denormal values + * ``OV_VISUALIZE_TREE_RUNTIME_INFO=1`` - print runtime information + * ``OV_VISUALIZE_TREE_IO=1`` - print I/O ports + * ``OV_VISUALIZE_TREE_MEMBERS_NAME=1`` - print member names * Also model can be serialized to IR: -.. tab-set:: + .. tab-set:: - .. tab-item:: Python - :sync: py + .. tab-item:: Python + :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:serialize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:serialize] - .. tab-item:: C++ - :sync: cpp + .. tab-item:: C++ + :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:serialize] + .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:serialize] Additional Resources @@ -306,5 +299,3 @@ Additional Resources * :doc:`Available Operation Sets <../../../documentation/openvino-ir-format/operation-sets/available-opsets>`. * :doc:`OpenVINO™ Runtime Extensibility Developer Guide <../../../documentation/openvino-extensibility>`. * :doc:`Transformations Developer Guide <../../../documentation/openvino-extensibility/transformation-api>`. - - diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv index 0d16c5c4998329..fa5ae359fa45c0 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv @@ -1,156 +1,95 @@ -Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec -opt-125m-gptq,INT4-MIXED,32,965.9,29,7.7,129.87 -opt-125m-gptq,INT4-MIXED,1024,1507.9,113.1,7.8,128.21 -tiny-llama-1.1b-chat,INT4-MIXED,32,1831.8,46.5,16.7,59.88 -tiny-llama-1.1b-chat,INT4-MIXED,1024,1806.3,635,17.8,56.18 -qwen2-0.5b,INT4-MIXED,32,2551.7,61.4,18.3,54.64 -qwen2-0.5b,INT4-MIXED,1024,2976.6,356.1,19.2,52.08 -tiny-llama-1.1b-chat,INT8-CW,32,1987.4,56,21.6,46.30 -tiny-llama-1.1b-chat,INT8-CW,1024,2209.1,772.7,22.6,44.25 -qwen2-0.5b,INT8-CW,32,2484.9,57.3,22.8,43.86 -qwen2-0.5b,INT8-CW,1024,3102.5,407.1,23.9,41.84 -qwen2-1.5b,INT4-MIXED,32,4265.2,71.7,25.5,39.22 -qwen2-1.5b,INT4-MIXED,1024,4884.5,862.4,26.8,37.31 -dolly-v2-3b,INT4-MIXED,32,2401.3,89.6,27.5,36.36 -red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2511.5,78.6,28.2,35.46 -phi-2,INT4-MIXED,32,2279.5,95.7,29.1,34.36 -minicpm-1b-sft,INT4-MIXED,31,2759.9,104.4,30.9,32.36 -phi-2,INT4-MIXED,32,2620.1,100.8,31,32.26 -stable-zephyr-3b-dpo,INT4-MIXED,30,2636.5,86.8,31.7,31.55 -dolly-v2-3b,INT4-MIXED,1024,3137.1,1782.9,32.2,31.06 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,3118.5,1831.7,33.3,30.03 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,2862.7,1821.1,33.5,29.85 -qwen2-1.5b,INT8-CW,32,4831.2,87,33.8,29.59 -opt-2.7b,INT4-MIXED,31,2898.3,73.2,33.9,29.50 -phi-2,INT4-MIXED,1024,2797.4,1887,34,29.41 -orca-mini-3b,INT4-MIXED,32,2877.8,100.3,35,28.57 -stablelm-3b-4e1t,INT4-MIXED,32,2669.4,94.7,35.3,28.33 -qwen2-1.5b,INT8-CW,1024,5455.8,1047.6,35.3,28.33 -minicpm-1b-sft,INT8-CW,31,3104.1,103.5,35.3,28.33 -phi-2,INT4-MIXED,1024,3039.8,1917.4,35.9,27.86 -stable-zephyr-3b-dpo,INT4-MIXED,946,3411.4,1695,37,27.03 -gemma-2b-it,INT4-MIXED,32,3991.7,116.1,37.9,26.39 -opt-2.7b,INT4-MIXED,937,3617.5,1764.9,38.2,26.18 -phi-3-mini-4k-instruct,INT4-MIXED,31,2935.3,111.6,38.2,26.18 -phi-3-mini-4k-instruct,INT4-MIXED,38,3102.4,134,38.4,26.04 -phi-3-mini-4k-instruct,INT4-MIXED,31,2986.1,114.1,38.9,25.71 -phi-3-mini-4k-instruct,INT4-MIXED,38,2977.4,131.1,39,25.64 -gemma-2b-it,INT4-MIXED,1024,4973.3,1249.2,39.7,25.19 -stablelm-3b-4e1t,INT4-MIXED,1024,3196.9,2045.4,39.9,25.06 -dolly-v2-3b,INT8-CW,32,3490.2,107.4,41.5,24.10 -red-pajama-incite-chat-3b-v1,INT8-CW,32,3457.9,105,42.5,23.53 -opt-2.7b,INT8-CW,31,3686.8,107.5,44.1,22.68 -phi-2,INT8-CW,32,3554.9,116.6,44.1,22.68 -phi-3-mini-4k-instruct,INT4-MIXED,1023,3390.7,2277.1,44.2,22.62 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3643.6,2485,44.4,22.52 -phi-3-mini-4k-instruct,INT4-MIXED,1023,3516.4,2280.9,44.5,22.47 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3537.2,2522.4,44.7,22.37 -orca-mini-3b,INT4-MIXED,1024,3557.3,1898.9,45,22.22 
-minicpm-1b-sft,FP16,31,3814.4,97.9,45.4,22.03 -stablelm-3b-4e1t,INT8-CW,32,3486.9,100.5,46.1,21.69 -stable-zephyr-3b-dpo,INT8-CW,30,3516.7,101.9,46.1,21.69 -dolly-v2-3b,INT8-CW,1024,4265.9,2178.6,46.2,21.65 -red-pajama-incite-chat-3b-v1,INT8-CW,1020,3979.1,2219.7,47.2,21.19 -red-pajama-incite-chat-3b-v1,INT8-CW,1024,3975.5,2199.7,47.3,21.14 -opt-2.7b,INT8-CW,937,4358.6,1981.8,48.4,20.66 -phi-2,INT8-CW,1024,4058.1,2280.1,48.9,20.45 -gemma-2b-it,INT8-CW,32,4786.8,119.8,49.4,20.24 -chatglm3-6b,INT4-MIXED,32,4141.5,166.6,49.7,20.12 -stablelm-3b-4e1t,INT8-CW,1024,4054.8,2243.5,50.7,19.72 -stable-zephyr-3b-dpo,INT8-CW,946,4521.8,1816.4,51.3,19.49 -gemma-2b-it,INT8-CW,1024,5810.7,1580,51.3,19.49 -chatglm3-6b,INT4-MIXED,32,4651.4,164.7,51.6,19.38 -chatglm3-6b,INT4-MIXED,1024,4235.1,2818.7,52.3,19.12 -orca-mini-3b,INT8-CW,32,4162,109.2,53.3,18.76 -chatglm3-6b,INT4-MIXED,1024,4783.8,2869,54.4,18.38 -gpt-j-6b,INT4-MIXED,32,4667.3,176.7,56.3,17.76 -chatglm3-6b-gptq,INT4-MIXED,32,5369.4,173.9,58.9,16.98 -llama-2-7b-chat-hf,INT4-MIXED,32,4280,173.2,60.1,16.64 -phi-3-mini-4k-instruct,INT8-CW,31,4585.1,123,60.5,16.53 -phi-3-mini-4k-instruct,INT8-CW,38,4597,152,60.5,16.53 -chatglm2-6b,INT4-MIXED,32,4847.8,158.7,60.6,16.50 -vicuna-7b-v1.5,INT4-MIXED,32,4476.9,178.2,61.2,16.34 -chatglm3-6b-gptq,INT4-MIXED,1024,5217.6,2863.7,61.3,16.31 -mistral-7b-v0.1,INT4-MIXED,31,4413.6,194,61.7,16.21 -qwen2-7b,INT4-MIXED,32,7044.7,184.4,61.7,16.21 -mistral-7b-v0.1,INT4-MIXED,32,4427.6,193.3,61.8,16.18 -orca-mini-3b,INT8-CW,1024,4821.6,2239.1,62,16.13 -codegen25-7b,INT4-MIXED,32,4687.2,176.2,62.7,15.95 -chatglm2-6b,INT4-MIXED,1024,5165.9,3148,63,15.87 -llama-2-7b-gptq,INT4-MIXED,32,4632.8,175.2,63.4,15.77 -stablelm-7b,INT4-MIXED,32,5219.5,206.3,63.4,15.77 -qwen-7b-chat,INT4-MIXED,32,7805.6,193.8,63.6,15.72 -gpt-j-6b,INT4-MIXED,1024,5314.9,3111.8,63.6,15.72 -qwen2-7b,INT4-MIXED,1024,7716.2,3548.3,64.1,15.60 -llama-3-8b,INT4-MIXED,32,4910.9,204.8,64.7,15.46 -mistral-7b-v0.1,INT4-MIXED,1024,4720.8,3667.1,64.8,15.43 -mistral-7b-v0.1,INT4-MIXED,1007,4704.7,3685.4,64.9,15.41 -llama-3.1-8b,INT4-MIXED,31,4850.3,211.5,64.9,15.41 -phi-3-mini-4k-instruct,INT8-CW,1023,5128.6,2815.2,65.7,15.22 -phi-3-mini-4k-instruct,INT8-CW,1061,5155,3407.9,65.9,15.17 -mistral-7b-v0.1,INT4-MIXED,32,4939.3,192,66.5,15.04 -llama-3-8b,INT4-MIXED,33,4919.4,261.9,67.2,14.88 -llama-2-7b-chat-hf,INT4-MIXED,1024,4948.2,3811,67.3,14.86 -qwen1.5-7b-chat,INT4-MIXED,32,5943.1,180.5,67.7,14.77 -qwen-7b-chat-gptq,INT4-MIXED,32,8057,187,68.1,14.68 -llama-3-8b,INT4-MIXED,32,5503.5,198.4,68.1,14.68 -qwen-7b-chat,INT4-MIXED,32,8091.6,185.9,68.1,14.68 -llama-3-8b,INT4-MIXED,1024,5569.1,3920.5,68.2,14.66 -llama-3.1-8b,INT4-MIXED,31,5358.6,201,68.2,14.66 -stablelm-7b,INT4-MIXED,1020,5804.4,3726.6,68.8,14.53 -llama-3.1-8b,INT4-MIXED,31,5452.6,202.9,68.8,14.53 -llama-2-7b-chat-hf,INT4-MIXED,32,5023,165.7,69,14.49 -llama-3-8b,INT4-MIXED,32,5413.6,202,69.1,14.47 -llama-3-8b,INT4-MIXED,33,5440.4,262.1,69.2,14.45 -codegen25-7b,INT4-MIXED,1024,5434.6,3513.2,69.9,14.31 -mistral-7b-v0.1,INT4-MIXED,1024,5614.9,3819.1,70,14.29 -mistral-7b-v0.1,INT4-MIXED,31,4927.8,205,70.5,14.18 -llama-3-8b,INT4-MIXED,33,5498.9,270.7,70.6,14.16 -llama-3-8b,INT4-MIXED,1025,5577.4,4271.2,70.6,14.16 -llama-2-7b-gptq,INT4-MIXED,1024,5302.2,3529.4,70.7,14.14 -zephyr-7b-beta,INT4-MIXED,32,5212.4,190.6,71.2,14.04 -llama-3-8b,INT4-MIXED,1024,6161.1,3918,71.5,13.99 -llama-3-8b,INT4-MIXED,1025,6098,4441.8,72.3,13.83 -llama-3-8b,INT4-MIXED,1024,6071.7,3972.2,72.4,13.81 
-mistral-7b-v0.1,INT4-MIXED,1007,5224.1,4153.4,73.8,13.55 -llama-3-8b,INT4-MIXED,1025,6156.9,4357,73.9,13.53 -zephyr-7b-beta,INT4-MIXED,1024,5511.6,3978,74.4,13.44 -opt-2.7b,FP16,31,9220.3,107.8,74.7,13.39 -dolly-v2-3b,FP16,32,6058.9,109.9,74.7,13.39 -qwen1.5-7b-chat,INT4-MIXED,1024,7063.2,3791.7,75,13.33 -qwen-7b-chat,INT4-MIXED,1024,8919.5,3763.9,75,13.33 -red-pajama-incite-chat-3b-v1,FP16,32,6036.5,107.5,75.9,13.18 -llama-2-7b-chat-hf,INT4-MIXED,1024,5716.8,4231.7,76.2,13.12 -phi-2,FP16,32,6090.1,115.2,77.1,12.97 -stable-zephyr-3b-dpo,FP16,30,6113.1,112.1,78.6,12.72 -qwen-7b-chat,INT4-MIXED,1024,9212.9,3857.4,78.6,12.72 -stablelm-3b-4e1t,FP16,32,6065.4,110.2,78.7,12.71 -opt-2.7b,FP16,937,9733.8,3750.8,78.8,12.69 -dolly-v2-3b,FP16,1024,6615.2,2230.9,79.1,12.64 -red-pajama-incite-chat-3b-v1,FP16,1020,6588.3,2259.4,80.2,12.47 -glm-4-9b,INT4-MIXED,33,6386.2,328,80.4,12.44 -red-pajama-incite-chat-3b-v1,FP16,1024,6570.3,2268.7,80.4,12.44 -baichuan2-7b-chat,INT4-MIXED,32,5977.9,201.7,81,12.35 -glm-4-9b,INT4-MIXED,32,6389.7,248.1,81,12.35 -phi-2,FP16,1024,6646.2,2406.7,81.4,12.29 -stable-zephyr-3b-dpo,FP16,946,6875.7,1868.2,82.9,12.06 -stablelm-3b-4e1t,FP16,1024,6636.1,2036.9,83,12.05 -chatglm2-6b,INT8-CW,32,6731.8,159.2,84.4,11.85 -glm-4-9b,INT4-MIXED,1025,7061.4,4939.2,85.2,11.74 -qwen-7b-chat-gptq,INT4-MIXED,1024,9175.3,3898,85.3,11.72 -gemma-7b-it,INT4-MIXED,32,7883.9,230.5,86,11.63 -gemma-7b-it,INT4-MIXED,32,8002.6,235,86.1,11.61 -glm-4-9b,INT4-MIXED,1024,7064.9,4411.2,86.2,11.60 -gpt-j-6b,INT8-CW,32,7009.2,176.8,86.4,11.57 -chatglm2-6b,INT8-CW,1024,7050.5,3871.6,86.8,11.52 -chatglm3-6b,INT8-CW,32,6755.9,159,86.8,11.52 -baichuan2-7b-chat,INT4-MIXED,1024,7033.3,4049,88.8,11.26 -chatglm3-6b,INT8-CW,1024,7076.5,3865.9,89.2,11.21 -qwen-7b-chat,INT4-MIXED,32,9245.7,176.3,90,11.11 -gemma-7b-it,INT4-MIXED,1024,9449.4,4305.8,93.2,10.73 -gpt-j-6b,INT8-CW,1024,7672.3,4181.1,93.5,10.70 -gemma-7b-it,INT4-MIXED,1024,9330.5,4222.5,93.7,10.67 -orca-mini-3b,FP16,32,7416.5,122.3,94.7,10.56 -codegen25-7b,INT8-CW,32,7557.6,170.7,98.4,10.16 -qwen-7b-chat,INT4-MIXED,1024,10371.1,4271.7,98.9,10.11 -llama-2-7b-chat-hf,INT8-CW,32,7390.6,171.6,99.9,10.01 +opt-125m-gptq,INT4-MIXED,32,1116,25.8,8.1,123.5 +opt-125m-gptq,INT4-MIXED,1024,1187.1,75.2,8.2,122.0 +qwen2-0.5b,INT4-MIXED,32,1587.4,45.1,15.4,64.9 +qwen2-0.5b,INT4-MIXED,1024,1587.8,228.2,15.6,64.1 +tiny-llama-1.1b-chat,INT4-MIXED,32,1704.2,42.4,17.6,56.8 +tiny-llama-1.1b-chat,INT4-MIXED,1024,1616.3,489.2,18.9,52.9 +qwen2-0.5b,INT8-CW,32,1477.3,51.5,20.2,49.5 +qwen2-0.5b,INT8-CW,1024,1592,263.7,20.6,48.5 +tiny-llama-1.1b-chat,INT8-CW,32,1855.6,60.2,20.7,48.3 +tiny-llama-1.1b-chat,INT8-CW,1024,1992.6,618.2,21.7,46.1 +qwen2-1.5b,INT4-MIXED,32,2024.2,59.6,23.1,43.3 +bloomz-560m,FP16,1024,2773.1,647.8,23.8,42.0 +qwen2-1.5b,INT4-MIXED,1024,2177.7,577.4,23.8,42.0 +bloomz-560m,FP16,32,2582.7,44.2,25.1,39.8 +dolly-v2-3b,INT4-MIXED,32,2507.9,79.8,29.4,34.0 +phi-2,INT4-MIXED,32,2568.9,74.6,29.7,33.7 +qwen2-1.5b,INT8-CW,32,2577.3,81.6,30.5,32.8 +red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2489.4,69.9,30.5,32.8 +minicpm-1b-sft,INT4-MIXED,31,2442.1,84.7,31,32.3 +qwen2-1.5b,INT8-CW,1024,2739.8,773.3,31.2,32.1 +gemma-2b-it,INT4-MIXED,32,2998.2,103.5,31.4,31.8 +dolly-v2-3b,INT4-MIXED,1024,2508.1,1396.6,32,31.3 +gemma-2b-it,INT4-MIXED,1024,3171.5,822.3,32.2,31.1 +phi-2,INT4-MIXED,1024,2940.5,1395.3,32.2,31.1 +red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,2489.6,1435.5,33.1,30.2 +minicpm-1b-sft,INT8-CW,31,2818.6,86.9,33.4,29.9 
+stable-zephyr-3b-dpo,INT4-MIXED,32,2638.2,87.4,33.8,29.6 +stablelm-3b-4e1t,INT4-MIXED,32,2750.5,89.4,35.6,28.1 +stablelm-3b-4e1t,INT4-MIXED,1023,3115.5,1473.1,38.1,26.2 +phi-3-mini-4k-instruct,INT4-MIXED,32,3039.1,109.2,40.4,24.8 +phi-2,INT8-CW,32,3599.7,107.5,42.1,23.8 +gemma-2b-it,INT8-CW,32,3845.4,111.3,42.2,23.7 +dolly-v2-3b,INT8-CW,32,3596.4,110.1,42.5,23.5 +gemma-2b-it,INT8-CW,1024,3844.6,1183,43,23.3 +red-pajama-incite-chat-3b-v1,INT8-CW,32,3590,111,43.3,23.1 +phi-3-mini-4k-instruct,INT4-MIXED,1024,3467.6,1721.6,43.5,23.0 +stablelm-3b-4e1t,INT8-CW,32,3582.8,111,44.3,22.6 +stable-zephyr-3b-dpo,INT8-CW,32,3607.2,110.2,44.5,22.5 +phi-2,INT8-CW,1024,3982,1508,44.6,22.4 +dolly-v2-3b,INT8-CW,1024,3596.5,1529.1,44.9,22.3 +minicpm-1b-sft,FP16,31,3769.9,84,45.4,22.0 +red-pajama-incite-chat-3b-v1,INT8-CW,1023,3952,2064.5,45.7,21.9 +stablelm-3b-4e1t,INT8-CW,1023,3934.5,2286.3,46.8,21.4 +gpt-j-6b,INT4-MIXED,32,4443.5,159.3,56.7,17.6 +phi-3-mini-4k-instruct,INT8-CW,32,4545,117.1,57.6,17.4 +phi-3-mini-4k-instruct,INT8-CW,1024,4810.4,2068.8,60.5,16.5 +gpt-j-6b,INT4-MIXED,1024,4746.4,2397,60.6,16.5 +falcon-7b-instruct,INT4-MIXED,32,5014,203.7,61.3,16.3 +qwen2-7b,INT4-MIXED,32,5269.4,203.8,62.3,16.1 +codegen25-7b,INT4-MIXED,32,4641.1,170.6,63.5,15.7 +llama-2-7b-gptq,INT4-MIXED,32,4597.3,172.1,63.5,15.7 +falcon-7b-instruct,INT4-MIXED,1024,5230.6,2695.3,63.6,15.7 +qwen2-7b,INT4-MIXED,1024,5370.8,2505.9,63.9,15.6 +decilm-7b-instruct,INT4-MIXED,36,4614.2,301.1,65.3,15.3 +codegen25-7b,INT4-MIXED,1024,4641.9,2629.6,67.4,14.8 +llama-2-7b-gptq,INT4-MIXED,1024,4928.1,2584.3,67.6,14.8 +mistral-7b-v0.1,INT4-MIXED,32,4928.5,180.9,69.2,14.5 +llama-2-7b-chat-hf,INT4-MIXED,32,4985.7,160.3,69.5,14.4 +qwen-7b-chat-gptq,INT4-MIXED,32,5426.7,188.3,69.5,14.4 +llama-3-8b,INT4-MIXED,33,5473.4,285.7,70,14.3 +flan-t5-xxl,INT4-MIXED,33,19293.8,211.7,70.1,14.3 +llama-3-8b,INT4-MIXED,33,5389.2,281,70.8,14.1 +mistral-7b-v0.1,INT4-MIXED,1024,5225.4,2713.3,71.8,13.9 +zephyr-7b-beta,INT4-MIXED,32,5306.1,177.9,72.1,13.9 +llama-3-8b,INT4-MIXED,1025,5615.2,2937.8,72.4,13.8 +llama-3-8b,INT4-MIXED,1025,5531.7,2815.4,73.2,13.7 +llama-2-7b-chat-hf,INT4-MIXED,1024,5319.5,2736.2,73.6,13.6 +phi-2,FP16,32,6197,104.6,74.7,13.4 +zephyr-7b-beta,INT4-MIXED,1024,5306.4,2802.3,74.7,13.4 +qwen-7b-chat-gptq,INT4-MIXED,1024,5934.9,2606.9,75,13.3 +dolly-v2-3b,FP16,32,6195.1,105.3,75.3,13.3 +baichuan2-7b-chat,INT4-MIXED,32,5837.9,188.5,76.8,13.0 +red-pajama-incite-chat-3b-v1,FP16,32,6178.6,118,76.8,13.0 +gemma-7b-it,INT4-MIXED,32,6495.9,230.6,77,13.0 +stablelm-3b-4e1t,FP16,32,6174.2,105.9,77.1,13.0 +stable-zephyr-3b-dpo,FP16,32,6217.8,107.9,77.2,13.0 +glm-4-9b-chat,INT4-MIXED,32,6333.4,225,77.3,12.9 +phi-2,FP16,1024,6411.5,2065.2,77.3,12.9 +dolly-v2-3b,FP16,1024,6410.1,2075,77.7,12.9 +llama-3.1-8b,INT4-MIXED,32,6324.6,182.2,78.8,12.7 +red-pajama-incite-chat-3b-v1,FP16,1023,6394.2,2752.4,79.2,12.6 +stablelm-3b-4e1t,FP16,1023,6386.9,2953.3,79.5,12.6 +glm-4-9b-chat,INT4-MIXED,1024,6439.5,3282.2,80,12.5 +baichuan2-7b-chat,INT4-MIXED,1024,6174.1,2752.6,80.6,12.4 +gemma-7b-it,INT4-MIXED,1024,6795.4,3118.3,80.6,12.4 +llama-3.1-8b,INT4-MIXED,1024,6324.8,2865.7,81.3,12.3 +gpt-j-6b,INT8-CW,32,6793.2,167.6,85,11.8 +qwen-7b-chat,INT4-MIXED,32,7274.8,168.8,85.2,11.7 +gpt-j-6b,INT8-CW,1024,6793.3,2668.4,88.8,11.3 +qwen-7b-chat,INT4-MIXED,1024,7610.3,2991.9,90.6,11.0 +flan-t5-xxl,INT4-MIXED,1139,23514,540.8,94.9,10.5 +falcon-7b-instruct,INT8-CW,32,7764.1,181.3,95.5,10.5 +llama-2-7b-chat-hf,INT8-CW,32,7330.9,172,96.1,10.4 
+falcon-7b-instruct,INT8-CW,1024,7987.4,3072.8,98.1,10.2 +qwen2-7b,INT8-CW,32,8175.3,211.3,99.6,10.0 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv index 09799a2de31fe6..9aa769e4dd61b9 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv @@ -1,182 +1,116 @@ -Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec -opt-125m-gptq,INT4-MIXED,1024,1513.6,81.9,7.8,128.21 -opt-125m-gptq,INT4-MIXED,32,979.9,50.4,7.9,126.58 -tiny-llama-1.1b-chat,INT4-MIXED,1024,1943.3,176.3,16.8,59.52 -tiny-llama-1.1b-chat,INT4-MIXED,32,1982.2,59.5,17.1,58.48 -qwen2-0.5b,INT4-MIXED,32,2678,117.3,18.7,53.48 -tiny-llama-1.1b-chat,INT8-CW,32,2080.9,59.4,19,52.63 -qwen2-0.5b,INT4-MIXED,1024,3036.1,165.5,19.2,52.08 -tiny-llama-1.1b-chat,INT8-CW,1024,2287,241.4,19.6,51.02 -qwen2-0.5b,INT8-CW,1024,3084.9,172.1,20,50.00 -qwen2-0.5b,INT8-CW,32,2518,105.5,21.4,46.73 -red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2793.6,141.8,23.9,41.84 -qwen2-1.5b,INT4-MIXED,32,4515.4,118.7,24,41.67 -qwen2-1.5b,INT4-MIXED,1024,4930.1,229.6,24.3,41.15 -dolly-v2-3b,INT4-MIXED,32,2486.1,174,25.4,39.37 -phi-2,INT4-MIXED,32,2552.9,210.6,26.9,37.17 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,2934.1,464.5,27.5,36.36 -qwen2-1.5b,INT8-CW,32,4813.4,119.1,27.8,35.97 -opt-2.7b,INT4-MIXED,31,3172.5,131.9,28.5,35.09 -red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,3038.2,447.1,28.6,34.97 -dolly-v2-3b,INT4-MIXED,1024,2947.4,409,28.8,34.72 -qwen2-1.5b,INT8-CW,1024,5394.8,327.9,29.3,34.13 -stable-zephyr-3b-dpo,INT4-MIXED,30,2728.1,131.2,29.8,33.56 -phi-2,INT4-MIXED,32,2805.1,208.3,30.2,33.11 -minicpm-1b-sft,INT8-CW,31,3104.2,147.8,30.9,32.36 -phi-2,INT4-MIXED,1024,3058.9,602.9,31.1,32.15 -minicpm-1b-sft,INT4-MIXED,31,2970.1,183.7,31.1,32.15 -stablelm-3b-4e1t,INT4-MIXED,32,3077.1,183.2,31.6,31.65 -opt-2.7b,INT4-MIXED,937,3416.7,429.4,31.6,31.65 -stable-zephyr-3b-dpo,INT4-MIXED,946,3211.8,428.8,32.3,30.96 -phi-3-mini-4k-instruct,INT4-MIXED,31,3014.5,116,32.5,30.77 -phi-3-mini-4k-instruct,INT4-MIXED,38,2957.4,153.9,32.5,30.77 -phi-2,INT4-MIXED,1024,3278.9,613.3,33.4,29.94 -phi-3-mini-4k-instruct,INT4-MIXED,38,3288.5,152.9,33.4,29.94 -phi-3-mini-4k-instruct,INT4-MIXED,31,3265.1,123.6,34.1,29.33 -gemma-2b-it,INT4-MIXED,32,4162.1,208.8,34.2,29.24 -stablelm-3b-4e1t,INT4-MIXED,1024,3525.8,524.5,35,28.57 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3427.8,777.5,36.5,27.40 -phi-3-mini-4k-instruct,INT4-MIXED,1023,3405.4,554.1,36.7,27.25 -gemma-2b-it,INT4-MIXED,1024,5053.1,354.8,36.9,27.10 -minicpm-1b-sft,FP16,31,3595.5,124.9,36.9,27.10 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3547.2,755.8,37.1,26.95 -phi-3-mini-4k-instruct,INT4-MIXED,1023,3528.4,536.4,37.4,26.74 -red-pajama-incite-chat-3b-v1,INT8-CW,32,3747.7,189.9,38.1,26.25 -opt-2.7b,INT8-CW,31,3810.7,145.7,38.5,25.97 -chatglm3-6b,INT4-MIXED,32,4120.7,67.3,38.7,25.84 -dolly-v2-3b,INT8-CW,32,3747,188.4,39.2,25.51 -chatglm3-6b,INT4-MIXED,32,4482.9,69.9,40.7,24.57 -chatglm3-6b,INT4-MIXED,1024,4146,606.8,41,24.39 -opt-2.7b,INT8-CW,937,4458.9,587.8,41.8,23.92 -red-pajama-incite-chat-3b-v1,INT8-CW,1024,4088.4,634.1,41.9,23.87 -red-pajama-incite-chat-3b-v1,INT8-CW,1020,4086.8,653.4,42,23.81 -phi-2,INT8-CW,32,3794.6,202.7,42.1,23.75 -chatglm3-6b,INT4-MIXED,1024,4446.7,598.6,42.3,23.64 -stablelm-3b-4e1t,INT8-CW,32,3652.5,146,42.6,23.47 -stable-zephyr-3b-dpo,INT8-CW,30,3768.6,151.9,42.6,23.47 
-dolly-v2-3b,INT8-CW,1024,4092,603.1,42.9,23.31 -stablelm-3b-4e1t,INT8-CW,1024,4143.2,671.7,45.2,22.12 -gemma-2b-it,INT8-CW,32,4878.4,221.6,45.6,21.93 -phi-2,INT8-CW,1024,4153.6,810.3,46,21.74 -llama-2-7b-chat-hf,INT4-MIXED,32,4394.6,109.7,46.2,21.65 -chatglm3-6b-gptq,INT4-MIXED,32,5218.9,79.7,46.7,21.41 -stable-zephyr-3b-dpo,INT8-CW,946,4360.1,627.8,46.8,21.37 -vicuna-7b-v1.5,INT4-MIXED,32,4482.3,101.2,47.2,21.19 -gemma-2b-it,INT8-CW,1024,5837.1,507.1,48,20.83 -llama-2-7b-gptq,INT4-MIXED,32,4734.3,102.8,48.1,20.79 -orca-mini-3b,INT4-MIXED,32,2720.1,132,48.1,20.79 -qwen-7b-chat,INT4-MIXED,32,7803.7,178.5,48.3,20.70 -mistral-7b-v0.1,INT4-MIXED,31,4537.5,99,48.5,20.62 -codegen25-7b,INT4-MIXED,32,4723.3,108.5,48.5,20.62 -chatglm3-6b-gptq,INT4-MIXED,1024,5150.8,614.2,48.8,20.49 -mistral-7b-v0.1,INT4-MIXED,32,4572,102.9,48.8,20.49 -llama-3-8b,INT4-MIXED,33,4991.2,252.2,50.9,19.65 -qwen-7b-chat-gptq,INT4-MIXED,32,8088.4,212.6,51,19.61 -chatglm2-6b,INT4-MIXED,32,4960.6,105.5,51.2,19.53 -gpt-j-6b,INT4-MIXED,32,4699.5,259.2,51.4,19.46 -llama-3.1-8b,INT4-MIXED,31,4897.8,106.9,51.5,19.42 -llama-3-8b,INT4-MIXED,32,4999.7,105.9,51.6,19.38 -qwen-7b-chat,INT4-MIXED,32,8085.9,193.5,51.7,19.34 -falcon-7b-instruct,INT4-MIXED,32,5416.2,175,52.5,19.05 -mistral-7b-v0.1,INT4-MIXED,1007,4772.6,803,52.6,19.01 -qwen1.5-7b-chat,INT4-MIXED,32,6027.3,174.9,53,18.87 -mistral-7b-v0.1,INT4-MIXED,1024,4775,717.6,53,18.87 -llama-2-7b-chat-hf,INT4-MIXED,1024,4976.5,992.1,53.1,18.83 -qwen2-7b,INT4-MIXED,32,7087.1,138.1,53.3,18.76 -llama-2-7b-gptq,INT4-MIXED,1024,5351.2,711.6,53.7,18.62 -llama-3-8b,INT4-MIXED,32,5472.8,109.4,53.7,18.62 -phi-3-mini-4k-instruct,INT8-CW,38,4575.3,115.9,53.7,18.62 -stablelm-7b,INT4-MIXED,32,5213.7,128.5,53.8,18.59 -phi-3-mini-4k-instruct,INT8-CW,31,4571.8,118.9,53.8,18.59 -llama-3-8b,INT4-MIXED,33,5480.4,246.8,53.9,18.55 -llama-3-8b,INT4-MIXED,32,5528.2,144.9,54.3,18.42 -llama-3.1-8b,INT4-MIXED,31,5377.3,112.8,54.3,18.42 -chatglm2-6b,INT4-MIXED,1024,5232.3,759.6,54.6,18.32 -llama-3.1-8b,INT4-MIXED,31,5440.4,126.4,54.8,18.25 -llama-3-8b,INT4-MIXED,33,5532.8,248.2,54.9,18.21 -codegen25-7b,INT4-MIXED,1024,5412.9,714.8,55,18.18 -mistral-7b-v0.1,INT4-MIXED,32,4998.5,117.3,55.2,18.12 -mistral-7b-v0.1,INT4-MIXED,31,5000.2,122.4,55.6,17.99 -llama-3-8b,INT4-MIXED,1024,5594,953.5,56.6,17.67 -gpt-j-6b,INT4-MIXED,1024,5323.8,1254,56.8,17.61 -llama-3-8b,INT4-MIXED,1025,5596.7,1192.3,56.8,17.61 -qwen2-7b,INT4-MIXED,1024,7722.1,714.2,57,17.54 -phi-3-mini-4k-instruct,INT8-CW,1023,5067.1,818.5,57.4,17.42 -phi-3-mini-4k-instruct,INT8-CW,1061,5086.1,975.1,57.4,17.42 -llama-2-7b-chat-hf,INT4-MIXED,32,5087.7,126.2,57.9,17.27 -stablelm-7b,INT4-MIXED,1020,5780.5,1248.4,59,16.95 -llama-3-8b,INT4-MIXED,1025,6088.9,1381.5,59,16.95 -llama-3-8b,INT4-MIXED,1024,6084.8,931.2,59.2,16.89 -llama-3-8b,INT4-MIXED,1025,6141.2,1494.3,59.4,16.84 -llama-3-8b,INT4-MIXED,1024,6133.8,1075.2,59.6,16.78 -mistral-7b-v0.1,INT4-MIXED,1024,5472.6,794.3,59.7,16.75 -zephyr-7b-beta,INT4-MIXED,32,5328.5,103.5,59.8,16.72 -falcon-7b-instruct,INT4-MIXED,1024,5677.5,686.2,59.8,16.72 -mistral-7b-v0.1,INT4-MIXED,1007,5243.5,1074,59.9,16.69 -qwen1.5-7b-chat,INT4-MIXED,1024,7096.7,1132.7,60,16.67 -qwen-7b-chat,INT4-MIXED,1024,8872.6,792.8,61,16.39 -qwen-7b-chat,INT4-MIXED,1024,9164.4,822.6,63.3,15.80 -orca-mini-3b,INT8-CW,32,4221.7,170.6,63.5,15.75 -llama-2-7b-chat-hf,INT4-MIXED,1024,5708.1,1397.9,63.6,15.72 -glm-4-9b,INT4-MIXED,33,6402.9,307.1,63.8,15.67 -zephyr-7b-beta,INT4-MIXED,1024,5572.4,1156.4,64.3,15.55 
-glm-4-9b,INT4-MIXED,32,6383.1,256.2,64.5,15.50 -baichuan2-7b-chat,INT4-MIXED,32,5926.3,191.8,65.8,15.20 -opt-2.7b,FP16,31,5886,112.2,68,14.71 -dolly-v2-3b,FP16,32,6161.5,147.5,69.5,14.39 -red-pajama-incite-chat-3b-v1,FP16,32,6265.4,146.2,69.6,14.37 -glm-4-9b,INT4-MIXED,1024,6994.5,1013.7,69.8,14.33 -opt-2.7b,FP16,937,6345,379.5,71.6,13.97 -glm-4-9b,INT4-MIXED,1025,7014.9,1416.8,72.5,13.79 -phi-2,FP16,32,6204.7,189.2,72.9,13.72 -stable-zephyr-3b-dpo,FP16,30,6221.4,159.7,73,13.70 -dolly-v2-3b,FP16,1024,6669.9,424.3,73.3,13.64 -red-pajama-incite-chat-3b-v1,FP16,1020,6658.8,484.7,73.4,13.62 -stablelm-3b-4e1t,FP16,32,6216.3,145.4,73.5,13.61 -qwen-7b-chat,INT4-MIXED,32,9294.9,144.4,73.8,13.55 -red-pajama-incite-chat-3b-v1,FP16,1024,6755.1,469.1,73.9,13.53 -qwen-7b-chat-gptq,INT4-MIXED,1024,9152.1,827.2,75.1,13.32 -gemma-7b-it,INT4-MIXED,32,7991.4,128.6,75.8,13.19 -chatglm2-6b,INT8-CW,32,6854.4,110.2,76.3,13.11 -chatglm3-6b,INT8-CW,32,6754.8,112.3,76.4,13.09 -stable-zephyr-3b-dpo,FP16,946,6940,428.6,76.7,13.04 -baichuan2-7b-chat,INT4-MIXED,1024,6930.2,1229.5,76.7,13.04 -gemma-7b-it,INT4-MIXED,32,8061.5,125.6,76.7,13.04 -stablelm-3b-4e1t,FP16,1024,6722.9,480.8,77,12.99 -phi-2,FP16,1024,6709.4,624.1,77.2,12.95 -chatglm2-6b,INT8-CW,1024,7132.9,1361.9,78.7,12.71 -chatglm3-6b,INT8-CW,1024,7037.5,1389.2,78.7,12.71 -qwen-7b-chat,INT4-MIXED,1024,10374.1,1357.5,81.1,12.33 -gemma-7b-it,INT4-MIXED,1024,9398,1268.5,82.7,12.09 -gemma-7b-it,INT4-MIXED,1024,9469.5,1268,83.2,12.02 -gpt-j-6b,INT8-CW,32,7126.5,255.2,87.2,11.47 -falcon-7b-instruct,INT8-CW,32,8287.6,131.1,88.4,11.31 -llama-2-7b-chat-hf,INT8-CW,32,7474.9,139.5,89.7,11.15 -codegen25-7b,INT8-CW,32,7559.4,138,90.8,11.01 -vicuna-7b-v1.5,INT8-CW,32,7390.8,136.6,90.8,11.01 -falcon-7b-instruct,INT8-CW,1024,8546.8,1205.9,92.2,10.85 -stablelm-7b,INT8-CW,32,8356.4,143,92.4,10.82 -qwen2-7b,INT8-CW,32,9940.7,132,92.5,10.81 -baichuan2-13b-chat,INT4-MIXED,32,9879.2,184.9,93.3,10.72 -phi-3-mini-4k-instruct,FP16,38,8290,125.2,93.4,10.71 -phi-3-mini-4k-instruct,FP16,31,8290.5,109.5,93.5,10.70 -gpt-j-6b,INT8-CW,1024,7759,1996.8,93.9,10.65 -llama-2-7b-chat-hf,INT8-CW,1024,8097.8,1701.6,94.7,10.56 -phi-3-medium-4k-instruct,INT4-MIXED,38,8210.4,527,95.1,10.52 -mistral-7b-v0.1,INT8-CW,31,7882.4,128.6,95.1,10.52 -vicuna-7b-v1.5,INT8-CW,1024,8013.2,1558.1,95.1,10.52 -mistral-7b-v0.1,INT8-CW,32,7886.9,140.6,95.2,10.50 -qwen2-7b,INT8-CW,1024,10573.1,1564.5,95.3,10.49 -codegen25-7b,INT8-CW,1024,8253.1,1526.3,95.7,10.45 -zephyr-7b-beta,INT8-CW,32,7785.3,144.4,95.8,10.44 -stablelm-7b,INT8-CW,1020,8921.9,1845,96.9,10.32 -mistral-7b-v0.1,INT8-CW,1007,8127.4,1648.4,97.4,10.27 -qwen-7b-chat,INT8-CW,32,11083.2,140.6,97.7,10.24 -qwen1.5-7b-chat,INT8-CW,32,8870,156.4,98.1,10.19 -llama-3.1-8b,INT8-CW,31,8600.3,189.2,98.4,10.16 -mistral-7b-v0.1,INT8-CW,1024,8134.7,1554.1,98.4,10.16 -qwen-14b-chat,INT4-MIXED,32,9876.2,192.3,98.6,10.14 -zephyr-7b-beta,INT8-CW,1024,8035.2,1580.4,98.8,10.12 -llama-3-8b,INT8-CW,32,8694.2,150.7,99.5,10.05 -llama-3-8b,INT8-CW,33,8700.4,175.4,99.8,10.02 -phi-3-mini-4k-instruct,FP16,1023,8795.2,601.3,99.9,10.01 +opt-125m-gptq,INT4-MIXED,32,1150.2,35.1,8.2,122.0 +opt-125m-gptq,INT4-MIXED,1024,1228,67,8.2,122.0 +qwen2-0.5b,INT4-MIXED,1024,1596.2,83.6,14.4,69.4 +qwen2-0.5b,INT4-MIXED,32,1675.6,63.6,14.9,67.1 +qwen2-0.5b,INT8-CW,32,1857.5,56.9,15,66.7 +qwen2-0.5b,INT8-CW,1024,1663.5,87,15,66.7 +bloomz-560m,INT8-CW,32,1761.1,62.4,15.1,66.2 +tiny-llama-1.1b-chat,INT4-MIXED,1024,1687.9,158.7,15.3,65.4 +bloomz-560m,INT4-MIXED,32,1894.2,40.1,15.4,64.9 
+tiny-llama-1.1b-chat,INT4-MIXED,32,1833,74.5,15.7,63.7 +bloomz-560m,INT8-CW,1024,1689.2,146.2,15.8,63.3 +bloomz-560m,INT4-MIXED,1024,1791,150.1,16.4,61.0 +tiny-llama-1.1b-chat,INT8-CW,32,2132.3,35.6,18.1,55.2 +bloomz-560m,FP16,32,2395,36,18.4,54.3 +tiny-llama-1.1b-chat,INT8-CW,1024,1986.4,149.3,19.2,52.1 +bloomz-560m,FP16,1024,2344.4,157.4,19.3,51.8 +qwen2-1.5b,INT4-MIXED,1024,2175.1,184.9,20.4,49.0 +qwen2-1.5b,INT4-MIXED,32,2066.2,94.9,20.6,48.5 +red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2599.8,118.1,25,40.0 +qwen2-1.5b,INT8-CW,32,2377.4,83.3,25.1,39.8 +qwen2-1.5b,INT8-CW,1024,2483.3,189.6,25.3,39.5 +gemma-2b-it,INT4-MIXED,32,2594.3,181.4,26.1,38.3 +phi-2,INT4-MIXED,32,2912.4,77.7,26.8,37.3 +gemma-2b-it,INT4-MIXED,1024,2594.4,248.2,26.9,37.2 +dolly-v2-3b,INT4-MIXED,32,2610.3,141.3,27,37.0 +stable-zephyr-3b-dpo,INT4-MIXED,32,2956.2,149.2,27.4,36.5 +minicpm-1b-sft,INT4-MIXED,31,2625.8,159.2,28.1,35.6 +red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,3069.7,413.5,28.2,35.5 +minicpm-1b-sft,INT8-CW,31,2868.2,74.1,28.9,34.6 +dolly-v2-3b,INT4-MIXED,1024,3081.5,386,29.4,34.0 +phi-2,INT4-MIXED,1024,3136.2,340,29.6,33.8 +stablelm-3b-4e1t,INT4-MIXED,32,3035.9,150.5,30.6,32.7 +phi-3-mini-4k-instruct,INT4-MIXED,32,3373.2,57.9,32.6,30.7 +stablelm-3b-4e1t,INT4-MIXED,1023,3296.5,456.2,34.4,29.1 +phi-3-mini-4k-instruct,INT4-MIXED,1024,3707.1,432,36.1,27.7 +gemma-2b-it,INT8-CW,32,3370.5,203.8,36.6,27.3 +minicpm-1b-sft,FP16,31,3679.6,80.6,36.9,27.1 +gemma-2b-it,INT8-CW,1024,3503.2,258.5,37.9,26.4 +dolly-v2-3b,INT8-CW,32,3893.3,142.9,39.4,25.4 +red-pajama-incite-chat-3b-v1,INT8-CW,32,3760.7,117.2,39.4,25.4 +phi-2,INT8-CW,32,3765.6,121,39.7,25.2 +stablelm-3b-4e1t,INT8-CW,32,3641.2,123,39.9,25.1 +stable-zephyr-3b-dpo,INT8-CW,32,3743.3,120.1,39.9,25.1 +red-pajama-incite-chat-3b-v1,INT8-CW,1023,4083.1,422.9,41.9,23.9 +dolly-v2-3b,INT8-CW,1024,4211.5,384.1,42.2,23.7 +phi-2,INT8-CW,1024,4096.8,367.2,42.5,23.5 +stablelm-3b-4e1t,INT8-CW,1023,4086.6,459.9,43.5,23.0 +llama-2-7b-gptq,INT4-MIXED,32,4754.8,75.1,46.2,21.6 +codegen25-7b,INT4-MIXED,32,4738.5,74.9,46.9,21.3 +gpt-j-6b,INT4-MIXED,32,4506.5,221.4,47.3,21.1 +decilm-7b-instruct,INT4-MIXED,36,4794.9,199.3,48.5,20.6 +qwen-7b-chat-gptq,INT4-MIXED,32,5615.8,100.5,49.8,20.1 +falcon-7b-instruct,INT4-MIXED,32,4738,79.9,50.7,19.7 +phi-3-mini-4k-instruct,INT8-CW,32,4589.9,83,50.8,19.7 +llama-2-7b-gptq,INT4-MIXED,1024,5246,640,52.1,19.2 +llama-3-8b,INT4-MIXED,33,5475.8,114.7,52.2,19.2 +codegen25-7b,INT4-MIXED,1024,5241.9,643.7,52.5,19.0 +mistral-7b-v0.1,INT4-MIXED,32,5015.3,94.6,52.6,19.0 +qwen2-7b,INT4-MIXED,32,5330.7,86.3,52.7,19.0 +gpt-j-6b,INT4-MIXED,1024,4926.5,867.2,53.2,18.8 +llama-2-7b-chat-hf,INT4-MIXED,32,5100.7,78.7,54.2,18.5 +llama-3-8b,INT4-MIXED,33,5527.1,114.9,54.3,18.4 +phi-3-mini-4k-instruct,INT8-CW,1024,4959.2,450.6,54.6,18.3 +falcon-7b-instruct,INT4-MIXED,1024,4863.4,660.5,54.9,18.2 +qwen2-7b,INT4-MIXED,1024,5375.4,659.8,55.4,18.1 +mistral-7b-v0.1,INT4-MIXED,1024,5286.8,662.8,55.6,18.0 +llama-3-8b,INT4-MIXED,1025,5601,992.5,56.1,17.8 +llama-3-8b,INT4-MIXED,1025,5646.8,1047.1,56.7,17.6 +baichuan2-7b-chat,INT4-MIXED,32,5913.7,86.5,57.2,17.5 +zephyr-7b-beta,INT4-MIXED,32,5339.7,88.5,58.2,17.2 +qwen-7b-chat-gptq,INT4-MIXED,1024,6315.8,664.2,60.1,16.6 +glm-4-9b-chat,INT4-MIXED,32,6349.7,86.5,60.5,16.5 +llama-2-7b-chat-hf,INT4-MIXED,1024,5592.7,856.8,60.9,16.4 +zephyr-7b-beta,INT4-MIXED,1024,5459.1,898.6,61.6,16.2 +baichuan2-7b-chat,INT4-MIXED,1024,6410.3,942.2,63.5,15.7 +gemma-7b-it,INT4-MIXED,32,5816.3,104.5,63.5,15.7 
+glm-4-9b-chat,INT4-MIXED,1024,6368.8,1128.2,63.8,15.7 +llama-3.1-8b,INT4-MIXED,32,6315.3,97.4,65,15.4 +llama-3.1-8b,INT4-MIXED,1024,6421.8,902.9,68.2,14.7 +gemma-7b-it,INT4-MIXED,1024,6233.2,1052.7,68.7,14.6 +qwen-7b-chat,INT4-MIXED,32,7320.5,132.3,68.8,14.5 +red-pajama-incite-chat-3b-v1,FP16,32,6318.9,79.2,70.7,14.1 +phi-2,FP16,32,6330.2,83.2,70.8,14.1 +dolly-v2-3b,FP16,32,6327.2,92.7,71.9,13.9 +stable-zephyr-3b-dpo,FP16,32,6356.4,79.8,72.2,13.9 +stablelm-3b-4e1t,FP16,32,6261.9,74.6,72.6,13.8 +phi-2,FP16,1024,6654.4,379.3,73.9,13.5 +red-pajama-incite-chat-3b-v1,FP16,1023,6640.3,442.6,74.4,13.4 +dolly-v2-3b,FP16,1024,6653.9,441.9,74.9,13.4 +qwen-7b-chat,INT4-MIXED,1024,7814.1,909.4,75.5,13.2 +stablelm-3b-4e1t,FP16,1023,6575.3,449.5,75.8,13.2 +falcon-7b-instruct,INT8-CW,32,7487.6,109.4,84.3,11.9 +gpt-j-6b,INT8-CW,32,6918.7,185.3,85.3,11.7 +llama-2-7b-chat-hf,INT8-CW,32,7494.7,110.6,87.9,11.4 +qwen2-7b,INT8-CW,32,8177.7,117.8,88.2,11.3 +falcon-7b-instruct,INT8-CW,1024,7621.2,675.4,88.3,11.3 +codegen25-7b,INT8-CW,32,7582.1,114.6,89,11.2 +qwen2-7b,INT8-CW,1024,8226.2,842,90.4,11.1 +gpt-j-6b,INT8-CW,1024,7353.1,1093.9,90.8,11.0 +phi-3-medium-4k-instruct,INT4-MIXED,38,8184.1,270.2,90.8,11.0 +qwen-7b-chat,INT8-CW,32,9223.8,138.4,91.3,11.0 +baichuan2-7b-chat,INT8-CW,32,8188.4,122.9,91.8,10.9 +phi-3-mini-4k-instruct,FP16,32,8311.5,98.2,92,10.9 +llama-2-7b-chat-hf,INT8-CW,1024,7984.3,874.9,92.8,10.8 +mistral-7b-v0.1,INT8-CW,32,7908.6,116.3,93.1,10.7 +baichuan2-13b-chat,INT4-MIXED,32,10016.5,165.7,93.2,10.7 +zephyr-7b-beta,INT8-CW,32,7812.6,117,93.4,10.7 +codegen25-7b,INT8-CW,1024,8074.3,870.2,94,10.6 +decilm-7b-instruct,INT8-CW,36,7885.2,181.4,94.9,10.5 +mistral-7b-v0.1,INT8-CW,1024,8023.7,906.4,95.7,10.4 +zephyr-7b-beta,INT8-CW,1024,7930.8,915.2,96.3,10.4 +phi-3-medium-4k-instruct,INT4-MIXED,1061,8384.5,2225.7,96.7,10.3 +baichuan2-7b-chat,INT8-CW,1024,8678.3,956.7,96.8,10.3 +llama-3.1-8b,INT8-CW,32,8615.4,121.6,97.7,10.2 +llama-3-8b,INT8-CW,33,8615.1,131.3,97.7,10.2 +phi-3-mini-4k-instruct,FP16,1024,8695.2,509,99.9,10.0 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv index b16312fa09457c..dfc98271bcd21b 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv @@ -1,146 +1,82 @@ -Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec -opt-125m-gptq,INT4-MIXED,1024,1610.2,146,9.4,106.38 -opt-125m-gptq,INT4-MIXED,32,1087.6,60.8,9.5,105.26 -tiny-llama-1.1b-chat,INT4-MIXED,32,1977,85.7,20.2,49.50 -tiny-llama-1.1b-chat,INT4-MIXED,1024,1940.8,367.7,20.3,49.26 -tiny-llama-1.1b-chat,INT8-CW,32,1855.2,70.2,21.8,45.87 -qwen2-0.5b,INT4-MIXED,1024,3029.3,226.4,22.3,44.84 -qwen2-0.5b,INT8-CW,1024,3093,222,22.3,44.84 -qwen2-0.5b,FP16,1024,2509.5,234.3,22.4,44.64 -qwen2-0.5b,FP16,32,1933.8,146.4,22.4,44.64 -tiny-llama-1.1b-chat,INT8-CW,1024,2288.3,368.6,22.9,43.67 -qwen2-0.5b,INT4-MIXED,32,2670.9,115.1,23,43.48 -qwen2-0.5b,INT8-CW,32,2530,157.9,24.3,41.15 -red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2677.3,186.1,27.9,35.84 -qwen2-1.5b,INT4-MIXED,32,4515.1,179.8,28.7,34.84 -qwen2-1.5b,INT4-MIXED,1024,4927.5,254.3,29.1,34.36 -dolly-v2-3b,INT4-MIXED,32,2420.9,245.6,30.8,32.47 -qwen2-1.5b,INT8-CW,32,4824.9,165.1,31.2,32.05 -phi-2,INT4-MIXED,32,2523.5,233.9,31.5,31.75 -qwen2-1.5b,INT8-CW,1024,5401.8,331.1,32,31.25 -stable-zephyr-3b-dpo,INT4-MIXED,30,2816.2,151.3,32.9,30.40 
-red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,2646.7,860.6,33,30.30 -opt-2.7b,INT4-MIXED,31,2814.5,174.7,33.1,30.21 -phi-2,INT4-MIXED,32,2363.6,236.6,34,29.41 -stablelm-3b-4e1t,INT4-MIXED,32,3079.1,220,34,29.41 -minicpm-1b-sft,INT4-MIXED,31,2971,185.1,34.1,29.33 -minicpm-1b-sft,INT8-CW,31,3103.6,233.5,34.3,29.15 -dolly-v2-3b,INT4-MIXED,1024,2152.3,876.6,34.7,28.82 -phi-3-mini-4k-instruct,INT4-MIXED,38,2951,155.4,35.9,27.86 -phi-2,INT4-MIXED,1024,2689.9,971.7,36.5,27.40 -stablelm-3b-4e1t,INT4-MIXED,1024,3335.9,519.3,37.3,26.81 -opt-2.7b,INT4-MIXED,937,3227.5,639.5,37.7,26.53 -phi-3-mini-4k-instruct,INT4-MIXED,38,3289.7,161,37.9,26.39 -gemma-2b-it,INT4-MIXED,32,4099.6,258.6,38,26.32 -tiny-llama-1.1b-chat,FP16,32,3098.7,143.9,38.2,26.18 -stable-zephyr-3b-dpo,INT4-MIXED,946,3548.5,453.9,38.8,25.77 -tiny-llama-1.1b-chat,FP16,1024,3388.6,523,39,25.64 -phi-2,INT4-MIXED,1024,2594.7,964.2,39.1,25.58 -minicpm-1b-sft,FP16,31,3597.7,164.8,39.8,25.13 -gemma-2b-it,INT4-MIXED,1024,5059.1,669.1,40.5,24.69 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3431.8,840.1,40.6,24.63 -phi-3-mini-4k-instruct,INT4-MIXED,1061,3555.6,836.3,41.8,23.92 -qwen2-1.5b,FP16,32,3979.4,111.8,42.5,23.53 -red-pajama-incite-chat-3b-v1,INT8-CW,32,3639.9,199.1,43.6,22.94 -qwen2-1.5b,FP16,1024,4569.8,250.5,44.1,22.68 -dolly-v2-3b,INT8-CW,32,3727,248.2,44.5,22.47 -opt-2.7b,INT8-CW,31,3746.3,175.6,44.6,22.42 -stablelm-3b-4e1t,INT8-CW,32,3651.3,178,45.4,22.03 -chatglm3-6b,INT4-MIXED,32,4050.3,88.1,47.4,21.10 -phi-2,INT8-CW,32,3608.7,232,48.3,20.70 -red-pajama-incite-chat-3b-v1,INT8-CW,1020,2951,816.6,48.4,20.66 -stablelm-3b-4e1t,INT8-CW,1024,4142.8,658.7,48.5,20.62 -opt-2.7b,INT8-CW,937,4019,640.7,48.8,20.49 -stable-zephyr-3b-dpo,INT8-CW,30,3264.5,150.7,48.8,20.49 -gemma-2b-it,INT8-CW,32,4874.7,249.4,48.9,20.45 -chatglm3-6b,INT4-MIXED,32,3902.1,84.9,49.5,20.20 -dolly-v2-3b,INT8-CW,1024,2931.4,865.2,49.7,20.12 -gemma-2b-it,INT8-CW,1024,5834,545.4,50.7,19.72 -vicuna-7b-v1.5,INT4-MIXED,32,4560.3,119.4,50.7,19.72 -chatglm3-6b,INT4-MIXED,1024,4070.1,895.9,50.9,19.65 -chatglm3-6b,INT4-MIXED,1024,3832.1,854.4,52,19.23 -orca-mini-3b,INT4-MIXED,32,2345.5,132.8,52.2,19.16 -phi-2,INT8-CW,1024,3511.6,989.7,53.1,18.83 -chatglm2-6b,INT4-MIXED,32,4960.2,91.5,54.2,18.45 -qwen1.5-7b-chat,INT4-MIXED,32,5936.5,195.7,54.8,18.25 -stable-zephyr-3b-dpo,INT8-CW,946,3700.5,677.9,54.8,18.25 -llama-2-7b-chat-hf,INT4-MIXED,32,4010.5,113.7,55.6,17.99 -qwen-7b-chat,INT4-MIXED,32,7393,132.7,56.1,17.83 -chatglm2-6b,INT4-MIXED,1024,5234.5,747.3,56.2,17.79 -qwen2-7b,INT4-MIXED,32,7086.2,183,56.3,17.76 -phi-3-mini-4k-instruct,INT8-CW,38,4574.4,132.9,56.9,17.57 -llama-2-7b-gptq,INT4-MIXED,32,4134.1,120,58,17.24 -chatglm3-6b-gptq,INT4-MIXED,32,4288.1,99.4,58.1,17.21 -qwen2-7b,INT4-MIXED,1024,7716.4,734.9,58.3,17.15 -mistral-7b-v0.1,INT4-MIXED,31,4509.3,115,58.6,17.06 -codegen25-7b,INT4-MIXED,32,4211.8,136.5,59,16.95 -qwen1.5-7b-chat,INT4-MIXED,1024,7007.2,792.7,60.6,16.50 -chatglm3-6b-gptq,INT4-MIXED,1024,4545.4,860.3,60.9,16.42 -phi-3-mini-4k-instruct,INT8-CW,1061,5087.2,1029.5,60.9,16.42 -gpt-j-6b,INT4-MIXED,32,4013.5,316.1,61.1,16.37 -mistral-7b-v0.1,INT4-MIXED,1007,876.5,984.4,61.7,16.21 -llama-3-8b,INT4-MIXED,32,4357.1,132.8,62,16.13 -llama-2-7b-chat-hf,INT4-MIXED,1024,3564.8,1163.7,62.5,16.00 -qwen-7b-chat-gptq,INT4-MIXED,32,7384.1,217.8,62.9,15.90 -zephyr-7b-beta,INT4-MIXED,32,5331.6,125,62.9,15.90 -qwen-7b-chat,INT4-MIXED,32,6545.8,218.7,63,15.87 -llama-3.1-8b,INT4-MIXED,31,5076.3,110.4,63.4,15.77 -llama-3.1-8b,INT4-MIXED,31,4419,145.6,63.5,15.75 
-llama-2-7b-gptq,INT4-MIXED,1024,3434.2,921.6,64.4,15.53 -llama-3-8b,INT4-MIXED,32,4886.7,132.3,65.4,15.29 -stablelm-7b,INT4-MIXED,32,4768.4,132.1,65.5,15.27 -codegen25-7b,INT4-MIXED,1024,1429.7,967.5,65.7,15.22 -zephyr-7b-beta,INT4-MIXED,1024,5575.6,837.2,65.7,15.22 -llama-3-8b,INT4-MIXED,32,4888.3,161.8,66.2,15.11 -mistral-7b-v0.1,INT4-MIXED,31,4401.4,142.7,66.2,15.11 -llama-3-8b,INT4-MIXED,1024,3782.4,1091.5,66.8,14.97 -llama-3.1-8b,INT4-MIXED,31,4781.4,159.4,67,14.93 -glm-4-9b,INT4-MIXED,33,6392.6,298.7,67.2,14.88 -qwen-7b-chat,INT4-MIXED,1024,8472.8,1331.2,67.4,14.84 -gpt-j-6b,INT4-MIXED,1024,1237.8,1638.8,68.1,14.68 -llama-2-7b-chat-hf,INT4-MIXED,32,4497.4,153.2,68.7,14.56 -llama-3-8b,INT4-MIXED,1024,4526.9,1060.3,69.8,14.33 -mistral-7b-v0.1,INT4-MIXED,1007,3968.7,1033.1,69.9,14.31 -llama-3-8b,INT4-MIXED,1024,4297.9,1041.7,70,14.29 -orca-mini-3b,INT8-CW,32,3744.3,174,70.5,14.18 -stablelm-7b,INT4-MIXED,1020,4402.1,1186.4,70.5,14.18 -gemma-2b-it,FP16,32,5806.3,117.6,71.8,13.93 -glm-4-9b,INT4-MIXED,1025,7003.5,1354.2,72.5,13.79 -gemma-2b-it,FP16,1024,6804.7,490.6,73.4,13.62 -stablelm-3b-4e1t,FP16,32,6217,207.5,75.2,13.30 -llama-2-7b-chat-hf,INT4-MIXED,1024,4320.9,1247.7,75.8,13.19 -gemma-7b-it,INT4-MIXED,32,8050.6,134.6,76.1,13.14 -gemma-7b-it,INT4-MIXED,32,7992.6,146.4,76.1,13.14 -qwen-7b-chat,INT4-MIXED,1024,5712.7,1144.4,77.1,12.97 -stablelm-3b-4e1t,FP16,1024,6722.9,491.4,77.7,12.87 -chatglm2-6b,INT8-CW,32,6856.2,111.6,78.9,12.67 -opt-2.7b,FP16,31,5377.5,138,79.6,12.56 -chatglm2-6b,INT8-CW,1024,7133.8,1012.1,81,12.35 -red-pajama-incite-chat-3b-v1,FP16,32,5672.5,211,81.2,12.32 -gemma-7b-it,INT4-MIXED,1024,9399.5,1726.7,82.2,12.17 -dolly-v2-3b,FP16,32,5573,230.6,82.5,12.12 -gemma-7b-it,INT4-MIXED,1024,9460,1241.2,82.7,12.09 -opt-2.7b,FP16,937,4727.8,618.8,84.6,11.82 -baichuan2-7b-chat,INT4-MIXED,32,5782.4,274.1,84.8,11.79 -phi-2,FP16,32,5497.3,244.9,85,11.76 -stable-zephyr-3b-dpo,FP16,30,5714.8,173.1,86,11.63 -red-pajama-incite-chat-3b-v1,FP16,1020,5262.2,817.4,86.2,11.60 -dolly-v2-3b,FP16,1024,2376.1,935.5,87,11.49 -qwen-7b-chat,INT4-MIXED,32,8597.4,226.2,87.7,11.40 -phi-2,FP16,1024,4063.9,969.8,89.7,11.15 -chatglm3-6b,INT8-CW,32,6158.8,123.4,89.8,11.14 -stable-zephyr-3b-dpo,FP16,946,5337.1,781.4,90.5,11.05 -baichuan2-7b-chat,INT4-MIXED,1024,807.4,1725.7,91.8,10.89 -vicuna-7b-v1.5,INT8-CW,32,7391,171.3,92.5,10.81 -chatglm3-6b,INT8-CW,1024,550.7,1210.9,93.3,10.72 -phi-3-mini-4k-instruct,FP16,38,8299.3,142,94.1,10.63 -qwen2-7b,INT8-CW,32,9941.1,139.1,94.9,10.54 -qwen-7b-chat-gptq,INT4-MIXED,1024,6545,1103.9,95.8,10.44 -qwen2-7b,INT8-CW,1024,10575.1,1183,96.7,10.34 -qwen-7b-chat,INT4-MIXED,1024,6777.4,1309.6,96.9,10.32 -vicuna-7b-v1.5,INT8-CW,1024,8013.7,1154.6,96.9,10.32 -phi-3-medium-4k-instruct,INT4-MIXED,38,8212.8,448.3,97,10.31 -zephyr-7b-beta,INT8-CW,32,7888,144.8,97.4,10.27 -phi-3-mini-4k-instruct,FP16,1061,8814.8,1195.7,98.7,10.13 -zephyr-7b-beta,INT8-CW,1024,8136.7,1191.6,99.4,10.06 -llama-2-13b-chat-hf,INT4-MIXED,32,6927.5,165.3,99.9,10.01 +opt-125m-gptq,INT4-MIXED,32,833.1,15.6,3.9,256.4 +opt-125m-gptq,INT4-MIXED,1024,955.9,553.8,4.8,208.3 +bloomz-560m,INT4-MIXED,32,1457.5,48.5,11.1,90.1 +qwen2-0.5b,INT4-MIXED,32,1167.8,95.7,11.5,87.0 +qwen2-0.5b,INT4-MIXED,1024,1266,2330.3,12.7,78.7 +qwen2-0.5b,INT8-CW,32,1496.3,90.5,12.8,78.1 +bloomz-560m,INT8-CW,32,1724.2,84,13.9,71.9 +qwen2-0.5b,INT8-CW,1024,1593,2370.7,14,71.4 +bloomz-560m,INT4-MIXED,1024,1691,2005.3,15.2,65.8 +qwen2-0.5b,FP16,32,2989.8,94.6,15.9,62.9 +bloomz-560m,INT8-CW,1024,1941,2343.4,16.1,62.1 
+qwen2-0.5b,FP16,1024,3088.1,2376.8,17.4,57.5 +bloomz-560m,FP16,32,3857,86.7,17.5,57.1 +bloomz-560m,FP16,1024,4085.6,2373.4,19.8,50.5 +tiny-llama-1.1b-chat,INT4-MIXED,32,1738.9,237.4,20,50.0 +tiny-llama-1.1b-chat,INT8-CW,32,2471.2,224.6,22.6,44.2 +tiny-llama-1.1b-chat,INT4-MIXED,1024,1929.3,5993,22.7,44.1 +tiny-llama-1.1b-chat,INT8-CW,1024,2661.8,6238.8,25.2,39.7 +qwen2-1.5b,INT4-MIXED,32,2429,312.8,28.4,35.2 +tiny-llama-1.1b-chat,FP16,32,4834.9,231.7,28.9,34.6 +tiny-llama-1.1b-chat,FP16,1024,5023.2,6191.5,31.7,31.5 +qwen2-1.5b,INT4-MIXED,1024,2600.3,7597.3,31.8,31.4 +stablelm-3b-4e1t,INT4-MIXED,32,3982.1,348.4,32.1,31.2 +qwen2-1.5b,INT8-CW,32,3619,301,32.7,30.6 +qwen2-1.5b,INT8-CW,1024,3790.3,7990.5,34.6,28.9 +stablelm-3b-4e1t,INT4-MIXED,1023,4455.4,11963.2,39.2,25.5 +minicpm-1b-sft,INT4-MIXED,31,5815.4,214.3,40.1,24.9 +qwen2-1.5b,FP16,32,7582.3,304.4,42.2,23.7 +minicpm-1b-sft,INT8-CW,31,6609.6,210.6,43.3,23.1 +qwen2-1.5b,FP16,1024,7753.4,7915.3,44.2,22.6 +gemma-2b-it,INT4-MIXED,32,3728.2,523,46.2,21.6 +stable-zephyr-3b-dpo,INT4-MIXED,32,3689.3,656.5,47.4,21.1 +gemma-2b-it,INT4-MIXED,1024,4207.3,11867.9,47.5,21.1 +minicpm-1b-sft,FP16,31,8999.8,222.2,49.1,20.4 +red-pajama-incite-chat-3b-v1,INT4-MIXED,32,3448.1,1028.9,49.6,20.2 +dolly-v2-3b,INT4-MIXED,32,3448.4,714.8,49.9,20.0 +gemma-2b-it,INT8-CW,32,5423.2,488.8,51,19.6 +gemma-2b-it,INT8-CW,1024,5902.7,12434.4,52.3,19.1 +stable-zephyr-3b-dpo,INT8-CW,32,5630.3,694.5,54.4,18.4 +phi-2,INT4-MIXED,32,3732.9,723.2,54.5,18.3 +phi-2,INT8-CW,32,5600.4,747,55.7,18.0 +dolly-v2-3b,INT8-CW,32,5589.7,1009.8,55.9,17.9 +red-pajama-incite-chat-3b-v1,INT8-CW,32,5590.1,698.9,55.9,17.9 +stablelm-3b-4e1t,INT8-CW,32,5630.1,660.7,56.1,17.8 +dolly-v2-3b,INT4-MIXED,1024,3984.5,15502.8,56.5,17.7 +red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,3915.6,15363.9,56.6,17.7 +llama-2-7b-gptq,INT4-MIXED,32,8618.5,782.9,56.9,17.6 +phi-2,INT4-MIXED,1024,4251.3,15317,61,16.4 +phi-2,INT8-CW,1024,6119.4,15886.6,62,16.1 +red-pajama-incite-chat-3b-v1,INT8-CW,1023,6056.9,15984.9,62.2,16.1 +dolly-v2-3b,INT8-CW,1024,6124.9,16099.7,62.5,16.0 +stablelm-3b-4e1t,INT8-CW,1023,6097.1,16206.9,62.5,16.0 +gemma-2b-it,FP16,32,12208.2,501.4,65.5,15.3 +llama-3-8b,INT4-MIXED,33,8741.2,869,65.7,15.2 +llama-2-7b-gptq,INT4-MIXED,1024,9468.1,26350.7,66.1,15.1 +qwen-7b-chat-gptq,INT4-MIXED,32,8561,773.7,67,14.9 +gemma-2b-it,FP16,1024,12687.8,12168.7,67.1,14.9 +mistral-7b-v0.1,INT4-MIXED,32,8588.7,1020.6,67.4,14.8 +llama-2-7b-chat-hf,INT4-MIXED,32,8626.8,1100,69.4,14.4 +phi-2,FP16,32,11385.9,693.8,70.2,14.2 +dolly-v2-3b,FP16,32,11359,688.5,70.5,14.2 +stable-zephyr-3b-dpo,FP16,32,11432.9,648.5,70.6,14.2 +red-pajama-incite-chat-3b-v1,FP16,32,11364,692.4,70.7,14.1 +stablelm-3b-4e1t,FP16,32,11432.6,649,71.1,14.1 +llama-3-8b,INT4-MIXED,1025,9254.8,29700.3,71.9,13.9 +mistral-7b-v0.1,INT4-MIXED,1024,9121.9,29492.9,73.3,13.6 +phi-3-mini-4k-instruct,INT8-CW,32,7646.1,952.6,75.7,13.2 +qwen-7b-chat-gptq,INT4-MIXED,1024,10458.7,29022.2,75.9,13.2 +zephyr-7b-beta,INT4-MIXED,32,9217.5,1196.6,76.2,13.1 +phi-2,FP16,1024,11902.2,15868,77,13.0 +dolly-v2-3b,FP16,1024,11892.5,15987.1,77.1,13.0 +baichuan2-7b-chat,INT4-MIXED,32,9440.3,1118.1,77.3,12.9 +red-pajama-incite-chat-3b-v1,FP16,1023,11829.1,16008.7,77.3,12.9 +stablelm-3b-4e1t,FP16,1023,11897.5,16030,77.7,12.9 +phi-3-mini-4k-instruct,INT4-MIXED,32,4961.9,968.8,78.2,12.8 +llama-2-7b-chat-hf,INT4-MIXED,1024,9478.1,28958.6,78.6,12.7 +zephyr-7b-beta,INT4-MIXED,1024,9764.2,30982,82.3,12.2 +phi-3-mini-4k-instruct,INT8-CW,1024,8255.7,23200.5,83.1,12.0 
+phi-3-mini-4k-instruct,INT4-MIXED,1024,5570.2,22277.1,85.7,11.7 +baichuan2-7b-chat,INT4-MIXED,1024,10305.2,29010,86.4,11.6 +phi-3-mini-4k-instruct,FP16,32,15292.6,934.7,96.4,10.4 +qwen-7b-chat,INT4-MIXED,32,10964.7,1413,97.8,10.2 \ No newline at end of file diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf b/docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf index bedd9c28286476..53198c7ddb7089 100644 Binary files a/docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf and b/docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf differ diff --git a/docs/sphinx_setup/_static/css/custom.css b/docs/sphinx_setup/_static/css/custom.css index f922069c45e354..de8a05732a4d06 100644 --- a/docs/sphinx_setup/_static/css/custom.css +++ b/docs/sphinx_setup/_static/css/custom.css @@ -923,6 +923,8 @@ h5 { position: relative; bottom: -16px; left: 0; + margin-left: auto; + padding-right: 30px; } .modal-footer-content { diff --git a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf index 5b6178d85c504b..786f68fdbb86c7 100644 Binary files a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf and b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf differ diff --git a/docs/sphinx_setup/_static/html/modal.html b/docs/sphinx_setup/_static/html/modal.html index 38eb673824f97e..e7bcc1c1c16c58 100644 --- a/docs/sphinx_setup/_static/html/modal.html +++ b/docs/sphinx_setup/_static/html/modal.html @@ -87,6 +87,6 @@
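A note on reading the benchmark CSVs above: each row lists topology, precision, input size, max RSS memory, first-token latency (ms), second-token latency (ms), and second-token throughput (tok/sec), and the throughput column is consistent with 1000 divided by the second-token latency. A minimal Python sketch of that relationship, checked against one row copied from llm_models_7-155H.csv (the formula is inferred from the data, not stated in the files themselves):

.. code-block:: python

   # Second-token latency in milliseconds -> tokens per second.
   def tokens_per_second(second_token_latency_ms: float) -> float:
       return 1000.0 / second_token_latency_ms

   # opt-125m-gptq, INT4-MIXED, input size 32 reports a 2nd latency of 8.1 ms
   # and a 2nd tok/sec of 123.5 in llm_models_7-155H.csv.
   assert round(tokens_per_second(8.1), 1) == 123.5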

Graph Results

- + + \ No newline at end of file diff --git a/docs/sphinx_setup/_static/html/modalLLM.html b/docs/sphinx_setup/_static/html/modalLLM.html index 37b569d0bd4078..e8535c87f16090 100644 --- a/docs/sphinx_setup/_static/html/modalLLM.html +++ b/docs/sphinx_setup/_static/html/modalLLM.html @@ -87,6 +87,6 @@

Graph Results

- + + \ No newline at end of file diff --git a/docs/sphinx_setup/_static/js/graphs.js b/docs/sphinx_setup/_static/js/graphs.js index 697911bad9402c..04e34d6c2fefe5 100644 --- a/docs/sphinx_setup/_static/js/graphs.js +++ b/docs/sphinx_setup/_static/js/graphs.js @@ -60,8 +60,8 @@ class Filter { // param: GraphData[], clientPlatforms[] static BySortPlatforms(graphDataArr, platformsArr) { return graphDataArr - .filter((data) => platformsArr.includes(data.Platform)) - .sort((a, b) => a.Platform.localeCompare(b.Platform)); + .filter((data) => platformsArr.includes(data.Platform)) + .sort((a, b) => a.Platform.localeCompare(b.Platform)); //sort is necessary } } @@ -145,8 +145,8 @@ class Graph { array.push([obj]) } }) - return array; + return array; } // this returns an object that is used to ender the chart @@ -283,13 +283,13 @@ $(document).ready(function () { const models = networkModels.map((networkModel) => createCheckMark(networkModel, 'networkmodel')); modal.find('.models-column').append(models); - const selectAllModelsButton = createCheckMark('', 'networkmodel', false , false); + const selectAllModelsButton = createCheckMark('', 'networkmodel', false, false); modal.find('.models-selectall').append(selectAllModelsButton); - const selectAllPlatformsButton = createCheckMark('', 'platform', false , false); + const selectAllPlatformsButton = createCheckMark('', 'platform', false, false); modal.find('.platforms-selectall').append(selectAllPlatformsButton); - const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false , false)); + const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false, false)); modal.find('.precisions-column').append(precisions); selectAllCheckboxes(precisions); @@ -304,7 +304,7 @@ $(document).ready(function () { modal.find('#modal-display-graphs').hide(); modal.find('.ietype-column input').first().prop('checked', true); - const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false , true)); + const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false, true)); modal.find('.kpi-column').append(kpiLabels); $('body').prepend(modal); @@ -511,6 +511,7 @@ $(document).ready(function () { listContainer.style.margin = 0; listContainer.style.padding = 0; listContainer.style.paddingLeft = '0px'; + listContainer.style.float = "right"; legendContainer.appendChild(listContainer); } @@ -521,57 +522,55 @@ $(document).ready(function () { const htmlLegendPlugin = { id: 'htmlLegend', afterUpdate(chart, args, options) { - + charts = [...new Set([...charts, ...[chart]])]; const ul = getOrCreateLegendList(chart, chart.options.plugins.htmlLegend.containerID); - // Remove old legend items while (ul.firstChild) { ul.firstChild.remove(); } - const items = chart.legend.legendItems; + const items = chart.options.plugins.legend.labels.generateLabels(chart); items.forEach(item => { const li = document.createElement('li'); li.style.alignItems = 'center'; li.style.display = 'block'; li.style.flexDirection = 'column'; - li.style.marginLeft = '4px'; - + li.style.marginLeft = '6px'; + li.style.cursor = "pointer"; + li.style.fontSize = '0.6rem'; + li.style.textDecoration = item.hidden ? 
'line-through' : ''; li.onclick = () => { - chart.toggleDataVisibility(item.index); - chart.update(); + charts.forEach((chartItem) => { + chartItem.setDatasetVisibility(item.datasetIndex, !chartItem.isDatasetVisible(item.datasetIndex)); + chartItem.update(); + }) }; - - // Color box + const boxSpan = document.createElement('span'); boxSpan.style.background = item.fillStyle; boxSpan.style.borderColor = item.strokeStyle; - boxSpan.style.borderWidth = item.lineWidth + 'px'; boxSpan.style.display = 'inline-block'; boxSpan.style.height = '10px'; boxSpan.style.marginRight = '4px'; boxSpan.style.width = '30px'; - // Text - const textContainer = document.createElement('p'); - textContainer.style.color = '#666'; - textContainer.style.margin = 0; - textContainer.style.padding = 0; - textContainer.style.fontSize = '0.6rem'; - textContainer.style.marginLeft = '3px'; - textContainer.style.textDecoration = item.hidden ? 'line-through' : ''; + const textSpan = document.createElement('span'); + textSpan.style.bottom = '1px' + textSpan.style.position = 'relative' + textSpan.style.fontSize = '0.6rem'; + textSpan.style.textDecoration = item.hidden ? 'line-through' : ''; const text = document.createTextNode(item.text); - textContainer.appendChild(text); + textSpan.appendChild(text); li.appendChild(boxSpan); - li.appendChild(textContainer); + li.appendChild(textSpan); ul.appendChild(li); }); } }; - function getChartOptionsByEngines(containerId, allowedAxisIDs) { + function getChartOptionsByEngines(allowedAxisIDs) { const axisConfigs = { x: { title: { display: true, text: 'Request Rate' } @@ -602,11 +601,11 @@ $(document).ready(function () { }, {}), plugins: { legend: { display: false }, - htmlLegend: { containerID: containerId } + htmlLegend: { containerID: 'modal-footer' } } }; } - function getChartOptions(title, containerId) { + function getChartOptions(title) { return { responsive: true, indexAxis: 'y', @@ -633,7 +632,7 @@ $(document).ready(function () { display: false }, htmlLegend: { - containerID: containerId, + containerID: 'modal-footer', } } } @@ -838,7 +837,7 @@ $(document).ready(function () { new Chart(context, { type: 'bar', data: getChartData(labels, datasets), - options: getChartOptions(chartTitle, containerId), + options: getChartOptions(chartTitle), plugins: [htmlLegendPlugin] }); }); @@ -858,9 +857,9 @@ $(document).ready(function () { }) } } - + var charts = []; function processMetricByEngines(labels, datasets, container, widthClass, id) { - var heightRatio = (80 + (labels.length * 55)); + var heightRatio = (30 + (labels.length * 55)); var chart = $('
'); const containerId = `legend-container-${id}`; const legend = $(`
`); @@ -894,8 +893,7 @@ $(document).ready(function () { backgroundColor: precision.color, yAxisID: precision.label === "Throughput" ? 'y' : 'y1', fill: false - } - ) + }) }) }) @@ -914,9 +912,10 @@ $(document).ready(function () { labels: labels, datasets: graphDatas }, - options: getChartOptionsByEngines(containerId, allowedAxisIDs), + options: getChartOptionsByEngines(allowedAxisIDs), plugins: [htmlLegendPlugin] }); + }); } diff --git a/docs/sphinx_setup/_templates/layout.html b/docs/sphinx_setup/_templates/layout.html index 7f873c679c2e83..0d2331b2c83fe3 100644 --- a/docs/sphinx_setup/_templates/layout.html +++ b/docs/sphinx_setup/_templates/layout.html @@ -11,7 +11,7 @@ - + diff --git a/docs/sphinx_setup/index.rst b/docs/sphinx_setup/index.rst index 4da0aa8f29535c..ad98be58cde1cd 100644 --- a/docs/sphinx_setup/index.rst +++ b/docs/sphinx_setup/index.rst @@ -11,8 +11,8 @@ generative AI, video, audio, and language with models from popular frameworks li TensorFlow, ONNX, and more. Convert and optimize models, and deploy across a mix of Intel® hardware and environments, on-premises and on-device, in the browser or in the cloud. -Check out the `OpenVINO Cheat Sheet [PDF] `__ -Check out the `GenAI Quick-start Guide [PDF] `__ +| Check out the `OpenVINO Cheat Sheet [PDF] `__ +| Check out the `GenAI Quick-start Guide [PDF] `__ .. container:: diff --git a/src/bindings/python/src/openvino/passes/__init__.py b/src/bindings/python/src/openvino/passes/__init__.py new file mode 100644 index 00000000000000..037d9774c5b9a0 --- /dev/null +++ b/src/bindings/python/src/openvino/passes/__init__.py @@ -0,0 +1,19 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# type: ignore +# flake8: noqa + +from openvino._pyopenvino.passes import ModelPass, Matcher, MatcherPass, PassBase, WrapType, Or, AnyInput, Optional +from openvino._pyopenvino.passes import ( + consumers_count, + has_static_dim, + has_static_dims, + has_static_shape, + has_static_rank, + rank_equals, + type_matches, + type_matches_any, +) +from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version +from openvino.passes.manager import Manager +from openvino.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite diff --git a/src/bindings/python/src/openvino/runtime/passes/graph_rewrite.py b/src/bindings/python/src/openvino/passes/graph_rewrite.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/passes/graph_rewrite.py rename to src/bindings/python/src/openvino/passes/graph_rewrite.py diff --git a/src/bindings/python/src/openvino/runtime/passes/manager.py b/src/bindings/python/src/openvino/passes/manager.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/passes/manager.py rename to src/bindings/python/src/openvino/passes/manager.py diff --git a/src/bindings/python/src/openvino/runtime/passes/__init__.py b/src/bindings/python/src/openvino/runtime/passes/__init__.py index 19a28c7576decd..a74f91fdcfab2e 100644 --- a/src/bindings/python/src/openvino/runtime/passes/__init__.py +++ b/src/bindings/python/src/openvino/runtime/passes/__init__.py @@ -3,8 +3,8 @@ # type: ignore # flake8: noqa -from openvino._pyopenvino.passes import ModelPass, Matcher, MatcherPass, PassBase, WrapType, Or, AnyInput, Optional -from openvino._pyopenvino.passes import ( +from openvino.passes import ModelPass, Matcher, MatcherPass, PassBase, WrapType, Or, AnyInput, Optional +from openvino.passes import ( 
consumers_count, has_static_dim, has_static_dims, @@ -14,6 +14,6 @@ type_matches, type_matches_any, ) -from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version -from openvino.runtime.passes.manager import Manager -from openvino.runtime.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite +from openvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version +from openvino.passes.manager import Manager +from openvino.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite diff --git a/src/bindings/python/src/openvino/runtime/passes/graph_rewrite/__init__.py b/src/bindings/python/src/openvino/runtime/passes/graph_rewrite/__init__.py new file mode 100644 index 00000000000000..a9690e891ff5e8 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/passes/graph_rewrite/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# type: ignore +from openvino.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite diff --git a/src/bindings/python/src/openvino/runtime/passes/manager/__init__.py b/src/bindings/python/src/openvino/runtime/passes/manager/__init__.py new file mode 100644 index 00000000000000..1a2674dd03c2b1 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/passes/manager/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# type: ignore +from openvino.passes.manager import Manager diff --git a/src/bindings/python/tests/test_graph/test_manager.py b/src/bindings/python/tests/test_graph/test_manager.py index ff72ef43158d6e..5101414228c06e 100644 --- a/src/bindings/python/tests/test_graph/test_manager.py +++ b/src/bindings/python/tests/test_graph/test_manager.py @@ -9,7 +9,7 @@ import openvino.runtime.opset10 as ops from openvino import Core, Model -from openvino.runtime.passes import Manager, Serialize, ConstantFolding, Version +from openvino.passes import Manager, Serialize, ConstantFolding, Version from tests.test_graph.util import count_ops_of_type from tests.utils.helpers import create_filenames_for_ir, compare_models @@ -48,6 +48,23 @@ def test_constant_folding(): assert np.allclose(values_out, values_expected) +def test_runtime_passes_manager(): + import openvino.runtime.passes as rt + node_constant = ops.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32)) + node_ceil = ops.ceiling(node_constant) + model = Model(node_ceil, [], "TestModel") + + assert count_ops_of_type(model, node_ceil) == 1 + assert count_ops_of_type(model, node_constant) == 1 + + pass_manager = rt.Manager() + pass_manager.register_pass(rt.ConstantFolding()) + pass_manager.run_passes(model) + + assert count_ops_of_type(model, node_ceil) == 0 + assert count_ops_of_type(model, node_constant) == 1 + + # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request @pytest.fixture def prepare_ir_paths(request, tmp_path): diff --git a/src/bindings/python/tests/test_transformations/test_graph_rewrite.py b/src/bindings/python/tests/test_transformations/test_graph_rewrite.py index 4821dad33dff0a..5f5c100597adf2 100644 --- a/src/bindings/python/tests/test_transformations/test_graph_rewrite.py +++ b/src/bindings/python/tests/test_transformations/test_graph_rewrite.py @@ -2,7 +2,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: 
Apache-2.0 from openvino.runtime import opset8 -from openvino.runtime.passes import Manager, GraphRewrite, MatcherPass, WrapType, Matcher +from openvino.passes import Manager, GraphRewrite, MatcherPass, WrapType, Matcher from tests.test_transformations.utils.utils import count_ops, get_relu_model, PatternReplacement @@ -19,6 +19,19 @@ def test_graph_rewrite(): assert count_ops(model, "Relu") == [2] +def test_runtime_graph_rewrite(): + import openvino.runtime.passes as rt + model = get_relu_model() + + manager = rt.Manager() + # check that register pass returns pass instance + anchor = manager.register_pass(rt.GraphRewrite()) + anchor.add_matcher(PatternReplacement()) + manager.run_passes(model) + + assert count_ops(model, "Relu") == [2] + + def test_register_new_node(): class InsertExp(MatcherPass): def __init__(self): diff --git a/src/bindings/python/tests/test_transformations/test_manager.py b/src/bindings/python/tests/test_transformations/test_manager.py index e78c62d8c1a5c4..e8f113f30b381c 100644 --- a/src/bindings/python/tests/test_transformations/test_manager.py +++ b/src/bindings/python/tests/test_transformations/test_manager.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.passes import Manager, GraphRewrite, BackwardGraphRewrite, Serialize +from openvino.passes import Manager, GraphRewrite, BackwardGraphRewrite, Serialize from tests.test_transformations.utils.utils import MyModelPass, PatternReplacement, expect_exception diff --git a/src/bindings/python/tests/test_transformations/test_matcher_pass.py b/src/bindings/python/tests/test_transformations/test_matcher_pass.py index c32483be316658..8127e4b7612d56 100644 --- a/src/bindings/python/tests/test_transformations/test_matcher_pass.py +++ b/src/bindings/python/tests/test_transformations/test_matcher_pass.py @@ -2,7 +2,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from openvino.runtime import opset8 -from openvino.runtime.passes import Manager, Matcher, MatcherPass, WrapType +from openvino.passes import Manager, Matcher, MatcherPass, WrapType from openvino.runtime.utils import replace_node from tests.test_transformations.utils.utils import count_ops, get_relu_model, PatternReplacement diff --git a/src/bindings/python/tests/test_transformations/test_model_pass.py b/src/bindings/python/tests/test_transformations/test_model_pass.py index 5df3d0a9024dc2..efc797535d8bb8 100644 --- a/src/bindings/python/tests/test_transformations/test_model_pass.py +++ b/src/bindings/python/tests/test_transformations/test_model_pass.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.passes import Manager +from openvino.passes import Manager from tests.test_transformations.utils.utils import get_relu_model, MyModelPass diff --git a/src/bindings/python/tests/test_transformations/test_pattern_ops.py b/src/bindings/python/tests/test_transformations/test_pattern_ops.py index 24b28061582c68..c445c281e47171 100644 --- a/src/bindings/python/tests/test_transformations/test_pattern_ops.py +++ b/src/bindings/python/tests/test_transformations/test_pattern_ops.py @@ -6,8 +6,8 @@ from openvino import PartialShape from openvino.runtime import opset13 as ops -from openvino.runtime.passes import Matcher, WrapType, Or, AnyInput, Optional -from openvino.runtime.passes import ( +from openvino.passes import Matcher, WrapType, Or, AnyInput, 
Optional +from openvino.passes import ( consumers_count, has_static_dim, has_static_dims, diff --git a/src/bindings/python/tests/test_transformations/test_public_transformations.py b/src/bindings/python/tests/test_transformations/test_public_transformations.py index a10fea786b9770..eac356cd1341f5 100644 --- a/src/bindings/python/tests/test_transformations/test_public_transformations.py +++ b/src/bindings/python/tests/test_transformations/test_public_transformations.py @@ -7,7 +7,7 @@ from openvino import Model, PartialShape, Shape, Core from openvino.runtime import opset13 as ops -from openvino.runtime.passes import ( +from openvino.passes import ( Manager, ConstantFolding, MakeStateful, diff --git a/src/bindings/python/tests/test_transformations/utils/utils.py b/src/bindings/python/tests/test_transformations/utils/utils.py index e0239ce05fdc9d..b5f09a68ff1511 100644 --- a/src/bindings/python/tests/test_transformations/utils/utils.py +++ b/src/bindings/python/tests/test_transformations/utils/utils.py @@ -4,7 +4,7 @@ from openvino import Model, PartialShape from openvino.runtime import opset13 as ops -from openvino.runtime.passes import ModelPass, Matcher, MatcherPass, WrapType +from openvino.passes import ModelPass, Matcher, MatcherPass, WrapType def get_relu_model(): diff --git a/src/common/transformations/include/ov_ops/glu.hpp b/src/common/transformations/include/ov_ops/glu.hpp index 760641978b574d..add8c3a0582525 100644 --- a/src/common/transformations/include/ov_ops/glu.hpp +++ b/src/common/transformations/include/ov_ops/glu.hpp @@ -75,10 +75,6 @@ class TRANSFORMATIONS_API GLU : public ov::op::Op { ov::element::Type m_output_type{}; }; -// TODO 157615: Move to shape_inference -TRANSFORMATIONS_API std::vector shape_infer(const GLU* op, - std::vector input_shapes); - } // namespace internal } // namespace op } // namespace ov diff --git a/src/common/transformations/src/ov_ops/glu.cpp b/src/common/transformations/src/ov_ops/glu.cpp index bc3dfb89ab8b9b..9b5fb780d36bb8 100644 --- a/src/common/transformations/src/ov_ops/glu.cpp +++ b/src/common/transformations/src/ov_ops/glu.cpp @@ -4,10 +4,9 @@ #include "ov_ops/glu.hpp" +#include "glu_shape_inference.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/validation_util.hpp" -#include "openvino/op/variadic_split.hpp" -#include "variadic_split_shape_inference.hpp" namespace ov { namespace op { @@ -38,11 +37,9 @@ bool GLU::visit_attributes(ov::AttributeVisitor& visitor) { void GLU::validate_and_infer_types() { auto output_type = m_output_type == ov::element::undefined ? 
get_input_element_type(0) : m_output_type; - std::vector input_shapes = {get_input_partial_shape(0), - ov::PartialShape(ov::Shape{}), - ov::PartialShape(ov::Shape{2})}; - - set_output_type(0, output_type, shape_infer(this, input_shapes)[0]); + const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); + const auto output_shapes = shape_infer(this, input_shapes); + set_output_type(0, output_type, output_shapes[0]); } std::shared_ptr GLU::clone_with_new_inputs(const ov::OutputVector& new_args) const { @@ -54,21 +51,6 @@ std::shared_ptr GLU::clone_with_new_inputs(const ov::OutputVector& new_arg m_split_to_glu_idx, m_output_type); } - -std::vector shape_infer(const GLU* op, std::vector input_shapes) { - ov::op::v1::VariadicSplit variadic_split; - std::vector axis = {op->get_axis()}; - std::vector split_lengths = {op->get_split_lengths(), -1}; - - std::unordered_map const_data; - const_data.emplace(1, ov::Tensor(ov::element::i64, ov::Shape{}, static_cast(axis.data()))); - const_data.emplace( - 2, - ov::Tensor(ov::element::i64, ov::Shape{split_lengths.size()}, static_cast(split_lengths.data()))); - - return ov::op::v1::shape_infer(&variadic_split, input_shapes, ov::make_tensor_accessor(const_data)); -} - } // namespace internal } // namespace op } // namespace ov diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index 9badb007d526b9..e002756d361f1f 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -485,6 +485,7 @@ class OPENVINO_API Any { using Ptr = std::shared_ptr; virtual const std::type_info& type_info() const = 0; virtual std::vector base_type_info() const = 0; + bool is_base_type_info(const std::type_info& type_info) const; virtual const void* addressof() const = 0; void* addressof() { return const_cast(const_cast(this)->addressof()); @@ -506,6 +507,9 @@ class OPENVINO_API Any { std::string to_string() const; bool is(const std::type_info& other) const; + bool is_signed_integral() const; + bool is_unsigned_integral() const; + bool is_floating_point() const; template bool is() const { @@ -514,17 +518,24 @@ class OPENVINO_API Any { template T& as() & { - type_check(typeid(decay_t)); return *static_cast*>(addressof()); } template const T& as() const& { - type_check(typeid(decay_t)); return *static_cast*>(addressof()); } + template + T convert() const; + protected: + template + [[noreturn]] U convert_impl() const; + + template + U convert_impl() const; + virtual ~Base() = default; }; @@ -685,6 +696,92 @@ class OPENVINO_API Any { T value; }; + // Generic if there is no specialization for T. + template + T& as_impl(...) { + impl_check(); + if (is()) { + return _impl->as(); + } + + OPENVINO_THROW("Bad as from: ", _impl->type_info().name(), " to: ", typeid(T).name()); + } + + template ::value>::type* = nullptr> + T& as_impl(int) { + if (_impl != nullptr) { + if (_impl->is()) { + return _impl->as(); + } else { + _temp = std::make_shared>(); + _impl->read_to(*_temp); + return _temp->as(); + } + } else { + _temp = std::make_shared>(); + return _temp->as(); + } + } + + template < + class T, + typename std::enable_if>::value>::type* = nullptr> + T& as_impl(int) { + if (_impl == nullptr) { + _temp = std::make_shared>>(T{}); + return _temp->as(); + } else { + if (_impl->is()) { + return _impl->as(); + } else { + auto runtime_attribute = _impl->as_runtime_attribute(); + if (runtime_attribute == nullptr) { + OPENVINO_THROW("Any does not contains pointer to runtime_attribute. 
It contains ", + _impl->type_info().name()); + } + auto vptr = std::dynamic_pointer_cast(runtime_attribute); + if (vptr == nullptr && T::element_type::get_type_info_static() != runtime_attribute->get_type_info() && + T::element_type::get_type_info_static() != RuntimeAttribute::get_type_info_static()) { + OPENVINO_THROW("Could not as Any runtime_attribute to ", + typeid(T).name(), + " from ", + _impl->type_info().name(), + "; from ", + static_cast(runtime_attribute->get_type_info()), + " to ", + static_cast(T::element_type::get_type_info_static())); + } + _temp = std::make_shared>>( + std::static_pointer_cast(runtime_attribute)); + return _temp->as(); + } + } + } + + template ::value && + !std::is_same::type, bool>::value>::type* = nullptr> + T& as_impl(int); + + template ::value || util::Readable::value) && !std::is_same::value && + (!std::is_arithmetic::value || std::is_same::type, bool>::value)>::type* = + nullptr> + T& as_impl(int) { + impl_check(); + + if (is()) { + return _impl->as(); + } else if (_impl->is()) { + _temp = std::make_shared>>(); + _impl->read_to(*_temp); + return _temp->as(); + } + + OPENVINO_THROW("Bad as from: ", _impl->type_info().name(), " to: ", typeid(T).name()); + } + friend class ::ov::RuntimeAttribute; friend class ::ov::CompiledModel; friend class ::ov::proxy::CompiledModel; @@ -704,11 +801,11 @@ class OPENVINO_API Any { /// @brief Default constructor Any() = default; - /// @brief Сopy constructor + /// @brief Copy constructor /// @param other other Any object Any(const Any& other); - /// @brief Сopy assignment operator + /// @brief Copy assignment operator /// @param other other Any object /// @return reference to the current object Any& operator=(const Any& other); @@ -756,8 +853,8 @@ class OPENVINO_API Any { * @brief Inplace value construction function * * @tparam T Any type - * @tparam Args pack of paramter types passed to T constructor - * @param args pack of paramters passed to T constructor + * @tparam Args pack of parameter types passed to T constructor + * @param args pack of parameters passed to T constructor */ template static Any make(Args&&... args) { @@ -786,130 +883,21 @@ class OPENVINO_API Any { */ template bool is() const { - if (_impl != nullptr) { - if (_impl->is(typeid(decay_t))) { - return true; - } - for (const auto& type_index : _impl->base_type_info()) { - if (util::equal(type_index, typeid(decay_t))) { - return true; - } - } - } - return false; - } - - /** - * Dynamic cast to specified type - * @tparam T type - * @return casted object - */ - template - typename std::enable_if>::value, T>::type& as() { - if (_impl == nullptr) { - _temp = std::make_shared>>(T{}); - return *static_cast*>(_temp->addressof()); - } else { - if (_impl->is(typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } else { - auto runtime_attribute = _impl->as_runtime_attribute(); - if (runtime_attribute == nullptr) { - OPENVINO_THROW("Any does not contains pointer to runtime_attribute. 
It contains ", - _impl->type_info().name()); - } - auto vptr = std::dynamic_pointer_cast(runtime_attribute); - if (vptr == nullptr && T::element_type::get_type_info_static() != runtime_attribute->get_type_info() && - T::element_type::get_type_info_static() != RuntimeAttribute::get_type_info_static()) { - OPENVINO_THROW("Could not cast Any runtime_attribute to ", - typeid(T).name(), - " from ", - _impl->type_info().name(), - "; from ", - static_cast(runtime_attribute->get_type_info()), - " to ", - static_cast(T::element_type::get_type_info_static())); - } - _temp = std::make_shared>>( - std::static_pointer_cast(runtime_attribute)); - return *static_cast*>(_temp->addressof()); - } - } - } - - /** - * Dynamic cast to specified type - * @tparam T type - * @return casted object - */ - template - typename std::enable_if>::value && - !std::is_same::value && std::is_default_constructible::value && - (util::Istreamable::value || util::Readable::value), - T>::type& - as() { - impl_check(); - if (_impl->is(typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } else if (_impl->is(typeid(std::string))) { - _temp = std::make_shared>>(); - _impl->read_to(*_temp); - return *static_cast*>(_temp->addressof()); - } - for (const auto& type_index : _impl->base_type_info()) { - if (util::equal(type_index, typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } - } - OPENVINO_THROW("Bad cast from: ", _impl->type_info().name(), " to: ", typeid(T).name()); - } - - /** - * Dynamic cast to specified type - * @tparam T type - * @return casted object - */ - template - typename std::enable_if< - !std::is_convertible>::value && !std::is_same::value && - (!std::is_default_constructible::value || (!util::Istreamable::value && !util::Readable::value)), - T>::type& - as() { - impl_check(); - if (_impl->is(typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } - for (const auto& type_index : _impl->base_type_info()) { - if (util::equal(type_index, typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } - } - OPENVINO_THROW("Bad cast from: ", _impl->type_info().name(), " to: ", typeid(T).name()); + return _impl && (_impl->is() || _impl->is_base_type_info(typeid(decay_t))); } /** - * Dynamic cast to specified type + * Dynamic as to specified type * @tparam T type - * @return casted object + * @return reference to caster object */ template - typename std::enable_if::value, T>::type& as() { - if (_impl != nullptr) { - if (_impl->is(typeid(decay_t))) { - return *static_cast*>(_impl->addressof()); - } else { - _temp = std::make_shared>(); - _impl->read_to(*_temp); - return *static_cast(_temp->addressof()); - } - } else { - _temp = std::make_shared>(); - return *static_cast(_temp->addressof()); - } + T& as() { + return as_impl(int{}); } /** - * Dynamic cast to specified type + * Dynamic as to specified type * @tparam T type * @return const reference to caster object */ @@ -983,4 +971,40 @@ inline static void PrintTo(const Any& any, std::ostream* os) { } /** @endcond */ +template <> +OPENVINO_API unsigned long long Any::Base::convert() const; + +template <> +OPENVINO_API long long Any::Base::convert() const; + +template <> +OPENVINO_API double Any::Base::convert() const; + +template ::value && + !std::is_same::type, bool>::value>::type*> +T& Any::as_impl(int) { + impl_check(); + if (is()) { + return _impl->as(); + } else if (util::Readable::value && _impl->is()) { + _temp = std::make_shared>>(); + _impl->read_to(*_temp); + return _temp->as(); + } else if 
(_impl->is_signed_integral()) { + auto value = _impl->convert(); + _temp = std::make_shared>>(static_cast(value)); + return _temp->as(); + } else if (_impl->is_unsigned_integral()) { + auto value = _impl->convert(); + _temp = std::make_shared>>(static_cast(value)); + return _temp->as(); + } else if (_impl->is_floating_point()) { + auto value = _impl->convert(); + _temp = std::make_shared>>(static_cast(value)); + return _temp->as(); + } + + OPENVINO_THROW("Bad as from: ", _impl->type_info().name(), " to: ", typeid(T).name()); +} } // namespace ov diff --git a/src/core/reference/include/openvino/reference/utils/registers_pool.hpp b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp index 62dfe01ec4ef1d..4861ef4f7d999d 100644 --- a/src/core/reference/include/openvino/reference/utils/registers_pool.hpp +++ b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp @@ -64,7 +64,13 @@ class RegistersPool { } void release() { if (auto pool = regPool.lock()) { - pool->return_to_pool(reg); + try { + pool->return_to_pool(reg); + } catch (...) { + // This function is called by destructor and should not throw. Well formed Reg object won't cause + // any exception throw from return_to_pool, while on badly formed object the destructor is most + // likely called during exception stack unwind. + } regPool.reset(); } } @@ -90,8 +96,10 @@ class RegistersPool { RegistersPool::WeakPtr regPool; }; + static thread_local bool is_created; + virtual ~RegistersPool() { - check_unique_and_update(false); + is_created = false; } template @@ -178,7 +186,7 @@ class RegistersPool { } } - void check_unique_and_update(bool isCtor = true); + void check_unique_and_update(); PhysicalSet m_general_set; PhysicalSet m_simd_set; diff --git a/src/core/reference/src/utils/registers_pool.cpp b/src/core/reference/src/utils/registers_pool.cpp index 413fdcc3ed83cf..a1e6462aa51a36 100644 --- a/src/core/reference/src/utils/registers_pool.cpp +++ b/src/core/reference/src/utils/registers_pool.cpp @@ -34,16 +34,12 @@ RegistersPool::RegistersPool(std::initializer_list regsToExclude, in m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RSP)); } -void RegistersPool::check_unique_and_update(bool is_ctor) { - static thread_local bool is_created = false; - if (is_ctor) { - if (is_created) { - OPENVINO_THROW("There should be only one instance of RegistersPool per thread"); - } - is_created = true; - } else { - is_created = false; - } +thread_local bool RegistersPool::is_created = false; + +void RegistersPool::check_unique_and_update() { + OPENVINO_ASSERT(!is_created, "There should be only one instance of RegistersPool per thread"); + + is_created = true; } void RegistersPool::PhysicalSet::set_as_used(size_t reg_idx) { diff --git a/src/core/shape_inference/include/glu_shape_inference.hpp b/src/core/shape_inference/include/glu_shape_inference.hpp new file mode 100644 index 00000000000000..365b57244036a2 --- /dev/null +++ b/src/core/shape_inference/include/glu_shape_inference.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ov_ops/glu.hpp" +#include "utils.hpp" +#include "variadic_split_shape_inference.hpp" + +namespace ov { +namespace op { +namespace internal { +template > +std::vector shape_infer(const GLU* op, const std::vector& input_shapes) { + const auto inputs_count = input_shapes.size(); + NODE_SHAPE_INFER_CHECK(op, input_shapes, inputs_count == 1); + + int64_t axis = op->get_axis(); + std::vector split_lengths = 
{op->get_split_lengths(), -1}; + std::unordered_map const_data; + const_data.emplace(1, ov::Tensor(ov::element::i64, ov::Shape{}, &axis)); + const_data.emplace(2, ov::Tensor(ov::element::i64, ov::Shape{split_lengths.size()}, split_lengths.data())); + + const ov::Shape split_len_size{split_lengths.size()}; + const ov::Shape scalar{}; + std::vector variadic_split_input_shapes{input_shapes[0], scalar, split_len_size}; + + return {std::move( + ov::op::variadic_split::shape_infer(op, variadic_split_input_shapes, ov::make_tensor_accessor(const_data))[0])}; +} +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/core/shape_inference/include/variadic_split_shape_inference.hpp b/src/core/shape_inference/include/variadic_split_shape_inference.hpp index a0eff51f238e61..e0cd837003a331 100644 --- a/src/core/shape_inference/include/variadic_split_shape_inference.hpp +++ b/src/core/shape_inference/include/variadic_split_shape_inference.hpp @@ -10,10 +10,9 @@ namespace ov { namespace op { -namespace v1 { - +namespace variadic_split { template > -std::vector shape_infer(const VariadicSplit* op, +std::vector shape_infer(const Node* op, const std::vector& input_shapes, const ITensorAccessor& ta = make_tensor_accessor()) { constexpr bool is_dynamic_shape = std::is_base_of::value; @@ -120,6 +119,15 @@ std::vector shape_infer(const VariadicSplit* op, } return output_shapes; } +} // namespace variadic_split + +namespace v1 { +template > +std::vector shape_infer(const VariadicSplit* op, + const std::vector& input_shapes, + const ITensorAccessor& ta = make_tensor_accessor()) { + return op::variadic_split::shape_infer(op, input_shapes, ta); +} } // namespace v1 } // namespace op diff --git a/src/core/src/any.cpp b/src/core/src/any.cpp index 82dc01c99377fd..346819eced93e5 100644 --- a/src/core/src/any.cpp +++ b/src/core/src/any.cpp @@ -6,6 +6,17 @@ #include #include +namespace { +template +bool contains_type_index(Container&& types, const std::type_info& user_type) { + for (auto&& type : types) { + if (ov::util::equal(type, user_type)) { + return true; + } + } + return false; +} +} // namespace namespace ov { @@ -68,6 +79,48 @@ void Any::Base::read_to(Base& other) const { } } +bool Any::Base::is_base_type_info(const std::type_info& user_type) const { + return contains_type_index(base_type_info(), user_type); +} + +bool Any::Base::is_signed_integral() const { + return std::is_signed::value ? contains_type_index(std::initializer_list{typeid(char), + typeid(signed char), + typeid(short), + typeid(int), + typeid(long), + typeid(long long)}, + type_info()) + : contains_type_index(std::initializer_list{typeid(signed char), + typeid(short), + typeid(int), + typeid(long), + typeid(long long)}, + type_info()); +} + +bool Any::Base::is_unsigned_integral() const { + return std::is_signed::value + ? 
contains_type_index(std::initializer_list{typeid(unsigned char), + typeid(unsigned short), + typeid(unsigned int), + typeid(unsigned long), + typeid(unsigned long long)}, + type_info()) + : contains_type_index(std::initializer_list{typeid(char), + typeid(unsigned char), + typeid(unsigned short), + typeid(unsigned int), + typeid(unsigned long), + typeid(unsigned long long)}, + type_info()); +} +bool Any::Base::is_floating_point() const { + return contains_type_index( + std::initializer_list{typeid(float), typeid(double), typeid(long double)}, + type_info()); +} + Any::~Any() { _temp = {}; _impl = {}; @@ -293,4 +346,42 @@ void Write::operator()(std::ostream& os, const Any& any) const { } } // namespace util + +template +[[noreturn]] U Any::Base::convert_impl() const { + OPENVINO_THROW("Bad cast from: ", type_info().name(), " to: ", typeid(U).name()); +} + +template +U Any::Base::convert_impl() const { + return is() ? static_cast(as()) : convert_impl(); +} + +template <> +long long Any::Base::convert() const { + return std::is_signed::value ? convert_impl() + : convert_impl(); +} + +template <> +unsigned long long Any::Base::convert() const { + return std::is_signed::value ? convert_impl() + : convert_impl(); +} + +template <> +double Any::Base::convert() const { + return convert_impl(); +} } // namespace ov diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp index 3914a617ff2982..33e928d60b872d 100644 --- a/src/core/tests/any.cpp +++ b/src/core/tests/any.cpp @@ -11,7 +11,8 @@ #include "common_test_utils/test_assertions.hpp" #include "openvino/core/runtime_attribute.hpp" -using namespace ov; +namespace ov { +namespace test { class DestructorTest { public: @@ -735,3 +736,70 @@ TEST_F(AnyTests, EmptyStringAsAny) { ASSERT_EQ(p.as>(), ref_f); ASSERT_EQ(p.as>(), ref_i); } + +template +class AnyConversionTest : public AnyTests {}; + +TYPED_TEST_SUITE_P(AnyConversionTest); + +using AnyArithmeticTypes = ::testing::Types; + +TYPED_TEST_P(AnyConversionTest, AnyToOtherValue) { + const TypeParam test_value{static_cast(23.15f)}; + const auto a = Any{test_value}; + + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); + + EXPECT_EQ(a.as(), static_cast(test_value)); + EXPECT_EQ(a.as(), static_cast(test_value)); +} + +REGISTER_TYPED_TEST_SUITE_P(AnyConversionTest, AnyToOtherValue); +INSTANTIATE_TYPED_TEST_SUITE_P(InstantiationName, AnyConversionTest, AnyArithmeticTypes); + +TEST_F(AnyTests, AnyAsOtherTypeIsIncosisoinet) { + // To show member `as` current behaviour. + // Maybe there should be two members `as` which return value + // and `cast` returns reference if casted type is same as Any underlying type + auto a = Any{10}; + + auto& a_int = a.as(); + auto& a_str = a.as(); + + EXPECT_EQ(a_int, 10); + EXPECT_EQ(a_str, "10"); + + a_int = 15; + EXPECT_EQ(a_int, 15); + // as string ref still has old value + EXPECT_EQ(a_str, "10"); + + a_str = "30"; + EXPECT_EQ(a_int, 15); + // as string ref has new value but is not in sync what any contains. 
+ EXPECT_EQ(a_str, "30"); +} + +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp index b99e1bc62c4b11..2dccce257ae116 100644 --- a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp @@ -56,6 +56,7 @@ #include "gather_nd_shape_inference.hpp" #include "gather_shape_inference.hpp" #include "gather_tree_shape_inference.hpp" +#include "glu_shape_inference.hpp" #include "grid_sample_shape_inference.hpp" #include "group_convolution_backprop_shape_inference.hpp" #include "group_convolution_shape_inference.hpp" @@ -575,6 +576,7 @@ const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ _OV_OP_SHAPE_INFER_MASK_REG(ov::op::internal::AUGRUCell, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(ov::op::internal::AUGRUSequence, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(ov::op::internal::RMSNorm, ShapeInferTA, util::bit::mask(1)), + _OV_OP_SHAPE_INFER_MASK_REG(ov::op::internal::GLU, ShapeInferTA, util::bit::mask()), }; // clang-format on diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/glu_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/glu_shape_inference_test.cpp new file mode 100644 index 00000000000000..f7647d52dc5bae --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/glu_shape_inference_test.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "ov_ops/glu.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using testing::HasSubstr; + +TEST(StaticShapeInferenceTest, GLUStaticShapeInferenceTestDefaultCtor) { + constexpr int64_t axis = -1; + constexpr int64_t split_lengths = 48; + + const auto op = std::make_shared(); + const auto data = std::make_shared(element::f16, PartialShape::dynamic()); + + op->set_arguments(ov::OutputVector{data}); + op->set_axis(axis); + op->set_split_lengths(split_lengths); + + std::vector static_input_shapes = {StaticShape{20, 1, 96}}; + const auto static_output_shapes = shape_inference(op.get(), static_input_shapes); + ASSERT_EQ(static_output_shapes.size(), 1); + EXPECT_EQ(static_output_shapes[0], StaticShape({20, 1, 48})); +} + +TEST(StaticShapeInferenceTest, GLUStaticShapeInferenceTestBasic) { + constexpr int64_t axis = -1; + constexpr int64_t split_lengths = 48; + const auto glu_type = ov::op::internal::GLU::GluType::Swish; + + const auto data = std::make_shared(element::f16, PartialShape::dynamic()); + const auto op = std::make_shared(data, axis, split_lengths, glu_type, 1); + + std::vector static_input_shapes = {StaticShape{20, 1, 96}}; + const auto static_output_shapes = shape_inference(op.get(), static_input_shapes); + ASSERT_EQ(static_output_shapes.size(), 1); + EXPECT_EQ(static_output_shapes[0], StaticShape({20, 1, 48})); +} diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6d2825a3e3e937..5680eedcb8f87c 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -623,6 +623,15 @@ void primitive_inst::realloc_if_needed(bool prev_execution_skipped) { _max_output_layout_count[j] = 0; } 
} else { + _outputs[0] = variable.get_memory(); + + if (auto compressed_cache_variable = dynamic_cast(&variable)) { + _outputs[2] = compressed_cache_variable->get_compression_scale_state()->get_memory(); + + if (compressed_cache_variable->has_zp_state()) { + _outputs[3] = compressed_cache_variable->get_compression_zp_state()->get_memory(); + } + } GPU_DEBUG_TRACE_DETAIL << id() << " : realloc_if_needed: can_be_optimized = false and memories are not being shared" << std::endl; } } else { diff --git a/src/plugins/intel_gpu/src/graph/swiglu.cpp b/src/plugins/intel_gpu/src/graph/swiglu.cpp index e82e4e974b1868..ffd5333318cee4 100644 --- a/src/plugins/intel_gpu/src/graph/swiglu.cpp +++ b/src/plugins/intel_gpu/src/graph/swiglu.cpp @@ -3,6 +3,7 @@ // #include "ov_ops/glu.hpp" +#include "glu_shape_inference.hpp" #include "swiglu_inst.h" #include "primitive_type_base.h" @@ -32,11 +33,7 @@ std::vector swiglu_inst::calc_output_layouts(swiglu_node const& /*node*/ op.set_axis(desc->axis); op.set_split_lengths(desc->split_lengths); - std::vector input_shapes = { - impl_param.get_input_layout(0).get(), - ShapeType(ov::Shape({})), - ShapeType(ov::Shape{2}) - }; + std::vector input_shapes = {impl_param.get_input_layout(0).get()}; std::vector output_shapes = shape_infer(&op, input_shapes); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/resample_onnx.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/resample_onnx.cl index 3a7fb8be91954b..9f8f2ad5964bda 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/resample_onnx.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/resample_onnx.cl @@ -64,9 +64,6 @@ KERNEL (resample_onnx)(__global INPUT0_TYPE* input, const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X }; - if (feature_num >= OUTPUT_FEATURE_NUM) - return; - const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3]; const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4]; const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y); diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp index 77477648fd4860..4945cc8d717be3 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -530,4 +530,81 @@ TEST_F(KVCacheTests, smoke_multipleIterations_stateful_with_set_state) { this->test_smoke_multipleIterations_stateful(false, true, true, 1, 2, ov::element::f16, 5, 1, true); } +class KVCacheIssueTests: public ::testing::Test { +public: + void test_smoke_conflicted_memory_for_two_inf_req() { + #if defined(ANDROID) + GTEST_SKIP(); + #endif + auto core = ov::test::utils::PluginCache::get().core(); + + ov::AnyMap properties = { + ov::hint::kv_cache_precision(ov::element::undefined) + }; + + const size_t n_batch = 1; + const size_t n_heads = 32; + const size_t n_features = 10; + const size_t context_size = 20; + ov::element::Type element_type = ov::element::f16; + + const bool stateful = true; + + auto model = tests::make_llm_kv_cache_pattern(n_batch, + n_heads, + n_features, + element_type, + 2, + stateful, + false, + stateful); + auto compiled_model = core->compile_model(model, ov::test::utils::DEVICE_GPU, properties); + + auto input0 = model->get_parameters().at(0); + auto input1 = model->get_parameters().at(1); + + auto ireq1 = 
compiled_model.create_infer_request(); + auto ireq2 = compiled_model.create_infer_request(); + + auto ireq1_input0 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type, + {n_batch, context_size, n_heads, n_features}, -0.5f, 0.5f, 1); + auto ireq1_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type, + {n_batch, n_heads, context_size, context_size}, -0.5f, 0.5f, 1); + ireq1.set_tensor(input0, ireq1_input0); + ireq1.set_tensor(input1, ireq1_input1); + + auto ireq2_input0 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type, + {n_batch, context_size + 1, n_heads, n_features}, -0.5f, 0.5f, 555); + auto ireq2_input1 = ov::test::utils::create_and_fill_tensor_real_distribution(element_type, + {n_batch, n_heads, context_size + 1, context_size + 1}, -0.5f, 0.5f, 555); + ireq2.set_tensor(input0, ireq2_input0); + ireq2.set_tensor(input1, ireq2_input1); + + std::stringstream oss1; + std::stringstream oss2; + for (auto&& state : ireq1.query_state()) { + state.reset(); + } + ireq1.infer(); + for (auto&& state : ireq1.query_state()) { + oss1.write(reinterpret_cast(state.get_state().data()), state.get_state().get_byte_size()); + } + + for (auto&& state : ireq2.query_state()) { + state.reset(); + } + ireq2.infer(); + for (auto&& state : ireq1.query_state()) { + oss2.write(reinterpret_cast(state.get_state().data()), state.get_state().get_byte_size()); + } + + ASSERT_TRUE(oss1.str() == oss2.str()); + } +}; + +TEST_F(KVCacheIssueTests, conflicted_memory_for_two_inf_req) { + this->test_smoke_conflicted_memory_for_two_inf_req(); +} + + } // namespace diff --git a/tools/ovc/openvino/tools/ovc/convert.py b/tools/ovc/openvino/tools/ovc/convert.py index 77693ad4be2ca1..1bd61ff567e5d0 100644 --- a/tools/ovc/openvino/tools/ovc/convert.py +++ b/tools/ovc/openvino/tools/ovc/convert.py @@ -85,7 +85,11 @@ def convert_model( list of paths, objects derived from BaseExtension class or lists of objects. :param verbose: - Print detailed information about conversion. + Print detailed information about conversion. The detailed information is logged via standard logging library. + The log level can be changed by setting the log level using logging library. + Example: + import logging + logging.getLogger().setLevel(logging.DEBUG) :param share_weights: Reuse weights allocated in the original model. If input model is in file, then mmap is used to allocate weights directly from file. If input model is diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index aef054f8aafc24..152ff03c28e71a 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -243,8 +243,6 @@ def check_model_object(argv): def driver(argv: argparse.Namespace, non_default_params: dict): - init_logger('ERROR', argv.verbose) - # Log dictionary with non-default cli parameters where complex classes are excluded. 
log.debug(str(non_default_params)) @@ -433,7 +431,11 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used): telemetry.send_event('ovc', 'version', simplified_ie_version) # Initialize logger with 'ERROR' as default level to be able to form nice messages # before arg parser deliver log_level requested by user - init_logger('ERROR', False) + verbose = False + if "verbose" in args and args["verbose"] or "--verbose" in sys.argv: + verbose = True + + init_logger('ERROR', verbose, python_api_used) argv = None # Minimize modifications among other places in case if multiple pieces are passed as input_model if python_api_used: diff --git a/tools/ovc/openvino/tools/ovc/help.py b/tools/ovc/openvino/tools/ovc/help.py index e09102be39419e..4f312ef20be99c 100644 --- a/tools/ovc/openvino/tools/ovc/help.py +++ b/tools/ovc/openvino/tools/ovc/help.py @@ -43,4 +43,6 @@ def get_convert_model_help_specifics(): {'action': 'version', # FIXME: Why the following is not accessible from arg parser? 'version': 'OpenVINO Model Converter (ovc) {}'.format(VersionChecker().get_ie_version())}, + 'verbose': + {'description': 'Print detailed information about conversion.'} } diff --git a/tools/ovc/openvino/tools/ovc/logger.py b/tools/ovc/openvino/tools/ovc/logger.py index f3c24a8582d0a2..46bd043cf207b9 100644 --- a/tools/ovc/openvino/tools/ovc/logger.py +++ b/tools/ovc/openvino/tools/ovc/logger.py @@ -62,7 +62,10 @@ def filter(self, record: log.LogRecord): return True # if regex wasn't set print all logs -def init_logger(lvl: str, verbose: bool): +def init_logger(lvl: str, verbose: bool, python_api_used: bool): + if verbose and python_api_used: + # We need to not override logger in case of verbose=True to allow user set a log level + return global handler_num log_exp = os.environ.get('MO_LOG_PATTERN') if not verbose:
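The updated convert_model() docstring above notes that, with verbose=True, the detailed conversion output now goes through the standard logging module, so the caller controls its visibility via the root logger level. A minimal usage sketch under that assumption (the ONNX model path is a placeholder, not part of this patch):

import logging
from openvino.tools.ovc import convert_model

# Raise the root logger level so the detailed conversion log produced by
# verbose=True is actually emitted (DEBUG shows the most detail).
logging.getLogger().setLevel(logging.DEBUG)

# "model.onnx" is a hypothetical input model path used only for illustration.
ov_model = convert_model("model.onnx", verbose=True)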