diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 5a4f7795ea4a44..3783a7e8d5600a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-27384 +pr-27430 diff --git a/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile new file mode 100644 index 00000000000000..59239575be329c --- /dev/null +++ b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile @@ -0,0 +1,20 @@ +ARG REGISTRY="quay.io" +FROM openvinogithubactions.azurecr.io/quayio/pypa/manylinux2014_x86_64 + +USER root + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && /install_build_dependencies.sh + +# Install sccache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile new file mode 100644 index 00000000000000..2d5bc1c878069a --- /dev/null +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile @@ -0,0 +1,42 @@ +ARG REGISTRY="docker.io" +FROM ${REGISTRY}/library/ubuntu:22.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository --yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + gpg-agent \ + tzdata \ + # parallel gzip + pigz \ + python3 \ + python3-pip \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install docker +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] \ + https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + +RUN apt-get update && \ + apt-get install -y docker-ce docker-ce-cli containerd.io + +ENV DOCKER_BUILDKIT=1 \ No newline at end of file diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 29afb466d69a42..0de1708527739a 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -30,7 +30,7 @@ env: jobs: TensorFlow_Layer_Tests: name: TensorFlow Layer Tests - timeout-minutes: 30 + timeout-minutes: 45 runs-on: ${{ inputs.runner }} container: ${{ fromJSON(inputs.container) }} defaults: diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml new file mode 100644 index 00000000000000..ed375fb868459f --- 
/dev/null +++ b/.github/workflows/manylinux_2014.yml @@ -0,0 +1,191 @@ +name: Manylinux 2014 +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-manylinux-2014 + cancel-in-progress: true + +permissions: read-all + +env: + PIP_CACHE_PATH: /mount/caches/pip/linux + +jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: + needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_x64_docker + ov_build/manylinux2014_x86_64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Docker] + timeout-minutes: 120 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-32gb-manylinux + if: ${{ github.repository_owner == 'openvinotoolkit' }} + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64_docker }} + volumes: + - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING -e DOCKER_CONFIG -v ${{ github.workspace }}:${{ github.workspace }} + env: + CMAKE_BUILD_TYPE: 'Release' + OPENVINO_REPO: ${{ github.workspace }}/src + INSTALL_DIR: ${{ github.workspace }}/install/openvino + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + BUILD_DIR: ${{ github.workspace }}/build + DOCKER_CONFIG: "/mount/.docker" + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_CACHE_SIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: manylinux_2014 + + steps: + - name: Clone OpenVINO + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: ${{ env.OPENVINO_REPO }} + submodules: 'true' + + - name: System info + uses: ./src/.github/actions/system_info + + - name: Create docker build cache + run: | + docker volume create ov_build_cache + + - name: Build OpenVINO + run: | + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ov_build_cache:/work/build \ + -v ${{ 
env.INSTALL_DIR }}:/work/install \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + cmake -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -S /work/src -B /work/build && + cmake --build /work/build --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/install + " + + - name: Pack Artifacts + run: mkdir -p ${{ env.BUILD_DIR }} && tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz + working-directory: ${{ env.INSTALL_DIR }} + + - name: Build Python API(Python 3.9-3.13) + run: | + SUPPORTED_PYTHON_VERSIONS=("39" "310" "311" "312" "313") + for PY_VER in "${SUPPORTED_PYTHON_VERSIONS[@]}"; do + python_path=/opt/python/cp${PY_VER}-cp${PY_VER}/bin + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ${{ env.INSTALL_WHEELS_DIR }}:/work/wheels \ + -v ${{ env.PIP_CACHE_PATH }}:/work/pip_cache \ + -v ov_build_cache:/work/build \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + export PATH=${python_path}:\$PATH + PIP_VER=$(python3 -c "import pip; print(pip.__version__)") + export "PIP_CACHE_DIR=/work/pip_cache/${PIP_VER}" + python3 -m pip install -r /work/src/src/bindings/python/wheel/requirements-dev.txt && + cmake -DOpenVINODeveloperPackage_DIR=/work/build -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -S /work/src/src/bindings/python -B /work/build_py${PY_VER} && + cmake --build /work/build_py${PY_VER} --parallel $(nproc) --target ie_wheel --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build_py${PY_VER} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/wheels --component python_wheels + " + done + + # + # Upload build artifacts + # + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino wheels + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' \ No newline at end of file diff --git a/README.md b/README.md index c37f2ef42b9785..695f84f1628118 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ Check [system requirements](https://docs.openvino.ai/2024/about-openvino/system- [OpenVINO Quickstart example](https://docs.openvino.ai/2024/get-started.html) will walk you through the basics of deploying your first model. 
Learn how to optimize and deploy popular models with the [OpenVINO Notebooks](https://github.com/openvinotoolkit/openvino_notebooks)📚: -- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot.ipynb) -- [YOLOv8 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/quantizing-model-with-accuracy-control/yolov8-quantization-with-accuracy-control.ipynb) -- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb) +- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb) +- [YOLOv11 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/yolov11-optimization/yolov11-object-detection.ipynb) +- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/text-to-image-genai/text-to-image-genai.ipynb) +- [Multimodal assistant with LLaVa and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb) +- [Automatic speech recognition using Whisper and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb) Here are easy-to-follow code examples demonstrating how to run PyTorch and TensorFlow model inference using OpenVINO: diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 75d1882b8cee89..75c7ba90db7e76 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -64,7 +64,7 @@ implemented in your solutions. Click the buttons below to see the chosen benchma :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS for GenAI (incoming) + :material-regular:`bar_chart;1.4em` OVMS for GenAI diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index c55d3f44451f1c..4bf0b3a0acb19a 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -58,11 +58,11 @@ Performance Information F.A.Q. - Hugginface - Causal Decoder-only - 2048 - * - `Llama-2-7b-chat `__ + * - `Llama-2-7b-chat `__ - Meta AI - Auto regressive language - 4096 - * - `Llama-3-8b `__ + * - `Llama-3-8b `__ - Meta AI - Auto regressive language - 8192 @@ -74,7 +74,7 @@ Performance Information F.A.Q. - Huggingface - Auto regressive language - 4096 - * - `Stable-Diffusion-V1-5 `__ + * - `Stable-Diffusion-V1-5 `__ - Hugginface - Latent Diffusion Model - 77 @@ -118,7 +118,7 @@ Performance Information F.A.Q. 
- YOLO V5 Medium - object detection - 640x640 - * - `yolov8n `__ + * - `yolov8n `__ - Yolov8nano - object detection - 608x608 diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 4bd0b5d32c0f0e..6685a4325d57fe 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -943,7 +943,7 @@ Previous 2024 releases deployed in an arbitrary path without any code changes. * KServe REST API support has been extended to properly handle the string format in JSON body, just like the binary format compatible with NVIDIA Triton™. - * `A demo showcasing a full RAG algorithm `__ + * `A demo showcasing a full RAG algorithm `__ fully delegated to the model server has been added. **Neural Network Compression Framework** @@ -1000,7 +1000,7 @@ Previous 2024 releases * `RMBG background removal `__ * `AnimateAnyone: pose guided image to video generation `__ * `LLaVA-Next visual-language assistant `__ - * `TripoSR: single image 3d reconstruction `__ + * `TripoSR: single image 3d reconstruction `__ * `RAG system with OpenVINO and LangChain `__ *Known Issues* @@ -1309,7 +1309,7 @@ Discontinued in 2024 * `Accuracy Checker `__. * `Post-Training Optimization Tool `__ (POT). Neural Network Compression Framework (NNCF) should be used instead. - * A `Git patch `__ + * A `Git patch `__ for NNCF integration with `huggingface/transformers `__. The recommended approach is to use `huggingface/optimum-intel `__ for applying NNCF optimization on top of models from Hugging Face. @@ -1360,25 +1360,25 @@ Deprecated and to be removed in the future * See alternative: `PaddleOCR with OpenVINO™ `__, * See alternative: `Handwritten Text Recognition Demo `__ - * `Image In-painting with OpenVINO™ `__ + * `Image In-painting with OpenVINO™ `__ * See alternative: `Image Inpainting Python Demo `__ - * `Interactive Machine Translation with OpenVINO `__ + * `Interactive Machine Translation with OpenVINO `__ * See alternative: `Machine Translation Python* Demo `__ - * `Open Model Zoo Tools Tutorial `__ + * `Open Model Zoo Tools Tutorial `__ * No alternatives, demonstrates deprecated tools. 
- * `Super Resolution with OpenVINO™ `__ + * `Super Resolution with OpenVINO™ `__ * See alternative: `Super Resolution with PaddleGAN and OpenVINO `__ * See alternative: `Image Processing C++ Demo `__ - * `Image Colorization with OpenVINO Tutorial `__ - * `Interactive Question Answering with OpenVINO™ `__ + * `Image Colorization with OpenVINO Tutorial `__ + * `Interactive Question Answering with OpenVINO™ `__ * See alternative: `BERT Question Answering Embedding Python* Demo `__ * See alternative: `BERT Question Answering Python* Demo `__ @@ -1387,37 +1387,37 @@ Deprecated and to be removed in the future * See alternative: `Security Barrier Camera C++ Demo `__ - * `The attention center model with OpenVINO™ `_ - * `Image Generation with DeciDiffusion `_ - * `Image generation with DeepFloyd IF and OpenVINO™ `_ - * `Depth estimation using VI-depth with OpenVINO™ `_ + * `The attention center model with OpenVINO™ `_ + * `Image Generation with DeciDiffusion `_ + * `Image generation with DeepFloyd IF and OpenVINO™ `_ + * `Depth estimation using VI-depth with OpenVINO™ `_ * `Instruction following using Databricks Dolly 2.0 and OpenVINO™ `_ * See alternative: `LLM Instruction-following pipeline with OpenVINO `__ - * `Image generation with FastComposer and OpenVINO™ `__ + * `Image generation with FastComposer and OpenVINO™ `__ * `Video Subtitle Generation with OpenAI Whisper `__ * See alternative: `Automatic speech recognition using Distil-Whisper and OpenVINO `__ - * `Introduction to Performance Tricks in OpenVINO™ `__ - * `Speaker Diarization with OpenVINO™ `__ - * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ - * `Text Prediction with OpenVINO™ `__ - * `Training to Deployment with TensorFlow and OpenVINO™ `__ - * `Speech to Text with OpenVINO™ `__ - * `Convert and Optimize YOLOv7 with OpenVINO™ `__ - * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ + * `Introduction to Performance Tricks in OpenVINO™ `__ + * `Speaker Diarization with OpenVINO™ `__ + * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ + * `Text Prediction with OpenVINO™ `__ + * `Training to Deployment with TensorFlow and OpenVINO™ `__ + * `Speech to Text with OpenVINO™ `__ + * `Convert and Optimize YOLOv7 with OpenVINO™ `__ + * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ * See alternative: `Quantize Speech Recognition Models with accuracy control using NNCF PTQ API `__ - * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ - * `Video Recognition using SlowFast and OpenVINO™ `__ + * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ + * `Video Recognition using SlowFast and OpenVINO™ `__ * See alternative: `Live Action Recognition with OpenVINO™ `__ - * `Semantic Segmentation with OpenVINO™ using Segmenter `__ - * `Programming Language Classification with OpenVINO `__ + * `Semantic Segmentation with OpenVINO™ using Segmenter `__ + * `Programming Language Classification with OpenVINO `__ * `Stable Diffusion Text-to-Image Demo `__ * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ @@ -1426,10 +1426,10 @@ Deprecated and to be removed in the future * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ - * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ - * `Data Preparation for 2D Medical Imaging `__ - * `Train a Kidney Segmentation Model with MONAI and PyTorch 
Lightning `__ - * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ + * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ + * `Data Preparation for 2D Medical Imaging `__ + * `Train a Kidney Segmentation Model with MONAI and PyTorch Lightning `__ + * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ * See alternative: `Quantize a Segmentation Model and Show Live Inference `__ @@ -1458,7 +1458,7 @@ are available on request. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at -`www.intel.com `__ +`www.intel.com `__ or from the OEM or retailer. No computer system can be absolutely secure. diff --git a/docs/articles_en/documentation/legacy-features.rst b/docs/articles_en/documentation/legacy-features.rst index f859a3a4572f88..2457d28cf24c15 100644 --- a/docs/articles_en/documentation/legacy-features.rst +++ b/docs/articles_en/documentation/legacy-features.rst @@ -96,7 +96,7 @@ Discontinued: | *New solution:* API 2.0 launched in OpenVINO 2022.1 | *Old solution:* discontinued with OpenVINO 2024.0 - | `The last version supporting API 1.0 `__ + | `2023.2 is the last version supporting API 1.0 `__ .. dropdown:: Compile tool diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst index b5d3c08b39f480..fb9f41c755d4fb 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst @@ -120,7 +120,7 @@ Here are code examples of how to use these methods with different model formats: For more details on conversion, refer to the :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` - and an example `tutorial `__ + and an example `tutorial `__ on this topic. * The ``read_model()`` and ``compile_model()`` methods: @@ -592,7 +592,7 @@ to OpenVINO IR or ONNX before running inference should be considered the default OpenVINO versions of 2023 are mostly compatible with the old instructions, through a deprecated MO tool, installed with the deprecated OpenVINO Developer Tools package. - `OpenVINO 2023.0 `__ is the last + `OpenVINO 2023.0 `__ is the last release officially supporting the MO conversion process for the legacy formats. 
diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst index 711a060b7467b8..7880b261c80b81 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst @@ -14,7 +14,7 @@ Converting an ONNX Faster R-CNN Model The instructions below are applicable **only** to the Faster R-CNN model converted to the ONNX file format from the `maskrcnn-benchmark model `__: -1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). +1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). 2. Generate the Intermediate Representation of the model, by changing your current working directory to the model conversion API installation directory, and running model conversion with the following parameters: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst index 84392e92e620d2..4c10c941c7fb47 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst @@ -12,7 +12,7 @@ Converting an ONNX GPT-2 Model This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. -`Public pre-trained GPT-2 model `__ is a large +`Public pre-trained GPT-2 model `__ is a large transformer-based language model with a simple objective: predict the next word, given all of the previous words within some text. 
Downloading the Pre-Trained Base GPT-2 Model diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst index de3af8ce5175f0..f1ee885dae0b26 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst @@ -20,7 +20,7 @@ Downloading the Pre-trained QuartzNet Model To download the pre-trained model, refer to the `NeMo Speech Models Catalog `__. Here are the instructions on how to obtain QuartzNet in ONNX format. -1. Install the NeMo toolkit, using the `instructions `__. +1. Install the NeMo toolkit, using the `instructions `__. 2. Run the following code: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst index 4f33e510a40267..ad646568aed598 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst @@ -44,7 +44,7 @@ For UNIX-like systems, you can use ``wget``: The link was taken from ``setup.sh`` in the ``speech_recoginitin/rnnt`` subfolder. You will get exactly the same weights as -if you were following the `guide `__. +if you were following the `guide `__. **Step 4**. Install required Python packages: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst index 955d5418d37270..2bcb6fde9b833b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst @@ -59,7 +59,7 @@ To convert such TensorFlow model, run the `mo` script with a path to the MetaGra 3. **SavedModel format**. In this case, a model consists of a special directory with a ``.pb`` file -and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. 
+and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. To convert such TensorFlow model, run the ``mo`` script with a path to the SavedModel directory: .. code-block:: sh diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst index fc78b12640771a..3d2365f45ffe3b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst @@ -160,7 +160,7 @@ It is important to mention that sometimes it seems like transformation cannot be because the actual values of inputs or shapes are needed. In fact, manipulations of shapes or values can be implemented using operations that are added to the graph. Consider the ``extensions/front/onnx/flattenONNX_to_reshape.py`` transformation, which replaces an ONNX -`Flatten `__ operation with a sub-graph of operations performing +`Flatten `__ operation with a sub-graph of operations performing the following (when ``axis`` is not equal to 0 and 1): 1. Calculate a shape of the ``Flatten`` input tensor, using the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation. diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst index 2d5598a5eb8e9d..3959ebefb09a4a 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst @@ -580,7 +580,7 @@ Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker 1. Download the `OpenVINO™ Model Server software `__ -2. Build the `OpenVINO™ Model Server Docker images `__ +2. Build the `OpenVINO™ Model Server Docker images `__ .. 
code-block:: sh diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst index a7a81acd9ba3a7..8a5bd91f9c1b7b 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst @@ -32,9 +32,9 @@ If the results are unsatisfactory, add datasets and perform the same steps, star OpenVINO Training Extensions Components ####################################### -* `OpenVINO Training Extensions API `__ +* `OpenVINO Training Extensions API `__ * `OpenVINO Training Extensions CLI `__ -* `OpenVINO Training Extensions Algorithms `__ +* `OpenVINO Training Extensions Algorithms `__ Tutorials ######### diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index 6ba9e0a9b60f52..9451fabd6219d8 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -35,7 +35,7 @@ The goal of Low Precision Transformations (LPT) is to transform a quantized mode As result, operation input tensor precisions will be changed from original to low precision and operations can be inferred by OpenVINO™ plugin in low precision. -For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. +For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. Input model requirements ######################## diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst index 5cc1b024f158b1..f02c5414ac4369 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst @@ -11,7 +11,7 @@ Loop **Category**: *Infrastructure* **Short description**: *Loop* operation performs recurrent execution of the network, which is described in the ``body``, iterating through the data. -The operation has similar semantic to the ONNX Loop `operation `__. +The operation has similar semantic to the ONNX Loop `operation `__. **Detailed description** @@ -73,7 +73,7 @@ Loop operation description in the IR also has several special sections: ``body`` 1. The body operation getting an input from the main graph should have an entry in the ``port_map`` section of the Loop operation. These edges connect input ports of the Loop with the body ``Parameter``\ s. 2. 
Input tensors to the Loop can be sliced along a specified axis, the Loop can iterates over all sliced parts. The corresponding ``input`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to slice. Therefore, inputs to the Loop operation corresponding to ``input`` entries in the ``port_map`` without ``axis`` attribute are used "as is" (without slicing). 3. The body operation producing tensor to be used in the subsequent iterations (like in RNN models) should have a back edge described in the ``back_edges`` section of the operation. The back edge connects the respective body ``Parameter`` and ``Result`` operations. For such a case the Loop operation node provides input for the first iteration, while corresponding Loop operation output produces the tensor computed during the last iteration. -4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). +4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). 5. There is one body ``Parameter`` operation not connected through the ``port_map``. This is a "current iteration" input. The Loop operation is responsible for providing the appropriate value for each iteration. 6. Connection of nodes inside the Loop body with the main graph should be done through ``Parameter`` and ``Result`` body operations. No other ways to connect graphs are allowed. diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst index 28dbec46289f89..f58418ee923a8b 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst @@ -64,7 +64,7 @@ GRUCell * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. 
* **Range of values**: true or false * **Type**: ``boolean`` * **Default value**: false diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst index 37c70087e121ea..f9b9a5ece850ec 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst @@ -19,7 +19,7 @@ represents a sequence of GRU cells. The sequence can be connected differently de ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX GRU operator defined -`GRUCell `__ +`GRUCell `__ **Attributes** @@ -69,7 +69,7 @@ are in sync with the specification of ONNX GRU operator defined * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. * **Range of values**: True or False * **Type**: ``boolean`` * **Default value**: False diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst index c00b4c819cc66a..164033bdd2831c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst @@ -14,7 +14,7 @@ LSTMSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . +A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . 
**Attributes** diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst index fc9829dd999bda..a3dfc062de2dcd 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst @@ -14,7 +14,7 @@ RNNSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. +A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. **Attributes** diff --git a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst index dc43881780b1e6..e10a67fddadb53 100644 --- a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst +++ b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst @@ -37,7 +37,7 @@ Below are the instructions on how to install the OpenCL packages on supported Li and install the apt package `ocl-icd-libopencl1` with the OpenCl ICD loader. Alternatively, you can add the apt repository by following the - `installation guide `__. + `installation guide `__. Then install the `ocl-icd-libopencl1`, `intel-opencl-icd`, `intel-level-zero-gpu` and `level-zero` apt packages: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst index 6326513fa3cea1..cd3fd41fed03e0 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst @@ -119,7 +119,7 @@ to see if your case needs any of them. .. 
code-block:: python - from openvino import get_cmake_path + from openvino.utils import get_cmake_path cmake_path = get_cmake_path() For detailed instructions on how to use these configurations in your build setup, check out the diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst index 0ff1b95c8eb212..475f623ef86598 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst @@ -108,6 +108,6 @@ Additional Resources - `Official Yocto Project documentation `__ - `BitBake Tool `__ - `Poky `__ -- `Meta-intel `__ +- `Meta-intel `__ - `Meta-openembedded `__ - `Meta-clang `__ \ No newline at end of file diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst index 7f220111f64b98..2476a0423e30e1 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst @@ -31,8 +31,8 @@ some examples of popular Generative AI scenarios: To write such pipelines, you can follow the examples provided as part of OpenVINO: -* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ -* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ +* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ +* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ To perform inference, models must be first converted to OpenVINO IR format using Hugging Face Optimum-Intel API. diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst index d6e23b3791d001..2064aa843a93d8 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst @@ -336,7 +336,7 @@ Additional Resources * `OpenVINO Tokenizers repo `__ * `OpenVINO Tokenizers Notebook `__ -* `Text generation C++ samples that support most popular models like LLaMA 2 `__ +* `Text generation C++ samples that support most popular models like LLaMA 3 `__ * `OpenVINO GenAI Repo `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst index 19c4a013c54aae..390fe00605f2c6 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst @@ -245,6 +245,13 @@ There are several options for setting the number of inference iterations: The more iterations a model runs, the better the statistics will be for determining average latency and throughput. +Maximum inference rate +++++++++++++++++++++++ + +By default, the benchmarking app will run inference at the maximum rate based on device capabilities. +The maximum inference rate can be configured with the ``-max_irate `` option. +Tweaking this value allows better accuracy in power usage measurement by limiting the number of executions. 
+ Inputs ++++++++++++++++++++ @@ -337,7 +344,7 @@ following usage message: [Step 1/11] Parsing and validating input arguments [ INFO ] Parsing input parameters usage: benchmark_app.py [-h [HELP]] [-i PATHS_TO_INPUT [PATHS_TO_INPUT ...]] -m PATH_TO_MODEL [-d TARGET_DEVICE] - [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] + [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-max_irate MAXIMUM_INFERENCE_RATE] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] [-data_shape DATA_SHAPE] [-layout LAYOUT] [-extensions EXTENSIONS] [-c PATH_TO_CLDNN_CONFIG] [-cdir CACHE_DIR] [-lfile [LOAD_FROM_FILE]] [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION] [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] @@ -536,6 +543,9 @@ following usage message: 'none': no device performance mode will be set. Using explicit 'nstreams' or other device-specific options, please set hint to 'none' -niter Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. + -max_irate Optional. Maximum inference rate in frames per second. + If not specified, the default value is 0 and inference will run at the maximum rate, depending on the device capabilities. + Tweaking this value allows better accuracy in power usage measurement by limiting the execution. -t Optional. Time in seconds to execute topology. Input shapes diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst index 5033d24ba3785a..2a551d7aa44eb5 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst @@ -76,7 +76,7 @@ of optimization methods (`"compression"` section). :fragment: [nncf_congig] Here is a brief description of the required parameters of the Filter Pruning method. For a full description refer to the -`GitHub `__ page. +`GitHub `__ page. * ``pruning_init`` - initial pruning rate target. For example, value ``0.1`` means that at the begging of training, convolutions that can be pruned will have 10% of their filters set to zero. 
diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst index 41d43f7eea37d6..aa8e9cdabfda64 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst @@ -83,7 +83,7 @@ Accordingly, the code that loops over all available devices of the "GPU" type on Additional Resources #################### -* `OpenVINO™ Runtime API Tutorial <./../../notebooks/openvino-api-with-output.html>`__ -* `AUTO Device Tutorial <./../../notebooks/auto-device-with-output.html>`__ -* `GPU Device Tutorial <./../../notebooks/gpu-device-with-output.html>`__ -* `NPU Device Tutorial <./../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file +* `OpenVINO™ Runtime API Tutorial <../../notebooks/openvino-api-with-output.html>`__ +* `AUTO Device Tutorial <../../notebooks/auto-device-with-output.html>`__ +* `GPU Device Tutorial <../../notebooks/gpu-device-with-output.html>`__ +* `NPU Device Tutorial <../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index a440f77bc79984..46b541d84d4035 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores. 
Then the default settings for low-level performance properties on Windows and Linux are as follows: -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| Property | Windows | Linux | -+======================================+========================================================================+====================================================================+ -| ``ov::num_streams`` | 1 | 1 | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_hyper_threading`` | No | No | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| Property | Windows | Linux | ++======================================+=======================================================================+=======================================================================+ +| ``ov::num_streams`` | 1 | 1 | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_hyper_threading`` | No | No | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | 
++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ .. note:: @@ -91,6 +91,16 @@ Then the default settings for low-level performance properties on Windows and Li enabled on Linux. Such default settings are aligned with typical workloads running in the corresponding environments to guarantee better out-of-the-box (OOB) performance. +.. note:: + Starting with 5th Gen Intel Xeon Processors, the new microarchitecture enables the sub-NUMA clustering + feature. A sub-NUMA cluster (SNC) can create two or more localization domains (NUMA nodes) + within a socket through BIOS configuration. + By default, OpenVINO with the latency hint uses a single NUMA node for inference. Although this + behavior achieves the best performance for most models, there might be corner + cases that require manual tuning of the ``ov::num_streams`` and ``ov::hint::enable_hyper_threading`` parameters. + For more details, see `Sub-NUMA Clustering `__. + Throughput Hint ##################### diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 78cf0632f61b2b..b4e1c7ac15afcc 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -19,7 +19,7 @@ For an in-depth description of the GPU plugin, see: - `GPU plugin developer documentation `__ - `OpenVINO Runtime GPU plugin source files `__ -- `Accelerate Deep Learning Inference with Intel® Processor Graphics `__ +- `Start AI Development with Intel `__ The GPU plugin is a part of the Intel® Distribution of OpenVINO™ toolkit. For more information on how to configure a system to use it, see the :doc:`GPU configuration <../../../get-started/configurations/configurations-intel-gpu>`. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst index 26a09214ea462a..e45f51a37afa5e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst @@ -119,9 +119,6 @@ The hints are used on the presumption that the application queries ``ov::optimal While an application is free to create more requests if needed (for example to support asynchronous inputs population) **it is very important to at least run the** ``ov::optimal_number_of_infer_requests`` **of the inference requests in parallel**. It is recommended for efficiency, or device utilization, reasons. -Keep in mind that ``ov::hint::PerformanceMode::LATENCY`` does not necessarily imply using single inference request. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as the number of NUMA nodes in the system. -To make your application fully scalable, make sure to query the ``ov::optimal_number_of_infer_requests`` directly. - .. 
_prefer-async-api: Prefer Async API diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst index 7466d00efe5eb7..cad5633e11f85b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst @@ -85,12 +85,12 @@ Number of Streams Considerations * Select the number of streams that is **less or equal** to the number of requests that the application would be able to run simultaneously. * To avoid wasting resources, the number of streams should be enough to meet the *average* parallel slack rather than the peak load. -* Use the `ov::streams::AUTO `__ as a more portable option (that also respects the underlying hardware configuration). +* Use the `ov::streams::AUTO <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv44AUTO>`__ as a more portable option (that also respects the underlying hardware configuration). * It is very important to keep these streams busy, by running as many inference requests as possible (for example, start the newly-arrived inputs immediately): - * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests `__ of the ``ov:Compiled_Model``. + * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv432optimal_number_of_infer_requests>`__ of the ``ov::CompiledModel``. -* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams `__. +* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv417range_for_streams>`__. Batch Size Considerations +++++++++++++++++++++++++ @@ -99,7 +99,7 @@ Batch Size Considerations * Otherwise (or if the number of "available" requests fluctuates), you may need to keep several instances of the network (reshaped to the different batch size) and select the properly sized instance in the runtime accordingly. -* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size `__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. +* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv418optimal_batch_size>`__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. A Few Device-specific Details diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst index 86788b20249a3f..d00fd19c4d636d 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst @@ -139,5 +139,5 @@ sequences.
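Following up on the streams guidance in the hunk above, here is a small, hedged C++ sketch of querying ``ov::optimal_number_of_infer_requests`` and ``ov::range_for_streams`` after compiling with automatic stream selection; the model path and device name are placeholders:

```cpp
#include <openvino/openvino.hpp>
#include <cstdint>
#include <iostream>
#include <tuple>

int main() {
    ov::Core core;

    // Placeholder model; the THROUGHPUT hint plus AUTO streams lets the plugin
    // pick a stream count that matches the underlying hardware.
    ov::CompiledModel compiled = core.compile_model(
        "model.xml", "CPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
        ov::num_streams(ov::streams::AUTO));

    // Bare minimum of requests needed to saturate the device with this model.
    uint32_t n_requests = compiled.get_property(ov::optimal_number_of_infer_requests);

    // Device-wide bounds on the number of streams (per model).
    auto [min_streams, max_streams] = core.get_property("CPU", ov::range_for_streams);

    std::cout << "Run at least " << n_requests << " requests in parallel; "
              << "supported streams range: [" << min_streams << ", " << max_streams << "]\n";
    return 0;
}
```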
You can find more examples demonstrating how to work with states in other articles: -* `LLM Chatbot notebook <../../notebooks/stable-zephyr-3b-chatbot-with-output.html>`__ +* `LLaVA-NeXT Multimodal Chatbot notebook <../../notebooks/llava-next-multimodal-chatbot-with-output.html>`__ * :doc:`Serving Stateful Models with OpenVINO Model Server <../../openvino-workflow/model-server/ovms_docs_stateful_models>` diff --git a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst index 438c9ea9ec0bd3..3032add547f8a8 100644 --- a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst +++ b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst @@ -201,6 +201,6 @@ Additional Resources * Learn about the :doc:`basic steps to integrate inference in your application `. -* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. +* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. -* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. +* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index 5bdb51a596d5d8..e5bc0ca901a5aa 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -288,7 +288,7 @@ PyTorch supports ``torch.compile`` officially on Windows from version 2.3.0 onwa For PyTorch versions below 2.3.0, the ``torch.compile`` feature is not supported on Windows officially. However, it can be accessed by running the following instructions: -1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , +1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , 2. Update the file at ``/Lib/site-packages/torch/_dynamo/eval_frames.py`` 3. Find the function called ``check_if_dynamo_supported()``: @@ -374,7 +374,7 @@ The ``torch.compile`` feature is part of PyTorch 2.0, and is based on: (PEP 523) to dynamically modify Python bytecode right before it is executed (PyTorch operators that cannot be extracted to FX graph are executed in the native Python environment). It maintains the eager-mode capabilities using - `Guards `__ to ensure the + `Guards `__ to ensure the generated graphs are valid. * **AOTAutograd** - generates the backward graph corresponding to the forward graph captured by TorchDynamo. diff --git a/docs/dev/ci/github_actions/overview.md b/docs/dev/ci/github_actions/overview.md index 8daf56a3a2252f..e65c085ede30d5 100644 --- a/docs/dev/ci/github_actions/overview.md +++ b/docs/dev/ci/github_actions/overview.md @@ -11,6 +11,7 @@ detailed instructions where necessary. 
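The string-tensors hunk above mentions producing models that consume string tensors (for example, via OpenVINO tokenizers). As a hedged aside, a minimal C++ sketch of building such a tensor directly, with placeholder texts, might look like this:

```cpp
#include <openvino/openvino.hpp>
#include <string>

int main() {
    // A string tensor stores std::string elements instead of numeric data.
    ov::Tensor prompts(ov::element::string, ov::Shape{2});
    std::string* data = prompts.data<std::string>();
    data[0] = "What is OpenVINO?";        // placeholder text
    data[1] = "Summarize this article.";  // placeholder text

    // A tensor like this can be passed with set_input_tensor() to a model
    // whose inputs are strings, such as one produced with OpenVINO tokenizers.
    return 0;
}
```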
* [Required workflows](#required-workflows) * [Workflow structure](#structure-of-the-workflows) * [Workflow and job organisation](#workflows-and-jobs-organisation) + * [Security considerations](#security-considerations) * [Finding results, artifacts and logs](#finding-results-artifacts-and-logs) * [Custom actions overview](#custom-actions) * [Machines overview](#machines) @@ -205,6 +206,10 @@ Overview of the [Linux workflow's](../../../../.github/workflows/ubuntu_22.yml) * All the steps are executed in the shell specified by the `shell` key under `defaults: run:` unless a shell is specified directly in a step. +### Security considerations + +Please consult the [workflow security guidelines](security.md) before submitting a PR with GitHub Actions workflow changes. + ## Finding Results, Artifacts, and Logs ### Results diff --git a/docs/dev/ci/github_actions/security.md b/docs/dev/ci/github_actions/security.md new file mode 100644 index 00000000000000..d46cf6fd865c41 --- /dev/null +++ b/docs/dev/ci/github_actions/security.md @@ -0,0 +1,99 @@ +# Security best practices for GitHub Actions Workflows + +There are a few simple steps that we should follow to ensure our workflows are not vulnerable to common attacks. + +## Adjust `GITHUB_TOKEN` permissions + +Use the `permissions` key to make sure the `GITHUB_TOKEN` is configured with the least privileges for each job. + +Start with relatively safe permissions: + +```yaml +permissions: read-all +``` + +If you need more permissions, declare them at the job level when possible, for example: + +```yaml +jobs: + stale: + runs-on: ubuntu-latest + + # GITHUB_TOKEN will have only these permissions for + # `stale` job + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@f7176fd3007623b69d27091f9b9d4ab7995f0a06 + +``` + +See the [GitHub documentation](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/controlling-permissions-for-github_token) for more details. + +## Reduce the scope of environment variables + +Environment variables should be declared at the step level when possible (e.g., when the variable is used only in that exact step). Put variables at the job level only when they are used by several steps, and at the workflow level only when they are used by most of the steps. + +Example from [the official GitHub documentation](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables): + +```yaml +name: Greeting on variable day + +on: + workflow_dispatch + +# Workflow level variables. Avoid using these. +env: + DAY_OF_WEEK: Monday + +jobs: + greeting_job: + runs-on: ubuntu-latest + # Job level variables + env: + Greeting: Hello + steps: + - name: "Say Hello Mona it's Monday" + run: echo "$Greeting $First_Name. Today is $DAY_OF_WEEK!" + # Step level variables. Prefer this approach + env: + First_Name: Mona + +``` + +## Avoid using `pull_request_target` + +**Never** use the `pull_request_target` trigger event for workflows. If you want to use `pull_request_target`, contact a member of the OpenVINO GitHub Actions task force first. See this [GitHub blog post](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/) for background. + +## Handle secrets correctly + +**Never ever** hard-code plain-text secrets in a GitHub Actions workflow. If you need to use secrets, contact a member of the OpenVINO GitHub Actions task force first. + +## Be careful with user input + +Most GitHub context variables are propagated from user input.
That means they should be treated as untrusted and potentially malicious. There are some tactics you can use to mitigate the risk: +- Instead of using inline scripts, create an action and pass the variable as an argument +- Put the value into an environment variable for the step, and use the variable in the script + +More details are available in [this](https://securitylab.github.com/resources/github-actions-untrusted-input/) blog post. + +## Pin versions for GitHub Actions + +When using third-party actions, pin the version with a commit hash rather than a tag to shield your workflow from potential supply-chain compromise. + +For example, instead of this: + +```yaml +uses: actions/checkout@v4.2.2 +``` + +use this: + +```yaml +uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +``` + +## Further reading +Follow the general [recommendations from GitHub itself](https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions). diff --git a/docs/sphinx_setup/_static/html/modal.html b/docs/sphinx_setup/_static/html/modal.html index ac425599b821ce..38eb673824f97e 100644 --- a/docs/sphinx_setup/_static/html/modal.html +++ b/docs/sphinx_setup/_static/html/modal.html @@ -11,9 +11,6 @@

Configure Graphs

-
- Clear All -
diff --git a/docs/sphinx_setup/_static/html/modalLLM.html b/docs/sphinx_setup/_static/html/modalLLM.html index e3395a16931188..37b569d0bd4078 100644 --- a/docs/sphinx_setup/_static/html/modalLLM.html +++ b/docs/sphinx_setup/_static/html/modalLLM.html @@ -11,9 +11,6 @@

Configure Graphs

-
- Clear All -
diff --git a/docs/sphinx_setup/_static/js/graphs.js b/docs/sphinx_setup/_static/js/graphs.js index 4d621ce0780261..7171aed374dd99 100644 --- a/docs/sphinx_setup/_static/js/graphs.js +++ b/docs/sphinx_setup/_static/js/graphs.js @@ -1,11 +1,180 @@ -// =================== ADDITIONAL OUTPUT CONFIG ========================= +// =================== GENERAL OUTPUT CONFIG ========================= + +class Filter { + // param: GraphData[], networkModels[] + static FilterByNetworkModel(graphDataArr, networkModels) { + const optionMap = new Map(); + networkModels.map((model) => graphDataArr.filter((graphData => graphData.Model === model))) + .flat(1) + .forEach(item => optionMap.set(item.Platform, item)); + return Array.from(optionMap.values()); + } + // param: GraphData[], ieType + static ByIeTypes(graphDataArr, ieTypes) { + const optionMap = new Map(); + graphDataArr + .filter(graphData => ieTypes.includes(graphData.PlatformType)) + .forEach(item => optionMap.set(item.Platform, item)); + return Array.from(optionMap.values()); + } + // param: GraphData[], ieType, networkModels + static ByTypesAndModels(graphDataArr, ieTypes, models) { + return Array.from( + graphDataArr + .filter(({ PlatformType, Model }) => ieTypes.includes(PlatformType) && models.includes(Model)) + .reduce((map, item) => map.set(item.Platform, item), new Map()) + .values() + ); + } + // param: GraphData[], clientPlatforms + static ByIeKpis(graphDataArr, clientPlatforms) { + return Array.from( + graphDataArr.reduce((kpiSet, data) => { + if (clientPlatforms.some(platformName => data.Platform.includes(platformName))) { + Object.keys(data.Parameters).forEach(key => kpiSet.add(key)); + } + return kpiSet; + }, new Set()) + ); + } + // param: GraphData[] + static getParameters(graphDataArr) { + var parameters = [] + graphDataArr.filter((data) => { + for (var key in data.Parameters) { + if (!parameters.includes(Graph.capitalizeFirstLetter(key))) parameters.push(Graph.capitalizeFirstLetter(key)) + } + }) + return parameters; + } + // param: GraphData[] + static getIeTypes(graphDataArr) { + var kpis = [] + graphDataArr.filter((data) => { + for (var key in data.Parameters) { + if (!kpis.includes(Graph.capitalizeFirstLetter(key))) kpis.push(Graph.capitalizeFirstLetter(key)) + } + }) + return kpis; + } + // param: GraphData[], clientPlatforms[] + static ByClientPlatforms(graphDataArr, platformsArr) { + return graphDataArr.filter((data) => { + return platformsArr.includes(data.Platform) + }); + } +} + +class Modal { + static getPrecisionsLabels(graphDataArr) { + const kpisSet = new Set(); + graphDataArr.forEach(data => { + Object.values(data.Parameters).forEach(param => { + param.Precisions.forEach(precision => { + Object.keys(precision).forEach(key => { + kpisSet.add(key.toUpperCase()); + }); + }); + }); + }); + return Array.from(kpisSet); + } + + static getPrecisions(appConfig, labels) { + return labels.map((label) => { + var prec = appConfig.PrecisionsMap[label]; + if (prec !== undefined) { + return prec; + } + else { + return "no name"; + } + }); + } +} + +class Graph { + // functions to get unique keys + static getNetworkModels(graphDataArr) { + return Array.from(new Set(graphDataArr.map(obj => obj.Model))) + .sort((a, b) => a.localeCompare(b)); + } + static getIeTypes(graphDataArr) { + return Array.from(new Set(graphDataArr.map((obj) => obj.PlatformType))).sort((a, b) => a.localeCompare(b)); + } + + // param: GraphData[] + static getPlatformNames(graphDataArr) { + return graphDataArr.map((data) => data.Platform) + .sort((a, b) => 
a.localeCompare(b)); + } + + // param: GraphData[], engine: string, precisions: list + static getDatabyParameter(graphDataArr, engine, array) { + if (!Array.isArray(array[engine])) { + array[engine] = []; + } + array[engine].push(graphDataArr.Parameters[engine].Precisions); + return array; + } + + // this returns an object that is used to ender the chart + static getGraphConfig(engine, precisions, appConfig) { + return { + chartTitle: 'Throughput vs Latency', + iconClass: 'latency-icon', + datasets: precisions.map((precision) => appConfig.PrecisionData[engine][precision]), + unit: "None" + }; + } + + // param: GraphData[], parameterName: string, precisions: list + static getDatabyParameterOld(graphDataArr, parameterName, precisions) { + var array = []; + graphDataArr.forEach((item) => { + if (item.Parameters[parameterName] !== undefined) { + array.push(item.Parameters[parameterName].Precisions); + } + else { + var obj = {}; + precisions.forEach((prec) => { + obj[prec] = 0; + }) + array.push([obj]) + } + }) + return array; + + } + + // this returns an object that is used to ender the chart + static getGraphConfigOld(parameterName, item, precisions, appConfig) { + return { + chartTitle: Graph.capitalizeFirstLetter(parameterName), + iconClass: parameterName + '-icon', + unit: item.Parameters[parameterName]?.Unit, + datasets: precisions.map((precision) => appConfig.PrecisionData[precision]), + }; + } + static capitalizeFirstLetter(string) { + return string.charAt(0).toUpperCase() + string.slice(1); + } +} + +class ChartDisplay { + constructor(mode, numberOfCharts) { + this.mode = mode; + this.numberOfChartsInRow = numberOfCharts; + } +} $(document).ready(function () { - $('.ov-toolkit-benchmark-results').on('click', () => showModal("graph-data-ov.json")); - $('.ovms-toolkit-benchmark-results').on('click', () => showModal("graph-data-ovms.json")); - function clickBuildGraphs(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions) { - renderData(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions); + $('.ov-toolkit-benchmark-results').on('click', () => showModal("graph-data-ov.json", false)); + $('.ovms-toolkit-benchmark-results').on('click', () => showModal("graph-data-ovms.json", false)); + $('.ovms-toolkit-benchmark-llm-result').on('click', () => showModal("graph-data-ovms-genai.json", true)); + function clickBuildGraphs(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions, isLLM) { + renderData(graph, appConfig, networkModels, ieTypes, platforms, kpis, precisions, isLLM); $('.modal-footer').show(); $('#modal-display-graphs').show(); $('.edit-settings-btn').off('click').on('click', (event) => { @@ -34,16 +203,16 @@ $(document).ready(function () { $('body').css('overflow', 'auto'); } - function showModal(file) { + function showModal(file, isLLM) { $('body').css('overflow', 'hidden'); - fetch('../_static/benchmarks_files/data/'+ file) + fetch('../_static/benchmarks_files/data/' + file) .then((response) => response.json()) .then((jsonData) => { fetch('../_static/benchmarks_files/graph-config.json') .then((configResponse) => configResponse.json()) .then((appConfig) => { - renderModal(jsonData, appConfig) + renderModal(jsonData, appConfig, isLLM) }) }); } @@ -93,11 +262,12 @@ $(document).ready(function () { $('#build-graphs-btn').prop('disabled', true); } - function renderModal(graph, appConfig) { + function renderModal(graph, appConfig, isLLM) { + var modalPath = isLLM === true ? 
'../_static/html/modalLLM.html' : '../_static/html/modal.html' new Graph(graph); var networkModels = Graph.getNetworkModels(graph); var ieTypes = Graph.getIeTypes(graph); - fetch('../_static/html/modal.html').then((response) => response.text()).then((text) => { + fetch(modalPath).then((response) => response.text()).then((text) => { // generate and configure modal container var modal = $('
'); @@ -111,13 +281,13 @@ $(document).ready(function () { const models = networkModels.map((networkModel) => createCheckMark(networkModel, 'networkmodel')); modal.find('.models-column').append(models); - const selectAllModelsButton = createCheckMark('', 'networkmodel'); + const selectAllModelsButton = createCheckMark('', 'networkmodel', false , false); modal.find('.models-selectall').append(selectAllModelsButton); - const selectAllPlatformsButton = createCheckMark('', 'platform'); + const selectAllPlatformsButton = createCheckMark('', 'platform', false , false); modal.find('.platforms-selectall').append(selectAllPlatformsButton); - const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false)); + const precisions = Modal.getPrecisionsLabels(graph).map((precision) => createCheckMark(precision, 'precision', false , false)); modal.find('.precisions-column').append(precisions); selectAllCheckboxes(precisions); @@ -132,24 +302,20 @@ $(document).ready(function () { modal.find('#modal-display-graphs').hide(); modal.find('.ietype-column input').first().prop('checked', true); - const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false)); + const kpiLabels = Filter.getParameters(graph).map((parameter) => createCheckMark(parameter, 'kpi', false , true)); modal.find('.kpi-column').append(kpiLabels); $('body').prepend(modal); - preselectDefaultSettings(graph, modal, appConfig); - - //is not generic solution :( if (appConfig.DefaultSelections.platformTypes?.data?.includes('Select All')) { selectAllCheckboxes(iefilter); - }; + preselectDefaultSettings(graph, modal, appConfig); renderClientPlatforms(graph, modal); - $('.clear-all-btn').on('click', clearAll); $('#build-graphs-btn').on('click', () => { $('#modal-configure-graphs').hide(); - clickBuildGraphs(graph, appConfig, getSelectedNetworkModels(), getSelectedIeTypes(), getSelectedClientPlatforms(), getSelectedKpis(), Modal.getPrecisions(appConfig, getSelectedPrecisions())); + clickBuildGraphs(graph, appConfig, getSelectedNetworkModels(), getSelectedIeTypes(), getSelectedClientPlatforms(), getSelectedKpis(), Modal.getPrecisions(appConfig, getSelectedPrecisions()), isLLM); }); $('.modal-close').on('click', hideModal); $('.close-btn').on('click', hideModal); @@ -163,18 +329,18 @@ $(document).ready(function () { modal.find('.models-selectall input').on('click', function () { if ($(this).prop('checked')) selectAllCheckboxes(models); else deSelectAllCheckboxes(models); - + renderClientPlatforms(graph, modal) }); modal.find('.platforms-selectall input').on('click', function () { - if ($(this).prop('checked')) + if ($(this).prop('checked')) renderClientPlatforms(graph, modal) else { var enabledPlatforms = modal.find('.platforms-column .checkmark-container'); deSelectCheckbox(enabledPlatforms); }; - + }); modal.find('.models-column input').on('click', function () { @@ -223,49 +389,20 @@ $(document).ready(function () { precisions.prop('disabled', false); } - function clearAll() { - $('.modal-content-grid-container input:checkbox').each((index, object) => $(object).prop('checked', false)); - validatePrecisionSelection(); - validateSelections(); - } - function preselectDefaultSettings(graph, modal, appConfig) { - - const defaultSelections = appConfig.DefaultSelections; - - selectDefaultPlatformType(defaultSelections.platformTypes, graph, modal); - - applyPlatformFilters(defaultSelections.platformFilters, modal, graph); - - clearAllSettings(defaultSelections); - + 
selectDefaultPlatformType(appConfig.DefaultSelections.platformTypes, graph, modal); + clearAllSettings(appConfig.DefaultSelections); validateSelections(); validatePrecisionSelection(); } - function selectDefaultPlatformType(platformTypes, graph, modal) { if (!platformTypes) return; - const type = platformTypes.data[0]; $(`input[data-ietype="${type}"]`).prop('checked', true); renderClientPlatforms(graph, modal); } - - function applyPlatformFilters(platformFilters, modal, graph) { - if (!platformFilters) return; - - const filters = modal.find('.selectable-box-container').children('.selectable-box'); - filters.removeClass('selected'); - - platformFilters.data.forEach(selection => { - filters.filter(`[data-${platformFilters.name}="${selection}"]`).addClass('selected'); - }); - - renderClientPlatforms(graph, modal); - } - + function clearAllSettings(defaultSelections) { - clearAll(); Object.keys(defaultSelections).forEach(setting => { const { name, data } = defaultSelections[setting]; data.forEach(selection => { @@ -287,14 +424,15 @@ $(document).ready(function () { var platformNames = Graph.getPlatformNames(fPlatforms); $('.platforms-column .checkmark-container').remove(); - const clientPlatforms = platformNames.map((platform) => createCheckMark(platform, 'platform', true)); - + const clientPlatforms = platformNames.map((platform) => createCheckMark(platform, 'platform', true, false)); + var enabledPlatforms = filterPlatforms(graph, getSelectedIeTypes(), getSelectedNetworkModels()); enableCheckBoxes(clientPlatforms, enabledPlatforms); modal.find('.platforms-column').append(clientPlatforms); enableParmeters(graph, getSelectedClientPlatforms()); modal.find('.platforms-column input').on('click', validateSelections); + validateSelections(); } function enableParmeters(graph, clientPlatforms) { @@ -310,11 +448,12 @@ $(document).ready(function () { }) } - function createCheckMark(itemLabel, modelLabel, disabled) { + function createCheckMark(itemLabel, modelLabel, disabled, checked = false) { const item = $('