diff --git a/.github/workflows/_helm-e2e.yaml b/.github/workflows/_helm-e2e.yaml index 0062127b0..9deff4af6 100644 --- a/.github/workflows/_helm-e2e.yaml +++ b/.github/workflows/_helm-e2e.yaml @@ -65,7 +65,7 @@ jobs: echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV echo "NAMESPACE=${CHART_NAME}-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV - echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV + echo "ROLLOUT_TIMEOUT_SECONDS=1200s" >> $GITHUB_ENV echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV echo "should_cleanup=false" >> $GITHUB_ENV diff --git a/helm-charts/chatqna/Chart.yaml b/helm-charts/chatqna/Chart.yaml index 7d22d2811..e9a0d00fa 100644 --- a/helm-charts/chatqna/Chart.yaml +++ b/helm-charts/chatqna/Chart.yaml @@ -18,6 +18,19 @@ dependencies: - name: tgi version: 1.0.0 repository: "file://../common/tgi" + condition: tgi.enabled + - name: vllm + version: 1.0.0 + repository: "file://../common/vllm" + condition: vllm.enabled + - name: llm-uservice + version: 1.0.0 + repository: "file://../common/llm-uservice" + condition: tgi.enabled + - name: llm-ctrl-uservice + version: 1.0.0 + repository: "file://../common/llm-ctrl-uservice" + condition: vllm.enabled - name: tei version: 1.0.0 repository: "file://../common/tei" diff --git a/helm-charts/chatqna/README.md b/helm-charts/chatqna/README.md index a591ff1e6..e329c105f 100644 --- a/helm-charts/chatqna/README.md +++ b/helm-charts/chatqna/README.md @@ -9,37 +9,91 @@ Helm chart for deploying ChatQnA service. ChatQnA depends on the following servi - [redis-vector-db](../common/redis-vector-db/README.md) - [reranking-usvc](../common/reranking-usvc/README.md) - [teirerank](../common/teirerank/README.md) -- [llm-uservice](../common/llm-uservice/README.md) -- [tgi](../common/tgi/README.md) + +For LLM inference, two more microservices will be required. We can either use [TGI](https://github.com/huggingface/text-generation-inference) or [vLLM](https://github.com/vllm-project/vllm) as our LLM backend. Depending on that, we will have following microservices as part of dependencies for ChatQnA application. + +1. For using **TGI** as an inference service, following 2 microservices will be required: + + - [llm-uservice](../common/llm-uservice/README.md) + - [tgi](../common/tgi/README.md) + +2. For using **vLLM** as an inference service, following 2 microservices would be required: + + - [llm-ctrl-uservice](../common/llm-ctrl-uservice/README.md) + - [vllm](../common/vllm/README.md) + +> **_NOTE :_** We shouldn't have both inference engine deployed. It is required to only setup either of them. To achieve this, conditional flags are added in the chart dependency. We will be switching off flag corresponding to one service and switching on the other, in order to have a proper setup of all ChatQnA dependencies. ## Installing the Chart -To install the chart, run the following: +Please follow the following steps to install the ChatQnA Chart: + +1. Clone the GenAIInfra repository: + +```bash +git clone https://github.com/opea-project/GenAIInfra.git +``` + +2. Setup the dependencies and required environment variables: -```console +```bash cd GenAIInfra/helm-charts/ ./update_dependency.sh helm dependency update chatqna export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="Intel/neural-chat-7b-v3-3" +``` + +3. 
Depending on the target device for running ChatQnA, use one of the following installation commands:
+
+```bash
+# Install the chart on a Xeon machine
+
 # If you would like to use the traditional UI, please change the image as well as the containerport within the values
 # append these at the end of the command "--set chatqna-ui.image.repository=opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173"
+
 helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
+```
+
+```bash
 # To use Gaudi device
-#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
+helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
+```
+
+```bash
 # To use Nvidia GPU
-#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
+helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
+```
+
+```bash
 # To include guardrail component in chatqna on Xeon
-#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
+helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
+```
+
+```bash
 # To include guardrail component in chatqna on Gaudi
-#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
+helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
+```
+
+> **_NOTE :_** The default installation uses [TGI (Text Generation Inference)](https://github.com/huggingface/text-generation-inference) as the inference engine. To use vLLM as the inference engine instead, see below.
+
+```bash
+# To use vLLM inference engine on XEON device
+
+helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-ctrl-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} --set tgi.enabled=false --set vllm.enabled=true
+
+# To use OpenVINO optimized vLLM inference engine on XEON device
+
+helm install -f ./chatqna/vllm-openvino-values.yaml chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-ctrl-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} --set tgi.enabled=false --set vllm.enabled=true
 ```
 
 ### IMPORTANT NOTE
 
 1. Make sure your `MODELDIR` exists on the node where your workload is scheduled so you can cache the downloaded model for next time use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model.
+2. Please set `http_proxy`, `https_proxy` and `no_proxy` values while installing the chart if you are behind a proxy.
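For example, the proxy settings can be passed through the chart's `global` values at install time. This is a minimal sketch; the proxy URLs below are placeholders for your own environment, and note that commas inside a `--set` value must be escaped with `\,`:

```bash
# Placeholder proxy endpoints; replace with the values used in your environment
helm install chatqna chatqna \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
  --set global.modelUseHostPath=${MODELDIR} \
  --set tgi.LLM_MODEL_ID=${MODELNAME} \
  --set global.http_proxy=http://proxy.example.com:8080 \
  --set global.https_proxy=http://proxy.example.com:8080 \
  --set global.no_proxy="localhost\,127.0.0.1\,.svc.cluster.local"
```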
+ ## Verify To verify the installation, run the command `kubectl get pod` to make sure all pods are running. @@ -52,8 +106,9 @@ Run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the servi Open another terminal and run the following command to verify the service if working: -```console +```bash curl http://localhost:8888/v1/chatqna \ + -X POST \ -H "Content-Type: application/json" \ -d '{"messages": "What is the revenue of Nike in 2023?"}' ``` @@ -71,12 +126,13 @@ Open a browser to access `http://:${port}` to play with the ## Values -| Key | Type | Default | Description | -| ----------------- | ------ | ----------------------------- | -------------------------------------------------------------------------------------- | -| image.repository | string | `"opea/chatqna"` | | -| service.port | string | `"8888"` | | -| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | -| global.monitoring | bop; | false | Enable usage metrics for the service components. See ../monitoring.md before enabling! | +| Key | Type | Default | Description | +| -------------------------- | ------ | ----------------------------- | -------------------------------------------------------------------------------------- | +| image.repository | string | `"opea/chatqna"` | | +| service.port | string | `"8888"` | | +| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | +| vllm-openvino.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.monitoring | bop; | false | Enable usage metrics for the service components. See ../monitoring.md before enabling! | ## Troubleshooting diff --git a/helm-charts/chatqna/ci-vllm-openvino-values.yaml b/helm-charts/chatqna/ci-vllm-openvino-values.yaml new file mode 100644 index 000000000..653953d3d --- /dev/null +++ b/helm-charts/chatqna/ci-vllm-openvino-values.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tgi: + enabled: false + +vllm: + enabled: true + openvino_enabled: true + image: + repository: opea/vllm-openvino + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + + extraCmdArgs: [] + + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + + CUDA_GRAPHS: "0" + VLLM_CPU_KVCACHE_SPACE: 50 + VLLM_OPENVINO_KVCACHE_SPACE: 32 + OMPI_MCA_btl_vader_single_copy_mechanism: none + + ov_command: ["/bin/bash"] diff --git a/helm-charts/chatqna/ci-vllm-values.yaml b/helm-charts/chatqna/ci-vllm-values.yaml new file mode 100644 index 000000000..d16040d28 --- /dev/null +++ b/helm-charts/chatqna/ci-vllm-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tgi: + enabled: false + +vllm: + enabled: true diff --git a/helm-charts/chatqna/templates/deployment.yaml b/helm-charts/chatqna/templates/deployment.yaml index 812d38486..a1e5e11d3 100644 --- a/helm-charts/chatqna/templates/deployment.yaml +++ b/helm-charts/chatqna/templates/deployment.yaml @@ -33,12 +33,25 @@ spec: containers: - name: {{ .Release.Name }} env: + {{- if .Values.vllm.enabled }} + - name: LLM_SERVICE_HOST_IP + value: {{ .Release.Name }}-llm-ctrl-uservice + - name: LLM_SERVER_HOST_IP + value: {{ .Release.Name }}-vllm + - name: LLM_MODEL + value: {{ .Values.vllm.LLM_MODEL_ID | quote }} + {{- else }} + - name: LLM_SERVICE_HOST_IP + value: {{ .Release.Name }}-llm-uservice - name: LLM_SERVER_HOST_IP value: {{ .Release.Name }}-tgi - - name: LLM_SERVER_PORT - value: "80" - name: LLM_MODEL value: {{ .Values.tgi.LLM_MODEL_ID | quote }} + {{- end }} + - name: RERANK_SERVICE_HOST_IP + value: {{ .Release.Name }}-reranking-usvc + - name: LLM_SERVER_PORT + value: "80" - name: RERANK_SERVER_HOST_IP value: {{ .Release.Name }}-teirerank - name: RERANK_SERVER_PORT diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml index d0f64f3e0..182cfc656 100644 --- a/helm-charts/chatqna/values.yaml +++ b/helm-charts/chatqna/values.yaml @@ -22,6 +22,14 @@ nginx: service: type: NodePort +imagePullSecrets: [] + +podAnnotations: {} + +podSecurityContext: {} + +resources: {} + securityContext: readOnlyRootFilesystem: true allowPrivilegeEscalation: false @@ -47,6 +55,14 @@ horizontalPodAutoscaler: # Override values in specific subcharts tgi: LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + enabled: true + +vllm: + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + enabled: false + +llm-ctrl-uservice: + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # disable guardrails-usvc by default # See guardrails-values.yaml for guardrail related options @@ -66,9 +82,9 @@ global: https_proxy: "" no_proxy: "" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # set modelUseHostPath or modelUsePVC to use model cache. modelUseHostPath: "" - # modelUseHostPath: /mnt/opea-models # modelUsePVC: model-volume # Install Prometheus serviceMonitors for service components diff --git a/helm-charts/chatqna/vllm-openvino-values.yaml b/helm-charts/chatqna/vllm-openvino-values.yaml new file mode 100644 index 000000000..4097b0ee4 --- /dev/null +++ b/helm-charts/chatqna/vllm-openvino-values.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +vllm: + openvino_enabled: true + image: + repository: opea/vllm-openvino + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + + extraCmdArgs: [] + + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + + CUDA_GRAPHS: "0" + VLLM_CPU_KVCACHE_SPACE: 50 + VLLM_OPENVINO_KVCACHE_SPACE: 32 + OMPI_MCA_btl_vader_single_copy_mechanism: none + + ov_command: ["/bin/bash"] diff --git a/helm-charts/common/llm-ctrl-uservice/.helmignore b/helm-charts/common/llm-ctrl-uservice/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/common/llm-ctrl-uservice/Chart.yaml b/helm-charts/common/llm-ctrl-uservice/Chart.yaml new file mode 100644 index 000000000..bb10012ea --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/Chart.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: llm-ctrl-uservice +description: A Helm chart for LLM controller microservice which connects with vLLM microservice to provide inferences. +type: application +version: 1.0.0 +appVersion: "v1.0" +dependencies: + - name: vllm + version: 1.0.0 + repository: file://../vllm + condition: vllm.enabled diff --git a/helm-charts/common/llm-ctrl-uservice/README.md b/helm-charts/common/llm-ctrl-uservice/README.md new file mode 100644 index 000000000..b74190ca3 --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/README.md @@ -0,0 +1,91 @@ +# llm-ctrl Microservice + +Helm chart for deploying LLM controller microservice which facilitates connections and handles responses from OpenVINO vLLM microservice. + +`llm-ctrl-uservice` depends on vLLM microservice. You should properly set `vLLM_ENDPOINT` as the HOST URI of vLLM microservice. If not set, it will consider the default value : `http://-vllm:80` + +As this service depends on vLLM microservice, we can proceed in either of 2 ways: + +- Install both microservices individually. +- Install the vLLM microservice as dependency for `llm-ctrl-uservice` microservice. + +## (Option 1): Installing the charts individually: + +First, you need to install the `vllm` chart, please refer to the [vllm](../vllm) chart for more information. + +After you've deployed the `vllm` chart successfully, please run `kubectl get svc` to get the vLLM service name with port. We need to provide this to `llm-ctrl-uservice` as a value for vLLM_ENDPOINT for letting it discover and connect to the vLLM microservice. + +> **_NOTE:_** While installing charts separately, if you don't provide any vLLM endpoint explicitly, it will take the default endpoint as `http://-vllm:80`. So, if you are not providing the vLLM endpoint explicitly, please make sure to provide same helm release name to both the charts while installing. + +Get the service name for vLLM deployment by running: `kubectl get svc`. In the current case, service name would be `myvllm`. + +> **_NOTE:_** Please add the service name for vLLM to the value of no_proxy env var, if you are behind a proxy. 
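For instance, if the vLLM chart was installed under the release name `myvllm`, the service can be confirmed and appended to `no_proxy` roughly as follows (a sketch; adjust the release name to your own deployment):

```bash
# Confirm the service created by the vllm chart
kubectl get svc | grep vllm

# Add the vLLM service name to no_proxy so requests to it bypass the proxy
export no_proxy="${no_proxy},myvllm"
```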
+ +To install the chart, run the following: + +```bash +cd GenAIInfra/helm-charts/common/llm-ctrl-uservice +export HFTOKEN="insert-your-huggingface-token-here" +export vLLM_ENDPOINT="http://myvllm" +export MODELNAME="Intel/neural-chat-7b-v3-3" + +# If proxy is required, please export the appropriate proxy values. +export http_proxy= +export https_proxy= + +helm dependency update +helm install llm-ctrl-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set vLLM_ENDPOINT=${vLLM_ENDPOINT} --set LLM_MODEL_ID=${MODELNAME} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait +``` + +## (Option 2): Installing the chart with automatic installation of dependency: + +```bash +cd GenAIInfra/helm-charts/common/llm-ctrl-uservice +export HFTOKEN="insert-your-huggingface-token-here" +export MODELDIR="/mnt/opea-models" +export MODELNAME="Intel/neural-chat-7b-v3-3" + +# If proxy is required, please export the appropriate proxy values. +export http_proxy= +export https_proxy= + +helm dependency update +helm install llm-ctrl-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} --set vllm.enabled=true --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait +``` + +`--wait` flag in above installation command will make sure that all the dependencies are resolved and all services are deployed. + +## Verify + +To verify the installation, run the following command to make sure all pods are running. + +```bash +kubectl get pod +``` + +Once you see `llm-ctrl-uservice` pod and `llm-ctrl-uservice-vllm` pod in ready and running state, run the following command: + +```bash +kubectl port-forward svc/llm-ctrl-uservice 9000:9000 +``` + +This exposes the port 9000, on which `llm-ctrl-uservice` is running inside the pod, at port 9000 on the host. + +Now, we can access the service from the host machine. Open another terminal and run the following command to verify whether `llm-ctrl-uservice` is working: + +```bash +curl http://localhost:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, vLLM will not download if the model is cached here. 
The host path "modelUseHostPath" will be mounted to container as /data directory | +| image.repository | string | `"opea/llm-vllm"` | | +| service.port | string | `"9000"` | | +| vLLM_ENDPOINT | string | `""` | OpenVINO vLLM service endpoint | diff --git a/helm-charts/common/llm-ctrl-uservice/ci-values.yaml b/helm-charts/common/llm-ctrl-uservice/ci-values.yaml new file mode 100644 index 000000000..763f5c3f2 --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/ci-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +vllm: + enabled: true diff --git a/helm-charts/common/llm-ctrl-uservice/templates/_helpers.tpl b/helm-charts/common/llm-ctrl-uservice/templates/_helpers.tpl new file mode 100644 index 000000000..3cf82f83a --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llm-ctrl-uservice.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "llm-ctrl-uservice.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "llm-ctrl-uservice.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llm-ctrl-uservice.labels" -}} +helm.sh/chart: {{ include "llm-ctrl-uservice.chart" . }} +{{ include "llm-ctrl-uservice.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "llm-ctrl-uservice.selectorLabels" -}} +app.kubernetes.io/name: {{ include "llm-ctrl-uservice.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "llm-ctrl-uservice.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "llm-ctrl-uservice.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/llm-ctrl-uservice/templates/configmap.yaml b/helm-charts/common/llm-ctrl-uservice/templates/configmap.yaml new file mode 100644 index 000000000..4bc0fcea4 --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/templates/configmap.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "llm-ctrl-uservice.fullname" . }}-config + labels: + {{- include "llm-ctrl-uservice.labels" . 
| nindent 4 }} +data: + {{- if .Values.vLLM_ENDPOINT }} + vLLM_ENDPOINT: {{ .Values.vLLM_ENDPOINT | quote}} + {{- else }} + vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm" + {{- end }} + LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.vLLM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: "{{ .Values.global.no_proxy }},myvllm,vllm" + {{- end }} + LANGCHAIN_TRACING_V2: {{ .Values.global.LANGCHAIN_TRACING_V2 | quote }} + LANGCHAIN_API_KEY: {{ .Values.global.LANGCHAIN_API_KEY }} + LANGCHAIN_PROJECT: "opea-llm-uservice" + HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }} + HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }} diff --git a/helm-charts/common/llm-ctrl-uservice/templates/deployment.yaml b/helm-charts/common/llm-ctrl-uservice/templates/deployment.yaml new file mode 100644 index 000000000..9020a59de --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/templates/deployment.yaml @@ -0,0 +1,82 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-ctrl-uservice.fullname" . }} + labels: + {{- include "llm-ctrl-uservice.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "llm-ctrl-uservice.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "llm-ctrl-uservice.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "llm-ctrl-uservice.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: llm-vllm + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - mountPath: /tmp + name: tmp + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/helm-charts/common/llm-ctrl-uservice/templates/service.yaml b/helm-charts/common/llm-ctrl-uservice/templates/service.yaml new file mode 100644 index 000000000..ef5ae38fb --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llm-ctrl-uservice.fullname" . }} + labels: + {{- include "llm-ctrl-uservice.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: llm-vllm + selector: + {{- include "llm-ctrl-uservice.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/llm-ctrl-uservice/templates/tests/test-pod.yaml b/helm-charts/common/llm-ctrl-uservice/templates/tests/test-pod.yaml new file mode 100644 index 000000000..a0d86f3da --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/templates/tests/test-pod.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "llm-ctrl-uservice.fullname" . }}-testpod" + labels: + {{- include "llm-ctrl-uservice.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "llm-ctrl-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/common/llm-ctrl-uservice/values.yaml b/helm-charts/common/llm-ctrl-uservice/values.yaml new file mode 100644 index 000000000..b4f2521e8 --- /dev/null +++ b/helm-charts/common/llm-ctrl-uservice/values.yaml @@ -0,0 +1,104 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for llm-ctrl-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 +vLLM_ENDPOINT: "" +HF_HUB_DISABLE_PROGRESS_BARS: 1 +HF_HUB_ENABLE_HF_TRANSFER: 0 + +image: + repository: opea/llm-vllm + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + + +service: + type: ClusterIP + port: 9000 + targetPort: 9000 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: llm-vllm + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: llm-vllm + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: llm-vllm + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Model ID to be used by llm-vllm microservice +LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3" + +# Overriding the Model ID being used by vllm-openvino service.(As llm-vllm microservice depends on vllm-openvino, these 2 values should be same.) +vllm: + enabled: false + LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + LANGCHAIN_TRACING_V2: false + LANGCHAIN_API_KEY: "insert-your-langchain-key-here" + # set modelUseHostPath to host directory if you want to use hostPath volume for model storage + # comment out modeluseHostPath if you want to download the model from huggingface + # modelUseHostPath: "" + modelUseHostPath: "" diff --git a/helm-charts/common/vllm/README.md b/helm-charts/common/vllm/README.md index 0235a7443..d366667be 100644 --- a/helm-charts/common/vllm/README.md +++ b/helm-charts/common/vllm/README.md @@ -10,23 +10,43 @@ To install the chart, run the following: Note that you cannot use vllm as the service release name due to [environment variables conflict](https://docs.vllm.ai/en/stable/serving/env_vars.html#environment-variables). -```console +```bash cd GenAIInfra/helm-charts/common export MODELDIR=/mnt/opea-models export MODELNAME="Intel/neural-chat-7b-v3-3" export HFTOKEN="insert-your-huggingface-token-here" + +# If you are behind a proxy, please export the appropriate proxy values. +export http_proxy= +export https_proxy= + +``` + +- Deploy on XEON device: + +```bash helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -# To deploy on Gaudi enabled kubernetes cluster -# helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml ``` -By default, the vllm service will downloading the "Intel/neural-chat-7b-v3-3". +- To deploy on Gaudi enabled Kubernetes cluster: -If you already cached the model locally, you can pass it to container like this example: +```bash +helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values vllm/gaudi-values.yaml +``` + +- To deploy OpenVINO optimized vLLM on XEON device: + +```bash +helm -f vllm/openvino-values.yaml install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} +``` + +By default, the vLLM service will download "Intel/neural-chat-7b-v3-3" model. 
If you already cached the model locally, you can pass it to container like this example: +```bash MODELDIR=/mnt/opea-models MODELNAME="facebook/opt-125m" +``` ## Verify diff --git a/helm-charts/common/vllm/ci-openvino-values.yaml b/helm-charts/common/vllm/ci-openvino-values.yaml new file mode 120000 index 000000000..81b2b0484 --- /dev/null +++ b/helm-charts/common/vllm/ci-openvino-values.yaml @@ -0,0 +1 @@ +openvino-values.yaml \ No newline at end of file diff --git a/helm-charts/common/vllm/openvino-values.yaml b/helm-charts/common/vllm/openvino-values.yaml new file mode 100644 index 000000000..5e72d5b00 --- /dev/null +++ b/helm-charts/common/vllm/openvino-values.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Values for OpenVINO optimized vLLM. + +openvino_enabled: true + +image: + repository: opea/vllm-openvino + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +extraCmdArgs: [] + +LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + +CUDA_GRAPHS: "0" +VLLM_CPU_KVCACHE_SPACE: 50 +VLLM_OPENVINO_KVCACHE_SPACE: 32 +OMPI_MCA_btl_vader_single_copy_mechanism: none + +ov_command: ["/bin/bash"] diff --git a/helm-charts/common/vllm/templates/configmap.yaml b/helm-charts/common/vllm/templates/configmap.yaml index 80b9a97da..c794b6cb0 100644 --- a/helm-charts/common/vllm/templates/configmap.yaml +++ b/helm-charts/common/vllm/templates/configmap.yaml @@ -8,10 +8,17 @@ metadata: labels: {{- include "vllm.labels" . | nindent 4 }} data: + {{- if .Values.openvino_enabled }} + MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }} + PORT: {{ .Values.port | quote }} + {{- end }} HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }} + {{- end}} http_proxy: {{ .Values.global.http_proxy | quote }} https_proxy: {{ .Values.global.https_proxy | quote }} no_proxy: {{ .Values.global.no_proxy | quote }} @@ -23,3 +30,9 @@ data: {{- if .Values.VLLM_CPU_KVCACHE_SPACE }} VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}} {{- end }} + {{- if .Values.VLLM_OPENVINO_KVCACHE_SPACE }} + VLLM_OPENVINO_KVCACHE_SPACE: {{ .Values.VLLM_OPENVINO_KVCACHE_SPACE | quote }} + {{- end }} + {{- if .Values.CUDA_GRAPHS }} + CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }} + {{- end }} diff --git a/helm-charts/common/vllm/templates/deployment.yaml b/helm-charts/common/vllm/templates/deployment.yaml index 14c65f76b..799e32c83 100644 --- a/helm-charts/common/vllm/templates/deployment.yaml +++ b/helm-charts/common/vllm/templates/deployment.yaml @@ -45,7 +45,20 @@ spec: {{- end }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.openvino_enabled }} + command: {{ .Values.ov_command }} + {{- end }} args: + {{- if .Values.openvino_enabled }} + - "-c" + - | + cd / && \ + python3 -m vllm.entrypoints.openai.api_server \ + --model {{ .Values.LLM_MODEL_ID | quote }} \ + --host 0.0.0.0 \ + --port {{ .Values.port | quote }} \ + --download-dir /data + {{- else }} {{- if .Values.extraCmdArgs }} {{- range .Values.extraCmdArgs }} - {{ . 
| quote }} @@ -59,6 +72,7 @@ spec: - {{ .Values.port | quote }} - "--download-dir" - "/data" + {{- end }} volumeMounts: - mountPath: /data name: model-volume
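As a quick sanity check of the new OpenVINO branch in the vLLM deployment template, the chart can be rendered locally to confirm that the `command`/`args` wiring resolves as expected. A hedged sketch, assuming it is run from `helm-charts/common` with a dummy token:

```bash
# Render the chart with the OpenVINO values and inspect the generated container spec
helm template myvllm vllm -f vllm/openvino-values.yaml \
  --set global.HUGGINGFACEHUB_API_TOKEN=dummy \
  | grep -A 15 "containers:"
```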