From 613c33b28cd51b82208463669f5d2577bd40a8ba Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Fri, 22 Nov 2024 13:03:23 +0000 Subject: [PATCH] Add vLLM support for DocSum Signed-off-by: Eero Tamminen --- helm-charts/docsum/Chart.yaml | 5 +++ helm-charts/docsum/README.md | 6 ++- helm-charts/docsum/ci-gaudi-tgi-values.yaml | 1 + helm-charts/docsum/ci-gaudi-values.yaml | 1 - helm-charts/docsum/ci-gaudi-vllm-values.yaml | 1 + ...audi-values.yaml => gaudi-tgi-values.yaml} | 0 helm-charts/docsum/gaudi-vllm-values.yaml | 45 +++++++++++++++++++ helm-charts/docsum/values.yaml | 7 ++- 8 files changed, 62 insertions(+), 4 deletions(-) create mode 120000 helm-charts/docsum/ci-gaudi-tgi-values.yaml delete mode 120000 helm-charts/docsum/ci-gaudi-values.yaml create mode 120000 helm-charts/docsum/ci-gaudi-vllm-values.yaml rename helm-charts/docsum/{gaudi-values.yaml => gaudi-tgi-values.yaml} (100%) create mode 100644 helm-charts/docsum/gaudi-vllm-values.yaml diff --git a/helm-charts/docsum/Chart.yaml b/helm-charts/docsum/Chart.yaml index 69e900c0b..8acd38959 100644 --- a/helm-charts/docsum/Chart.yaml +++ b/helm-charts/docsum/Chart.yaml @@ -9,6 +9,11 @@ dependencies: - name: tgi version: 0-latest repository: "file://../common/tgi" + condition: tgi.enabled + - name: vllm + version: 0-latest + repository: "file://../common/vllm" + condition: vllm.enabled - name: llm-uservice version: 0-latest repository: "file://../common/llm-uservice" diff --git a/helm-charts/docsum/README.md b/helm-charts/docsum/README.md index d9f4035a1..8ee5c504f 100644 --- a/helm-charts/docsum/README.md +++ b/helm-charts/docsum/README.md @@ -16,8 +16,10 @@ export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="Intel/neural-chat-7b-v3-3" helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -# To use Gaudi device -# helm install docsum docsum --set 
global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml +# To use Gaudi device with TGI +# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml ... +# To use Gaudi device with vLLM +# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-vllm-values.yaml ... ``` ## Verify diff --git a/helm-charts/docsum/ci-gaudi-tgi-values.yaml b/helm-charts/docsum/ci-gaudi-tgi-values.yaml new file mode 120000 index 000000000..8702c8f68 --- /dev/null +++ b/helm-charts/docsum/ci-gaudi-tgi-values.yaml @@ -0,0 +1 @@ +gaudi-tgi-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/ci-gaudi-values.yaml b/helm-charts/docsum/ci-gaudi-values.yaml deleted file mode 120000 index 7243d31b2..000000000 --- a/helm-charts/docsum/ci-gaudi-values.yaml +++ /dev/null @@ -1 +0,0 @@ -gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/ci-gaudi-vllm-values.yaml b/helm-charts/docsum/ci-gaudi-vllm-values.yaml new file mode 120000 index 000000000..d9ab8c698 --- /dev/null +++ b/helm-charts/docsum/ci-gaudi-vllm-values.yaml @@ -0,0 +1 @@ +gaudi-vllm-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-tgi-values.yaml similarity index 100% rename from helm-charts/docsum/gaudi-values.yaml rename to helm-charts/docsum/gaudi-tgi-values.yaml diff --git a/helm-charts/docsum/gaudi-vllm-values.yaml b/helm-charts/docsum/gaudi-vllm-values.yaml new file mode 100644 index 000000000..6f376c93d --- /dev/null +++ b/helm-charts/docsum/gaudi-vllm-values.yaml @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: false + +llm-uservice: + image: + repository: opea/llm-docsum-vllm + tag: "latest" + +vllm: + enabled: true + image: 
+ repository: opea/vllm-gaudi + tag: "latest" + resources: + limits: + habana.ai/gaudi: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + + PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + + extraCmdArgs: [ + "--tensor-parallel-size", "1", + "--block-size", "128", + "--max-num-seqs", "256", + "--max-seq-len-to-capture", "2048" + ] diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index a473a15f3..bd1bba89f 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -81,11 +81,16 @@ affinity: {} # To override values in subchart llm-uservice llm-uservice: + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 image: repository: opea/llm-docsum-tgi -# To override values in subchart tgi +# To override values in tgi/vllm subcharts tgi: + enabled: true + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 +vllm: + enabled: false LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 docsum-ui: