From 869233f0e4f03dc23e5fae43cf7cb55350afdee9 Mon Sep 17 00:00:00 2001
From: Naveen Kannan <107133694+naveenk2022@users.noreply.github.com>
Date: Wed, 17 Jan 2024 12:10:45 -0500
Subject: [PATCH] fix: Adding an LLM param to fix broken generator from
 llamacpp (#1519)

---
 private_gpt/components/llm/llm_component.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index d6a335f8c..971cfa3bf 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -42,7 +42,7 @@ def __init__(self, settings: Settings) -> None:
                     context_window=settings.llm.context_window,
                     generate_kwargs={},
                     # All to GPU
-                    model_kwargs={"n_gpu_layers": -1},
+                    model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
                     # transform inputs into Llama2 format
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
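
Note: below is a minimal standalone sketch (not the project's actual factory code) of how the patched
model_kwargs flow through llama-index's LlamaCPP wrapper into llama-cpp-python. The import path matches
llama-index releases current at the time of this patch (pre-0.10), and the model_path value is a
hypothetical placeholder; the relevant piece is the "offload_kqv": True entry added by this change.

    # Sketch of constructing the local LLM with the patched model_kwargs.
    # model_kwargs is forwarded to llama-cpp-python's Llama() constructor.
    from llama_index.llms import LlamaCPP

    llm = LlamaCPP(
        model_path="models/model.gguf",  # hypothetical path, not from the patch
        context_window=3900,
        max_new_tokens=256,
        generate_kwargs={},
        # All layers to GPU; per the patch subject, also offloading the KV cache
        # (offload_kqv=True) fixes the broken streaming generator from llamacpp.
        model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
    )

    # Streaming usage: iterate the generator that the patch is meant to fix.
    for chunk in llm.stream_complete("Why is the sky blue?"):
        print(chunk.delta, end="", flush=True)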