Added max_new_tokens as a config option to llm yaml block #1317

Merged · 6 commits · Nov 26, 2023
Changes from 1 commit
15 changes: 15 additions & 0 deletions fern/docs/pages/manual/settings.mdx
@@ -77,4 +77,19 @@ Missing variables with no default will produce an error.
```yaml
server:
  port: ${PORT:8001}
```

## LLM config options

The `llm` section of the settings allows for the following configurations:

- `mode`: how to run your LLM
- `max_new_tokens`: the maximum number of new tokens the LLM will generate and add to the context window (Llama.cpp defaults to 256)

Example:

```yaml
llm:
  mode: local
  max_new_tokens: 256
```
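
Because the settings file supports the environment-variable substitution documented above, the new option can also be given an environment-backed default. A minimal sketch, assuming a hypothetical `LLM_MAX_NEW_TOKENS` variable (the name is illustrative) with 256 as the fallback:

```yaml
llm:
  mode: local
  # LLM_MAX_NEW_TOKENS is a hypothetical variable name; 256 is used when it is unset
  max_new_tokens: ${LLM_MAX_NEW_TOKENS:256}
```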
1 change: 1 addition & 0 deletions private_gpt/components/llm/llm_component.py
@@ -31,6 +31,7 @@ def __init__(self, settings: Settings) -> None:
self.llm = LlamaCPP(
model_path=str(models_path / settings.local.llm_hf_model_file),
temperature=0.1,
max_new_tokens=settings.llm.max_new_tokens,
# llama2 has a context window of 4096 tokens,
# but we set it lower to allow for some wiggle room
context_window=3900,
1 change: 1 addition & 0 deletions private_gpt/settings/settings.py
@@ -82,6 +82,7 @@ class DataSettings(BaseModel):

class LLMSettings(BaseModel):
mode: Literal["local", "openai", "sagemaker", "mock"]
max_new_tokens: int


class VectorstoreSettings(BaseModel):
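A possible follow-up, not part of this commit, would be to give the new field a default and a description via Pydantic's `Field`, so configurations that omit `max_new_tokens` keep loading. A minimal sketch under that assumption:

```python
from typing import Literal

from pydantic import BaseModel, Field


class LLMSettings(BaseModel):
    mode: Literal["local", "openai", "sagemaker", "mock"]
    # Hypothetical refinement: default to Llama.cpp's 256 and document the field
    max_new_tokens: int = Field(
        256,
        description="Maximum number of new tokens the LLM will generate and add to the context window.",
    )
```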
1 change: 1 addition & 0 deletions settings.yaml
@@ -22,6 +22,7 @@ ui:

llm:
mode: local
max_new_tokens: 256
embedding:
# Should be matching the value above in most cases
mode: local