diff --git a/frontend/src/components/LLMSelection/NativeLLMOptions/index.jsx b/frontend/src/components/LLMSelection/NativeLLMOptions/index.jsx
index a41a81fe87..457c09322d 100644
--- a/frontend/src/components/LLMSelection/NativeLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/NativeLLMOptions/index.jsx
@@ -54,31 +54,49 @@ function NativeModelSelection({ settings }) {
}
return (
-    <div className="flex flex-col w-60">
-      <label className="text-white text-sm font-semibold block mb-4">
-        Model Selection
-      </label>
-      <select name="NativeLLMModelPref" required={true}>
-        {/* downloaded model <option> list */}
-      </select>
-    </div>
+    <>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Model Selection
+        </label>
+        <select name="NativeLLMModelPref" required={true}>
+          {/* downloaded model <option> list, unchanged */}
+        </select>
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Token context window
+        </label>
+        <input
+          type="number"
+          name="NativeLLMTokenLimit"
+          placeholder="4096"
+          min={1}
+          onScroll={(e) => e.target.blur()}
+          defaultValue={settings?.NativeLLMTokenLimit}
+          required={true}
+          autoComplete="off"
+        />
+      </div>
+    </>
);
}
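
The input's `name` attribute is what ties this UI field to the server-side `KEY_MAPPING` entry added further down: the settings form serializes fields by name and posts them as one object. A minimal sketch of that handoff, assuming the generic form handler and the `System.updateSystem` wrapper behave as on the other settings screens (the handler name here is illustrative, not taken from this diff):

```js
// Hedged sketch: generic settings-form submit handler.
// FormData keys come from each input's `name`, so the new field
// arrives server-side as { NativeLLMTokenLimit: "4096" } (a string).
async function handleSubmit(event) {
  event.preventDefault();
  const data = {};
  const form = new FormData(event.target);
  for (const [key, value] of form.entries()) data[key] = value;
  await System.updateSystem(data); // assumed wrapper around the update-env endpoint
}
```
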
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 53d42f2e2e..1c4069ac9f 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -174,6 +174,7 @@ const SystemSettings = {
...(llmProvider === "native"
? {
NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF,
+ NativeLLMTokenLimit: process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT,
        // For embedding credentials when native is selected.
OpenAiKey: !!process.env.OPEN_AI_KEY,
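
Unlike the credential fields below it, which are boolean-masked (`!!process.env.OPEN_AI_KEY`) so secrets never leave the server, the token limit is returned verbatim because the frontend needs the raw number to prefill the input via `defaultValue={settings?.NativeLLMTokenLimit}`. A hedged sketch of the payload shape for `llmProvider === "native"` after this change (example values, not from the source):

```js
{
  NativeLLMModelPref: "my-model.gguf", // hypothetical filename
  NativeLLMTokenLimit: "4096",         // raw ENV string, echoed into the UI
  OpenAiKey: true,                     // boolean mask, never the key itself
}
```
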
diff --git a/server/utils/AiProviders/native/index.js b/server/utils/AiProviders/native/index.js
index fff904c462..de1a97f3d7 100644
--- a/server/utils/AiProviders/native/index.js
+++ b/server/utils/AiProviders/native/index.js
@@ -94,8 +94,6 @@ class NativeLLM {
}
// Ensure the user set a value for the token limit
- // and if undefined - assume 4096 window.
- // DEV: Currently this ENV is not configurable.
promptWindowLimit() {
const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
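
With the two stale comment lines gone, the method reads as below (the hunk cuts off mid-body, so the error message is an assumption). Note that after the `|| 4096` fallback, `!limit` can never be truthy; the guard really protects against a non-numeric value such as `NATIVE_LLM_MODEL_TOKEN_LIMIT=abc`, while the `nonZero` check added in `updateENV.js` below keeps `0` from being saved in the first place.

```js
// Hedged reconstruction of the full method after this patch;
// the throw message is assumed, not shown in the hunk.
promptWindowLimit() {
  const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
  if (!limit || isNaN(Number(limit)))
    throw new Error("No valid token context window was set.");
  return Number(limit);
}
```
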
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 54e6840291..f44b040b71 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -110,6 +110,11 @@ const KEY_MAPPING = {
checks: [isDownloadedModel],
},
+ NativeLLMTokenLimit: {
+ envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
+ checks: [nonZero],
+ },
+
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
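
The new entry follows the existing `KEY_MAPPING` convention: `envKey` names the ENV variable to write, and `checks` lists validators run against the incoming value before it is persisted. `nonZero` is defined elsewhere in this file; a plausible shape, assuming the usual contract that validators return an error string on failure and `null` on success:

```js
// Assumed sketch of the nonZero validator (its definition is outside this diff).
const nonZero = (input = "") => {
  if (isNaN(Number(input)) || Number(input) <= 0)
    return "Value must be a number greater than zero.";
  return null;
};
```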