From ae510619f069ef2530bbba752a240d501dc2d047 Mon Sep 17 00:00:00 2001
From: Sean Hatfield <seanhatfield5@gmail.com>
Date: Tue, 17 Dec 2024 04:16:20 +0800
Subject: [PATCH] Purge cached docs and remove docs from all workspaces on
 vectorDB/embedder changes (#2819)

* wip: remove all docs and clear the vector db on embedder/vector db change

* purge all cached docs and remove docs from workspaces on vectordb/embedder change

* lint

* remove unneeded console log

* remove reset vector stores endpoint and move to server side updateENV with postUpdate check

* reset embed module

* remove unused import

* simplify deletion process
  - rescoped document deletion to be more general for speed, since everything needs to be reset anyway
  - fixed an issue where cached docs that were unembedded and not in any workspace were not removed

* add back missing readme file
  - update warning modal text

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
---
 .../src/components/ChangeWarning/index.jsx    | 74 +++++++++++--------
 .../EmbeddingPreference/index.jsx             |  2 +-
 .../GeneralSettings/VectorDatabase/index.jsx  |  2 +-
 server/models/vectors.js                      | 34 ++++++---
 server/utils/files/index.js                   | 11 +++
 server/utils/helpers/index.js                 |  5 +-
 server/utils/helpers/updateENV.js             | 23 ++++++
 .../utils/vectorStore/resetAllVectorStores.js | 48 ++++++++++++
 8 files changed, 153 insertions(+), 46 deletions(-)
 create mode 100644 server/utils/vectorStore/resetAllVectorStores.js

diff --git a/frontend/src/components/ChangeWarning/index.jsx b/frontend/src/components/ChangeWarning/index.jsx
index 42b211baf4..2e0950a080 100644
--- a/frontend/src/components/ChangeWarning/index.jsx
+++ b/frontend/src/components/ChangeWarning/index.jsx
@@ -1,4 +1,4 @@
-import { Warning } from "@phosphor-icons/react";
+import { Warning, X } from "@phosphor-icons/react";
 
 export default function ChangeWarningModal({
   warningText = "",
@@ -6,41 +6,55 @@ export default function ChangeWarningModal({
   onConfirm,
 }) {
   return (
-    <div className="relative w-full max-w-2xl max-h-full">
-      <div className="relative bg-main-gradient rounded-lg shadow">
-        <div className="flex items-start justify-between p-4 border-b rounded-t border-gray-500/50">
-          <div className="flex items-center gap-2">
-            <Warning
-              className="text-yellow-300 text-lg w-6 h-6"
-              weight="fill"
-            />
-            <h3 className="text-xl font-semibold text-yellow-300">Warning</h3>
-          </div>
+    <div className="w-full max-w-2xl bg-theme-bg-secondary rounded-lg shadow border-2 border-theme-modal-border overflow-hidden z-9999">
+      <div className="relative p-6 border-b rounded-t border-theme-modal-border">
+        <div className="w-full flex gap-x-2 items-center">
+          <Warning className="text-red-500 w-6 h-6" weight="fill" />
+          <h3 className="text-xl font-semibold text-red-500 overflow-hidden overflow-ellipsis whitespace-nowrap">
+            WARNING - This action is irreversible
+          </h3>
         </div>
-        <div className="w-[550px] p-6 text-white">
-          <p>
-            {warningText}
+        <button
+          onClick={onClose}
+          type="button"
+          className="absolute top-4 right-4 transition-all duration-300 bg-transparent rounded-lg text-sm p-1 inline-flex items-center hover:bg-theme-modal-border hover:border-theme-modal-border hover:border-opacity-50 border-transparent border"
+        >
+          <X size={24} weight="bold" className="text-white" />
+        </button>
+      </div>
+      <div
+        className="h-full w-full overflow-y-auto"
+        style={{ maxHeight: "calc(100vh - 200px)" }}
+      >
+        <div className="py-7 px-9 space-y-2 flex-col">
+          <p className="text-white">
+            {warningText.split("\\n").map((line, index) => (
+              <span key={index}>
+                {line}
+                <br />
+              </span>
+            ))}
             <br />
             <br />
             Are you sure you want to proceed?
           </p>
         </div>
-
-        <div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
-          <button
-            onClick={onClose}
-            type="button"
-            className="px-4 py-2 rounded-lg text-white hover:bg-red-500 transition-all duration-300"
-          >
-            Cancel
-          </button>
-          <button
-            onClick={onConfirm}
-            className="transition-all duration-300 border border-slate-200 px-4 py-2 rounded-lg text-white text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 focus:ring-gray-800"
-          >
-            Confirm
-          </button>
-        </div>
+      </div>
+      <div className="flex w-full justify-end items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
+        <button
+          onClick={onClose}
+          type="button"
+          className="transition-all duration-300 bg-transparent text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
+        >
+          Cancel
+        </button>
+        <button
+          onClick={onConfirm}
+          type="submit"
+          className="transition-all duration-300 bg-red-500 light:text-white text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
+        >
+          Confirm
+        </button>
       </div>
     </div>
   );
diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
index 893948472d..77853e0a99 100644
--- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
@@ -361,7 +361,7 @@ export default function GeneralEmbeddingPreference() {
       )}
       <ModalWrapper isOpen={isOpen}>
         <ChangeWarningModal
-          warningText="Switching the embedding model will break previously embedded documents from working during chat. They will need to un-embed from every workspace and fully removed and re-uploaded so they can be embed by the new embedding model."
+          warningText="Switching the embedding model will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
           onClose={closeModal}
           onConfirm={handleSaveSettings}
         />
diff --git a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
index f7246de542..11e70d8fd7 100644
--- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
+++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx
@@ -308,7 +308,7 @@ export default function GeneralVectorDatabase() {
       )}
       <ModalWrapper isOpen={isOpen}>
         <ChangeWarningModal
-          warningText="Switching the vector database will ignore previously embedded documents and future similarity search results. They will need to be re-added to each workspace."
+          warningText="Switching the vector database will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
           onClose={closeModal}
           onConfirm={handleSaveSettings}
         />
diff --git a/server/models/vectors.js b/server/models/vectors.js
index f6b79964a0..3653303da2 100644
--- a/server/models/vectors.js
+++ b/server/models/vectors.js
@@ -25,6 +25,19 @@ const DocumentVectors = {
     }
   },
 
+  where: async function (clause = {}, limit) {
+    try {
+      const results = await prisma.document_vectors.findMany({
+        where: clause,
+        take: limit || undefined,
+      });
+      return results;
+    } catch (error) {
+      console.error("Where query failed", error);
+      return [];
+    }
+  },
+
   deleteForWorkspace: async function (workspaceId) {
     const documents = await Document.forWorkspace(workspaceId);
     const docIds = [...new Set(documents.map((doc) => doc.docId))];
@@ -40,27 +53,24 @@ const DocumentVectors = {
     }
   },
 
-  where: async function (clause = {}, limit) {
+  deleteIds: async function (ids = []) {
     try {
-      const results = await prisma.document_vectors.findMany({
-        where: clause,
-        take: limit || undefined,
+      await prisma.document_vectors.deleteMany({
+        where: { id: { in: ids } },
       });
-      return results;
+      return true;
     } catch (error) {
-      console.error("Where query failed", error);
-      return [];
+      console.error("Delete IDs failed", error);
+      return false;
     }
   },
 
-  deleteIds: async function (ids = []) {
+  delete: async function (clause = {}) {
     try {
-      await prisma.document_vectors.deleteMany({
-        where: { id: { in: ids } },
-      });
+      await prisma.document_vectors.deleteMany({ where: clause });
       return true;
     } catch (error) {
-      console.error("Delete IDs failed", error);
+      console.error("Delete failed", error);
       return false;
     }
   },
diff --git a/server/utils/files/index.js b/server/utils/files/index.js
index 598884f999..625d8582cd 100644
--- a/server/utils/files/index.js
+++ b/server/utils/files/index.js
@@ -281,6 +281,16 @@ async function getWatchedDocumentFilenames(filenames = []) {
   }, {});
 }
 
+/**
+ * Purges the entire vector-cache folder and recreates it.
+ * @returns {void}
+ */
+function purgeEntireVectorCache() {
+  fs.rmSync(vectorCachePath, { recursive: true, force: true });
+  fs.mkdirSync(vectorCachePath);
+  return;
+}
+
 module.exports = {
   findDocumentInDocuments,
   cachedVectorInformation,
@@ -293,4 +303,5 @@ module.exports = {
   isWithin,
   documentsPath,
   hasVectorCachedFiles,
+  purgeEntireVectorCache,
 };
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index e599078b6a..748e4fb1b1 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -52,10 +52,11 @@
 
 /**
  * Gets the systems current vector database provider.
+ * @param {('pinecone' | 'chroma' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explicit provider.
  * @returns { BaseVectorDatabaseProvider}
  */
-function getVectorDbClass() {
-  const vectorSelection = process.env.VECTOR_DB || "lancedb";
+function getVectorDbClass(getExactly = null) {
+  const vectorSelection = getExactly ?? process.env.VECTOR_DB ?? "lancedb";
   switch (vectorSelection) {
     case "pinecone":
       const { Pinecone } = require("../vectorDbProviders/pinecone");
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 3cfc13e6e1..948703dca2 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -1,3 +1,5 @@
+const { resetAllVectorStores } = require("../vectorStore/resetAllVectorStores");
+
 const KEY_MAPPING = {
   LLMProvider: {
     envKey: "LLM_PROVIDER",
@@ -248,6 +250,7 @@ const KEY_MAPPING = {
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],
+    postUpdate: [handleVectorStoreReset],
   },
   EmbeddingBasePath: {
     envKey: "EMBEDDING_BASE_PATH",
@@ -256,6 +259,7 @@ const KEY_MAPPING = {
   EmbeddingModelPref: {
     envKey: "EMBEDDING_MODEL_PREF",
     checks: [isNotEmpty],
+    postUpdate: [handleVectorStoreReset],
   },
   EmbeddingModelMaxChunkLength: {
     envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
@@ -276,6 +280,7 @@ const KEY_MAPPING = {
   VectorDB: {
     envKey: "VECTOR_DB",
     checks: [isNotEmpty, supportedVectorDB],
+    postUpdate: [handleVectorStoreReset],
   },
 
   // Chroma Options
@@ -878,6 +883,24 @@ function noRestrictedChars(input = "") {
     : null;
 }
 
+async function handleVectorStoreReset(key, prevValue, nextValue) {
+  if (prevValue === nextValue) return;
+  if (key === "VectorDB") {
+    console.log(
+      `Vector configuration changed from ${prevValue} to ${nextValue} - resetting ${prevValue} namespaces`
+    );
+    return await resetAllVectorStores({ vectorDbKey: prevValue });
+  }
+
+  if (key === "EmbeddingEngine" || key === "EmbeddingModelPref") {
+    console.log(
+      `${key} changed from ${prevValue} to ${nextValue} - resetting ${process.env.VECTOR_DB} namespaces`
+    );
+    return await resetAllVectorStores({ vectorDbKey: process.env.VECTOR_DB });
+  }
+  return false;
+}
+
 // This will force update .env variables which for any which reason were not able to be parsed or
 // read from an ENV file as this seems to be a complicating step for many so allowing people to write
 // to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks
diff --git a/server/utils/vectorStore/resetAllVectorStores.js b/server/utils/vectorStore/resetAllVectorStores.js
new file mode 100644
index 0000000000..3bb9a5ec4c
--- /dev/null
+++ b/server/utils/vectorStore/resetAllVectorStores.js
@@ -0,0 +1,48 @@
+const { Workspace } = require("../../models/workspace");
+const { Document } = require("../../models/documents");
+const { DocumentVectors } = require("../../models/vectors");
+const { EventLogs } = require("../../models/eventLogs");
+const { purgeEntireVectorCache } = require("../files");
+const { getVectorDbClass } = require("../helpers");
+
+/**
+ * Resets the vector database and all associated content:
+ * - Purges the entire vector-cache folder.
+ * - Deletes all document vectors from the database.
+ * - Deletes all documents from the database.
+ * - Deletes all vector db namespaces for each workspace.
+ * - Logs an event indicating the reset.
+ * @param {{vectorDbKey: string}} options - `vectorDbKey` is the _previous_ vector database provider name that we will be resetting.
+ * @returns {Promise<boolean>} - True if successful, false otherwise.
+ */
+async function resetAllVectorStores({ vectorDbKey }) {
+  try {
+    const workspaces = await Workspace.where();
+    purgeEntireVectorCache(); // Purges the entire vector-cache folder.
+    await DocumentVectors.delete(); // Deletes all document vectors from the database.
+    await Document.delete(); // Deletes all documents from the database.
+    await EventLogs.logEvent("workspace_vectors_reset", {
+      reason: "System vector configuration changed",
+    });
+
+    console.log(
+      "Resetting anythingllm managed vector namespaces for",
+      vectorDbKey
+    );
+    const VectorDb = getVectorDbClass(vectorDbKey);
+    for (const workspace of workspaces) {
+      try {
+        await VectorDb["delete-namespace"]({ namespace: workspace.slug });
+      } catch (e) {
+        console.error(e.message);
+      }
+    }
+
+    return true;
+  } catch (error) {
+    console.error("Failed to reset vector stores:", error);
+    return false;
+  }
+}
+
+module.exports = { resetAllVectorStores };