-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #588 from asoderlind/fix/as/embedding-size-mismatch
raise more legible error if the word embedding dimensions don't match
- Loading branch information
Showing 3 changed files with 42 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,3 +104,4 @@ urllib3==1.26.17 | |
vine==5.0.0 | ||
wcwidth==0.2.6 | ||
yarl==1.8.2 | ||
sentence-transformers==2.2.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,44 @@ | ||
from application.vectorstore.base import BaseVectorStore | ||
from langchain.vectorstores import FAISS | ||
from application.vectorstore.base import BaseVectorStore | ||
from application.core.settings import settings | ||
|
||
class FaissStore(BaseVectorStore):
    """Vector store that wraps a LangChain ``FAISS`` index.

    The index is either built from documents or loaded from ``path``; in both
    cases the embeddings are validated against the index right after
    construction so a mismatch surfaces as a readable error.
    """

    def __init__(self, path, embeddings_key, docs_init=None):
        """Build or load the FAISS index and validate its dimensions.

        Args:
            path: Directory the index is loaded from when ``docs_init`` is
                falsy.
            embeddings_key: Key passed to ``self._get_embeddings`` together
                with ``settings.EMBEDDINGS_NAME``.
            docs_init: Optional documents; when truthy the index is built
                from them instead of being loaded from ``path``.

        Raises:
            AttributeError: If the embedding dimension cannot be read
                (see ``assert_embedding_dimensions``).
            ValueError: If the embedding dimension does not match the index.
        """
        super().__init__()
        self.path = path
        # Create the embeddings once so the same object is used both to
        # build/load the index and for the dimension check below.
        embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
        if docs_init:
            self.docsearch = FAISS.from_documents(docs_init, embeddings)
        else:
            self.docsearch = FAISS.load_local(self.path, embeddings)
        self.assert_embedding_dimensions(embeddings)

    def search(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.similarity_search``."""
        return self.docsearch.similarity_search(*args, **kwargs)

    def add_texts(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.add_texts``."""
        return self.docsearch.add_texts(*args, **kwargs)

    def save_local(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.save_local``."""
        return self.docsearch.save_local(*args, **kwargs)

    def assert_embedding_dimensions(self, embeddings):
        """Check that the embeddings and the docsearch index agree on dimension.

        The check only runs when ``settings.EMBEDDINGS_NAME`` is
        ``"huggingface_sentence-transformers/all-mpnet-base-v2"``; for any
        other embeddings name the method returns without checking.

        Args:
            embeddings: Embeddings object whose ``client[1]`` is expected to
                expose ``word_embedding_dimension``.

        Raises:
            AttributeError: If ``word_embedding_dimension`` cannot be read
                from ``embeddings.client[1]``.
            ValueError: If that dimension differs from
                ``self.docsearch.index.d``.
        """
        if settings.EMBEDDINGS_NAME != "huggingface_sentence-transformers/all-mpnet-base-v2":
            return
        try:
            word_embedding_dimension = embeddings.client[1].word_embedding_dimension
        except (AttributeError, IndexError, TypeError) as e:
            # A client that is too short or not indexable fails with
            # IndexError/TypeError rather than AttributeError; report every
            # variant through the same legible AttributeError.
            raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e
        docsearch_index_dimension = self.docsearch.index.d
        if word_embedding_dimension != docsearch_index_dimension:
            raise ValueError(
                f"word_embedding_dimension ({word_embedding_dimension}) "
                f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})"
            )
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
""" | ||
Tests regarding the vector store class, including checking | ||
compatibility between different transformers and local vector | ||
stores (index.faiss) | ||
""" | ||
import pytest | ||
from application.vectorstore.faiss import FaissStore | ||
from application.core.settings import settings | ||
|
||
def test_init_local_faiss_store_huggingface():
    """
    Test that asserts that trying to initialize a FaissStore with
    the huggingface sentence transformer below together with the
    index.faiss file in the application/ folder results in a
    dimension mismatch error.

    The global ``settings.EMBEDDINGS_NAME`` is restored afterwards so the
    override does not leak into other tests.
    """
    previous_embeddings_name = settings.EMBEDDINGS_NAME
    settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2"
    try:
        with pytest.raises(ValueError):
            FaissStore("application/", "", None)
    finally:
        # Restore the shared settings object whether or not the check passed.
        settings.EMBEDDINGS_NAME = previous_embeddings_name
d899b6a
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
docs-gpt – ./frontend
docs-gpt-git-main-arc53.vercel.app
docs-gpt-arc53.vercel.app
docs-gpt-brown.vercel.app
d899b6a
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
nextra-docsgpt – ./docs
nextra-docsgpt.vercel.app
nextra-docsgpt-arc53.vercel.app
docs.docsgpt.co.uk
nextra-docsgpt-git-main-arc53.vercel.app