Skip to content

Commit

Permalink
Merge pull request #588 from asoderlind/fix/as/embedding-size-mismatch
Browse files Browse the repository at this point in the history
raise more legible error if the word embedding dimensions don't match
  • Loading branch information
dartpain authored Oct 16, 2023
2 parents 450dde3 + d51cd8d commit d899b6a
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ urllib3==1.26.17
vine==5.0.0
wcwidth==0.2.6
yarl==1.8.2
sentence-transformers==2.2.2
26 changes: 22 additions & 4 deletions application/vectorstore/faiss.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,44 @@
from application.vectorstore.base import BaseVectorStore
from langchain.vectorstores import FAISS
from application.vectorstore.base import BaseVectorStore
from application.core.settings import settings

class FaissStore(BaseVectorStore):
    """Vector store wrapper around a LangChain ``FAISS`` index.

    The index is either built from documents handed to the constructor or
    loaded from ``path``; afterwards its dimension is checked against the
    embeddings in use so that a mismatch fails early with a readable error.
    """

    def __init__(self, path, embeddings_key, docs_init=None):
        """Build or load the FAISS index, then validate its dimension.

        Args:
            path: Location given to ``FAISS.load_local`` when ``docs_init``
                is empty or omitted.
            embeddings_key: Key forwarded to ``self._get_embeddings`` together
                with ``settings.EMBEDDINGS_NAME``.
            docs_init: Optional documents. When truthy, the index is built
                from them with ``FAISS.from_documents`` instead of being
                loaded from ``path``.

        Raises:
            AttributeError: Propagated from ``assert_embedding_dimensions``
                when the embedding dimension cannot be read.
            ValueError: Propagated from ``assert_embedding_dimensions`` when
                the embedding and index dimensions differ.
        """
        super().__init__()
        self.path = path
        # Resolve the embeddings once so the index and the dimension check
        # below are guaranteed to use the very same object.
        embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
        if docs_init:
            self.docsearch = FAISS.from_documents(docs_init, embeddings)
        else:
            self.docsearch = FAISS.load_local(self.path, embeddings)
        self.assert_embedding_dimensions(embeddings)

    def search(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.similarity_search``."""
        return self.docsearch.similarity_search(*args, **kwargs)

    def add_texts(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.add_texts``."""
        return self.docsearch.add_texts(*args, **kwargs)

    def save_local(self, *args, **kwargs):
        """Forward all arguments to ``self.docsearch.save_local``."""
        return self.docsearch.save_local(*args, **kwargs)

    def assert_embedding_dimensions(self, embeddings):
        """Check that the index dimension matches the embedding dimension.

        The check only runs when ``settings.EMBEDDINGS_NAME`` is
        ``"huggingface_sentence-transformers/all-mpnet-base-v2"``; for any
        other embeddings name the method returns without doing anything.

        Args:
            embeddings: Embeddings object whose ``client[1]`` is expected to
                expose ``word_embedding_dimension``.

        Raises:
            AttributeError: If ``embeddings.client[1].word_embedding_dimension``
                cannot be read (missing attribute, missing element, or a
                client that does not support indexing).
            ValueError: If the embedding dimension differs from
                ``self.docsearch.index.d``.
        """
        if settings.EMBEDDINGS_NAME != "huggingface_sentence-transformers/all-mpnet-base-v2":
            return

        try:
            word_embedding_dimension = embeddings.client[1].word_embedding_dimension
        except (AttributeError, IndexError, TypeError) as e:
            # The ``[1]`` lookup can fail with IndexError/TypeError as well;
            # surface every variant as the same, documented AttributeError.
            raise AttributeError(
                "word_embedding_dimension not found in embeddings.client[1]"
            ) from e

        docsearch_index_dimension = self.docsearch.index.d
        if word_embedding_dimension != docsearch_index_dimension:
            raise ValueError(
                f"word_embedding_dimension ({word_embedding_dimension}) "
                + f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})"
            )

19 changes: 19 additions & 0 deletions tests/test_vector_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Tests regarding the vector store class, including checking
compatibility between different transformers and local vector
stores (index.faiss)
"""
import pytest
from application.vectorstore.faiss import FaissStore
from application.core.settings import settings

def test_init_local_faiss_store_huggingface():
    """
    Test that asserts that trying to initialize a FaissStore with
    the huggingface sentence transformer below together with the
    index.faiss file in the application/ folder results in a
    dimension mismatch error.
    """
    # ``settings`` is shared process-wide: remember the current value and put
    # it back afterwards so this override cannot leak into other tests.
    previous_embeddings_name = settings.EMBEDDINGS_NAME
    settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2"
    try:
        with pytest.raises(ValueError):
            FaissStore("application/", "", None)
    finally:
        settings.EMBEDDINGS_NAME = previous_embeddings_name

2 comments on commit d899b6a

@vercel
Copy link

@vercel vercel bot commented on d899b6a Oct 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

docs-gpt – ./frontend

docs-gpt-git-main-arc53.vercel.app
docs-gpt-arc53.vercel.app
docs-gpt-brown.vercel.app

@vercel
Copy link

@vercel vercel bot commented on d899b6a Oct 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextra-docsgpt – ./docs

nextra-docsgpt.vercel.app
nextra-docsgpt-arc53.vercel.app
docs.docsgpt.co.uk
nextra-docsgpt-git-main-arc53.vercel.app

Please sign in to comment.