Skip to content

Commit

Permalink
Merge pull request #69 from berry/iterator_bug
Browse files Browse the repository at this point in the history
Bug in index.query() prevents content from similar documents from being included in the LLM query
  • Loading branch information
tomusher authored Jun 19, 2024
2 parents 33344c2 + ede5cd4 commit 5cc1e0b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/wagtail_vector_index/storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def query(
except StopIteration as e:
raise ValueError("No embeddings were generated for the given query.") from e

similar_documents = self.get_similar_documents(query_embedding)
similar_documents = list(self.get_similar_documents(query_embedding))

sources = self._deduplicate_list(
self.get_converter().bulk_from_documents(similar_documents)
Expand Down
18 changes: 18 additions & 0 deletions tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,24 @@ def gen_pages(cls, *args, **kwargs):
case.assertCountEqual(actual, pages)


@pytest.mark.django_db
def test_query_passes_sources_to_backend(mocker):
    """Check that ``index.query`` hands similar-document content to the chat backend.

    ``get_similar_documents`` is replaced by a generator function, so the
    documents reach ``query`` as a one-shot iterator rather than a list.
    The test then asserts that the second message passed to the mocked chat
    backend is a system message whose content is the documents' content
    joined by newlines.
    """
    ExamplePageFactory.create_batch(2)
    index = ExamplePage.vector_index
    documents = index.get_documents()[:2]

    def fake_similar_documents(query_embedding, limit=0):
        # Yield lazily so the result can only be consumed once.
        for document in documents:
            yield document

    chat_mock = mocker.patch("conftest.ChatMockBackend.chat")
    expected_content = "\n".join(
        document.metadata["content"] for document in documents
    )
    mocker.patch.object(
        index,
        "get_similar_documents",
        side_effect=fake_similar_documents,
    )

    index.query("")

    # ``call_args`` holds the arguments of the most recent call to the mock.
    messages = chat_mock.call_args.kwargs["messages"]
    expected_message = {"role": "system", "content": expected_content}
    assert messages[1] == expected_message


DEDUPLICATE_LIST_TESTDATA = [
pytest.param([3, 1, 1, 2], None, [3, 1, 2]),
pytest.param([3, 1, 1, 2], [], [3, 1, 2]),
Expand Down

0 comments on commit 5cc1e0b

Please sign in to comment.