From 1ced93ca1ab7eac89feda2da0183e30f24cd5c53 Mon Sep 17 00:00:00 2001
From: ChengZi
Date: Thu, 14 Mar 2024 16:56:09 +0800
Subject: [PATCH] refine doc

Signed-off-by: ChengZi
---
 README.md | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dbc68a0..fbe7c9e 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,13 @@
 # Milvus Document Store for Haystack
 
+[![PyPI - Version](https://img.shields.io/pypi/v/milvus-haystack.svg)](https://pypi.org/project/milvus-haystack)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/milvus-haystack.svg)](https://pypi.org/project/milvus-haystack)
+
 ## Installation
 
 ```console
-pip install -e milvus-haystack
+pip install milvus-haystack
 ```
 
 ## Usage
 
@@ -26,6 +29,100 @@ document_store.write_documents(documents)
 document_store.count_documents()  # 1
 ```
 
+## In-depth usage
+
+The following example shows how to build an indexing pipeline, a retrieval pipeline, and a RAG pipeline.
+
+```py
+# Create the indexing pipeline and index some documents
+import glob
+import os
+
+from haystack import Pipeline
+from haystack.components.converters import MarkdownToDocument
+from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
+from haystack.components.preprocessors import DocumentSplitter
+from haystack.components.writers import DocumentWriter
+
+from milvus_haystack import MilvusDocumentStore
+from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
+
+file_paths = glob.glob("./your_docs.md")
+
+document_store = MilvusDocumentStore(
+    connection_args={
+        "host": "localhost",
+        "port": "19530",
+        "user": "",
+        "password": "",
+        "secure": False,
+    },
+    drop_old=True,
+)
+indexing_pipeline = Pipeline()
+indexing_pipeline.add_component("converter", MarkdownToDocument())
+indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
+indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
+indexing_pipeline.add_component("writer", DocumentWriter(document_store))
+indexing_pipeline.connect("converter", "splitter")
+indexing_pipeline.connect("splitter", "embedder")
+indexing_pipeline.connect("embedder", "writer")
+indexing_pipeline.run({"converter": {"sources": file_paths}})
+
+print("Number of documents:", document_store.count_documents())
+
+# ------------------------------------------------------------------------------------
+# Create the retrieval pipeline and try a query
+question = "What is Milvus?"
+
+retrieval_pipeline = Pipeline()
+retrieval_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
+retrieval_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
+retrieval_pipeline.connect("embedder", "retriever")
+
+retrieval_results = retrieval_pipeline.run({"embedder": {"text": question}})
+
+for doc in retrieval_results["retriever"]["documents"]:
+    print(doc.content)
+    print("-" * 10)
+
+# ------------------------------------------------------------------------------------
+# Create the RAG pipeline and try a query
+from haystack.utils import Secret
+from haystack.components.embedders import SentenceTransformersTextEmbedder
+from haystack.components.builders import PromptBuilder
+from haystack.components.generators import OpenAIGenerator
+
+prompt_template = """Answer the following query based on the provided context. If the context does
+                     not include an answer, reply with 'I don't know'.\n
+                     Query: {{query}}
+                     Documents:
+                     {% for doc in documents %}
+                        {{ doc.content }}
+                     {% endfor %}
+                     Answer:
+                  """
+
+rag_pipeline = Pipeline()
+rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
+rag_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
+rag_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
+rag_pipeline.add_component("generator", OpenAIGenerator(api_key=Secret.from_token(os.getenv("OPENAI_API_KEY")),
+                                                        generation_kwargs={"temperature": 0}))
+rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
+rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
+rag_pipeline.connect("prompt_builder", "generator")
+
+results = rag_pipeline.run(
+    {
+        "text_embedder": {"text": question},
+        "prompt_builder": {"query": question},
+    }
+)
+print('RAG answer:', results["generator"]["replies"][0])
+
+```
+
 ## License
 
 `milvus-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
\ No newline at end of file