This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 738
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into 894/ArangoDB-integration
- Loading branch information
Showing
19 changed files
with
883 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# XML Loader | ||
|
||
This loader extracts the text from a local XML file. A single local file is passed in each time you call `load_data`. | ||
|
||
## Usage | ||
|
||
To use this loader, you need to pass in a `Path` to a local file. | ||
|
||
```python | ||
from pathlib import Path | ||
from llama_index import download_loader | ||
|
||
XMLReader = download_loader("XMLReader") | ||
|
||
loader = XMLReader() | ||
documents = loader.load_data(file=Path('../example.xml')) | ||
``` | ||
|
||
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/run-llama/llama-hub/tree/main/llama_hub) for examples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Init file.""" | ||
from llama_hub.file.xml.base import ( | ||
XMLReader, | ||
) | ||
|
||
__all__ = ["XMLReader"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
"""JSON Reader.""" | ||
|
||
import re | ||
from pathlib import Path | ||
from typing import Dict, List, Optional | ||
|
||
from llama_index.readers.base import BaseReader | ||
from llama_index.readers.schema.base import Document | ||
import xml.etree.ElementTree as ET | ||
|
||
|
||
def _get_leaf_nodes_up_to_level(root: ET.Element, level: int) -> List[ET.Element]: | ||
"""Get collection of nodes up to certain level including leaf nodes | ||
Args: | ||
root (ET.Element): XML Root Element | ||
level (int): Levels to traverse in the tree | ||
Returns: | ||
List[ET.Element]: List of target nodes | ||
""" | ||
|
||
def traverse(current_node, current_level): | ||
if len(current_node) == 0 or level == current_level: | ||
# Keep leaf nodes and target level nodes | ||
nodes.append(current_node) | ||
elif current_level < level: | ||
# Move to the next level | ||
for child in current_node: | ||
traverse(child, current_level + 1) | ||
|
||
nodes = [] | ||
traverse(root, 0) | ||
return nodes | ||
|
||
|
||
class XMLReader(BaseReader):
    """XML reader.

    Reads XML documents with options to help suss out relationships
    between nodes.

    Args:
        tree_level_split (int): Level in the XML tree at which documents
            are split; the default is the root, which is level 0. ``None``
            is accepted (per the annotation) and treated as 0.
    """

    def __init__(self, tree_level_split: Optional[int] = 0) -> None:
        """Initialize with arguments."""
        super().__init__()
        # The annotation allows None, but a None level would make the
        # tree traversal's depth comparison raise TypeError — normalize
        # it to the root level instead.
        self.tree_level_split = 0 if tree_level_split is None else tree_level_split

    def _parse_xmlelt_to_document(
        self, root: ET.Element, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Parse the XML element into a list of Documents.

        Args:
            root: The XML element to be converted.
            extra_info (Optional[Dict]): Additional information attached
                to every produced document. Default is None.

        Returns:
            List[Document]: One document per collected node, holding that
            node's serialized XML (no XML declaration).
        """
        nodes = _get_leaf_nodes_up_to_level(root, self.tree_level_split)
        documents = []
        for node in nodes:
            # encoding="unicode" returns a str directly and never emits an
            # "<?xml ...?>" declaration, so no decode/regex strip is needed.
            content = ET.tostring(node, encoding="unicode").strip()
            documents.append(Document(text=content, extra_info=extra_info or {}))

        return documents

    def load_data(
        self,
        file: Path,
        extra_info: Optional[Dict] = None,
    ) -> List[Document]:
        """Load data from the input file.

        Args:
            file (Path): Path to the input file; a plain string path is
                also accepted and coerced to ``Path``.
            extra_info (Optional[Dict]): Additional information. Default is None.

        Returns:
            List[Document]: List of documents.

        Raises:
            ET.ParseError: If the file is not well-formed XML.
            OSError: If the file cannot be opened or read.
        """
        if not isinstance(file, Path):
            file = Path(file)

        tree = ET.parse(file)
        return self._parse_xmlelt_to_document(tree.getroot(), extra_info)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1224,4 +1224,8 @@ | |
"id": "arangodb", | ||
"author": "mmaatouk" | ||
},
"XMLReader": {
    "id": "file/xml",
    "author": "mmaatouk"
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
llama_hub/llama_packs/query/rag_fusion_pipeline/example.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Example: run the RAGFusionPipelinePack over a local PDF.

Required Environment Variables: OPENAI_API_KEY
"""
from pathlib import Path

from llama_index import download_loader
from llama_index.llama_pack import download_llama_pack
from llama_index.llms.openai import OpenAI

# Fetch the pack and the PDF loader (downloads and installs dependencies).
RAGFusionPipelinePack = download_llama_pack(
    "RAGFusionPipelinePack", "./rag_fusion_pipeline_pack"
)
PDFReader = download_loader("PDFReader")

# Load the source documents from disk.
pdf_loader = PDFReader()
documents = pdf_loader.load_data(
    file=Path("./data/101.pdf")  # replace with your own document
)

# Build the pack on the loaded documents.
pack = RAGFusionPipelinePack(documents, llm=OpenAI(model="gpt-3.5-turbo"))

# Query the pack and show the answer.
response = pack.run(input="How to rewrite history?")
print(response)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
"""Example: benchmark a basic RAG pipeline with the RagEvaluatorPack.

Required Environment Variables: OPENAI_API_KEY
"""
from llama_index import VectorStoreIndex
from llama_index.llama_dataset import download_llama_dataset
from llama_index.llama_pack import download_llama_pack

# Download a LabelledRagDataset from llama-hub.
rag_dataset, documents = download_llama_dataset(
    "PaulGrahamEssayDataset", "./paul_graham"
)

# Build a basic RAG pipeline off of the source documents.
vector_index = VectorStoreIndex.from_documents(documents=documents)
query_engine = vector_index.as_query_engine()

# Time to benchmark/evaluate this RAG pipeline.
# Download and install the evaluator pack's dependencies.
RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./rag_evaluator_pack")

# Construction requires a query_engine, a rag_dataset, and optionally a judge_llm.
evaluator = RagEvaluatorPack(query_engine=query_engine, rag_dataset=rag_dataset)

# Perform the evaluation and report the benchmark results.
benchmark_df = evaluator.run()  # async arun() also supported
print(benchmark_df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Vanna AI LlamaPack
|
||
Vanna AI is an open-source RAG framework for SQL generation. It works in two steps: | ||
1. Train a RAG model on your data | ||
2. Ask questions (use reference corpus to generate SQL queries that can run on your db). | ||
|
||
Check out the [Github project](https://github.com/vanna-ai/vanna) and the [docs](https://vanna.ai/docs/) for more details. | ||
|
||
This LlamaPack creates a simple `VannaQueryEngine` with vanna, ChromaDB and OpenAI, and allows you to train and ask questions over a SQL database. | ||
|
||
## CLI Usage | ||
|
||
You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package: | ||
|
||
```bash | ||
llamaindex-cli download-llamapack VannaPack --download-dir ./vanna_pack | ||
``` | ||
|
||
You can then inspect the files at `./vanna_pack` and use them as a template for your own project! | ||
|
||
## Code Usage | ||
|
||
You can download the pack to a `./vanna_pack` directory: | ||
|
||
```python | ||
from llama_index.llama_pack import download_llama_pack | ||
|
||
# download and install dependencies | ||
VannaPack = download_llama_pack( | ||
"VannaPack", "./vanna_pack" | ||
) | ||
``` | ||
|
||
From here, you can use the pack, or inspect and modify the pack in `./vanna_pack`. | ||
|
||
Then, you can set up the pack like so: | ||
|
||
```python | ||
pack = VannaPack( | ||
openai_api_key="<openai_api_key>", | ||
sql_db_url="chinook.db", | ||
openai_model="gpt-3.5-turbo" | ||
) | ||
``` | ||
|
||
The `run()` function is a light wrapper around `llm.complete()`. | ||
|
||
```python | ||
response = pack.run("List some sample albums") | ||
``` | ||
|
||
You can also use modules individually. | ||
|
||
```python | ||
query_engine = pack.get_modules()["vanna_query_engine"] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Init params.""" |
Oops, something went wrong.