This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Merge branch 'main' into 894/ArangoDB-integration
MarouaneMaatouk authored Jan 25, 2024
2 parents d988b14 + 57af091 commit fa2eb53
Showing 19 changed files with 883 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,11 @@
# ChangeLog

## v[0.0.75] - 2024-01-20

### New Features

- add vanna pack (#889)

## v[0.0.74] - 2024-01-19

### New Features
19 changes: 19 additions & 0 deletions llama_hub/file/xml/README.md
@@ -0,0 +1,19 @@
# XML Loader

This loader extracts the text from a local XML file. A single local file is passed in each time you call `load_data`.

## Usage

To use this loader, you need to pass in a `Path` to a local file.

```python
from pathlib import Path
from llama_index import download_loader

XMLReader = download_loader("XMLReader")

loader = XMLReader()
documents = loader.load_data(file=Path('../example.xml'))
```
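
You can also control how documents are split by passing `tree_level_split` to the reader's constructor, which sets the level of the XML tree at which documents are created (the default is 0, the root). A sketch based on the reader's constructor, splitting one level below the root:

```python
from pathlib import Path
from llama_index import download_loader

XMLReader = download_loader("XMLReader")

# split at level 1: each level-1 element (and any shallower leaf)
# becomes its own Document
loader = XMLReader(tree_level_split=1)
documents = loader.load_data(file=Path('../example.xml'))
```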

This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/run-llama/llama-hub/tree/main/llama_hub) for examples.
6 changes: 6 additions & 0 deletions llama_hub/file/xml/__init__.py
@@ -0,0 +1,6 @@
"""Init file."""
from llama_hub.file.xml.base import (
    XMLReader,
)

__all__ = ["XMLReader"]
95 changes: 95 additions & 0 deletions llama_hub/file/xml/base.py
@@ -0,0 +1,95 @@
"""JSON Reader."""

import re
from pathlib import Path
from typing import Dict, List, Optional

from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document
import xml.etree.ElementTree as ET


def _get_leaf_nodes_up_to_level(root: ET.Element, level: int) -> List[ET.Element]:
    """Get the collection of nodes up to a certain level, including leaf nodes.

    Args:
        root (ET.Element): XML root element.
        level (int): Number of levels to traverse in the tree.

    Returns:
        List[ET.Element]: List of target nodes.
    """

    def traverse(current_node, current_level):
        if len(current_node) == 0 or level == current_level:
            # keep leaf nodes and target-level nodes
            nodes.append(current_node)
        elif current_level < level:
            # move to the next level
            for child in current_node:
                traverse(child, current_level + 1)

    nodes = []
    traverse(root, 0)
    return nodes


class XMLReader(BaseReader):
    """XML reader.

    Reads XML documents with options to help suss out relationships
    between nodes.

    Args:
        tree_level_split (int): Level of the XML tree at which documents
            are split. The default is 0, the root level.
    """

    def __init__(self, tree_level_split: int = 0) -> None:
        """Initialize with arguments."""
        super().__init__()
        self.tree_level_split = tree_level_split

    def _parse_xmlelt_to_document(
        self, root: ET.Element, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Parse the XML element into a list of Documents.

        Args:
            root: The XML element to be converted.
            extra_info (Optional[Dict]): Additional information. Default is None.

        Returns:
            List[Document]: The documents.
        """
        nodes = _get_leaf_nodes_up_to_level(root, self.tree_level_split)
        documents = []
        for node in nodes:
            content = ET.tostring(node, encoding="utf8").decode("utf-8")
            # strip the XML declaration that ET.tostring prepends
            content = re.sub(r"^<\?xml.*", "", content)
            content = content.strip()
            documents.append(Document(text=content, extra_info=extra_info or {}))

        return documents

    def load_data(
        self,
        file: Path,
        extra_info: Optional[Dict] = None,
    ) -> List[Document]:
        """Load data from the input file.

        Args:
            file (Path): Path to the input file.
            extra_info (Optional[Dict]): Additional information. Default is None.

        Returns:
            List[Document]: List of documents.
        """
        if not isinstance(file, Path):
            file = Path(file)

        tree = ET.parse(file)
        documents = self._parse_xmlelt_to_document(tree.getroot(), extra_info)

        return documents
Empty file.
4 changes: 4 additions & 0 deletions llama_hub/library.json
@@ -1224,4 +1224,8 @@
"id": "arangodb",
"author": "mmaatouk"
}
"XMLReader": {
"id": "file/xml",
"author": "mmaatouk"
}
}
14 changes: 11 additions & 3 deletions llama_hub/llama_packs/library.json
@@ -88,7 +88,8 @@
"VoyageQueryEnginePack": {
"id": "llama_packs/voyage_query_engine",
"author": "Liuhong99",
"keywords": ["voyage", "query", "retrieval", "embeddings"]
"keywords": ["voyage", "query", "retrieval", "embeddings"],
"example": true
},
"VectaraRagPack": {
"id": "llama_packs/vectara_rag",
@@ -153,7 +154,8 @@
"RagEvaluatorPack": {
"id": "llama_packs/rag_evaluator",
"author": "nerdai",
"keywords": ["rag", "evaluation", "benchmarks"]
"keywords": ["rag", "evaluation", "benchmarks"],
"example": true
},
"LlamaDatasetMetadataPack": {
"id": "llama_packs/llama_dataset_metadata",
@@ -236,7 +238,8 @@
"RAGFusionPipelinePack": {
"id": "llama_packs/query/rag_fusion_pipeline",
"author": "jerryjliu",
"keywords": ["rag", "fusion", "pipeline", "query"]
"keywords": ["rag", "fusion", "pipeline", "query"],
"example": true
},
"AgentSearchRetrieverPack": {
"id": "llama_packs/agent_search_retriever",
@@ -262,5 +265,10 @@
"id": "llama_packs/stock_market_data_query_engine",
"author": "anoopshrma",
"keywords": ["stock", "market", "data", "query", "engine"]
},
"VannaPack": {
"id": "llama_packs/vanna",
"author": "jerryjliu",
"keywords": ["vanna", "sql", "ai", "text-to-sql"]
}
}
24 changes: 24 additions & 0 deletions llama_hub/llama_packs/query/rag_fusion_pipeline/example.py
@@ -0,0 +1,24 @@
# Required Environment Variables: OPENAI_API_KEY

from pathlib import Path
from llama_index import download_loader
from llama_index.llama_pack import download_llama_pack
from llama_index.llms.openai import OpenAI

# download and install dependencies
RAGFusionPipelinePack = download_llama_pack(
    "RAGFusionPipelinePack", "./rag_fusion_pipeline_pack"
)
PDFReader = download_loader("PDFReader")

# load documents
loader = PDFReader()
document_path = Path("./data/101.pdf") # replace with your own document
documents = loader.load_data(file=document_path)

# create the pack
pack = RAGFusionPipelinePack(documents, llm=OpenAI(model="gpt-3.5-turbo"))

# run the pack
response = pack.run(input="How to rewrite history?")
print(response)
27 changes: 27 additions & 0 deletions llama_hub/llama_packs/rag_evaluator/example.py
@@ -0,0 +1,27 @@
# Required Environment Variables: OPENAI_API_KEY

from llama_index.llama_dataset import download_llama_dataset
from llama_index.llama_pack import download_llama_pack
from llama_index import VectorStoreIndex

# download a LabelledRagDataset from llama-hub
rag_dataset, documents = download_llama_dataset(
    "PaulGrahamEssayDataset", "./paul_graham"
)

# build a basic RAG pipeline off of the source documents
index = VectorStoreIndex.from_documents(documents=documents)
query_engine = index.as_query_engine()

# Time to benchmark/evaluate this RAG pipeline
# Download and install dependencies
RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./rag_evaluator_pack")

# construction requires a query_engine, a rag_dataset, and optionally a judge_llm
rag_evaluator_pack = RagEvaluatorPack(
    query_engine=query_engine, rag_dataset=rag_dataset
)

# PERFORM EVALUATION
benchmark_df = rag_evaluator_pack.run() # async arun() also supported
print(benchmark_df)
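
# alternatively, run the evaluation asynchronously; a sketch, assuming
# arun() takes the same (no) arguments as run(), per the note above:
#
#   import asyncio
#   benchmark_df = asyncio.run(rag_evaluator_pack.arun())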
56 changes: 56 additions & 0 deletions llama_hub/llama_packs/vanna/README.md
@@ -0,0 +1,56 @@
# Vanna AI LlamaPack

Vanna AI is an open-source RAG framework for SQL generation. It works in two steps:
1. Train a RAG model on your data
2. Ask questions (the reference corpus is used to generate SQL queries that can run on your database).

Check out the [Github project](https://github.com/vanna-ai/vanna) and the [docs](https://vanna.ai/docs/) for more details.

This LlamaPack creates a simple `VannaQueryEngine` with Vanna, ChromaDB, and OpenAI, allowing you to train on and ask questions over a SQL database.

## CLI Usage

You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` Python package:

```bash
llamaindex-cli download-llamapack VannaPack --download-dir ./vanna_pack
```

You can then inspect the files at `./vanna_pack` and use them as a template for your own project!

## Code Usage

You can download the pack to a `./vanna_pack` directory:

```python
from llama_index.llama_pack import download_llama_pack

# download and install dependencies
VannaPack = download_llama_pack(
    "VannaPack", "./vanna_pack"
)
```

From here, you can use the pack, or inspect and modify the pack in `./vanna_pack`.

Then, you can set up the pack like so:

```python
pack = VannaPack(
    openai_api_key="<openai_api_key>",
    sql_db_url="chinook.db",
    openai_model="gpt-3.5-turbo"
)
```

The `run()` function is a light wrapper around `llm.complete()`.

```python
response = pack.run("List some sample albums")
```

You can also use modules individually.

```python
query_engine = pack.get_modules()["vanna_query_engine"]
```
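
Assuming `vanna_query_engine` follows the standard LlamaIndex query engine interface, you can then query it directly:

```python
response = query_engine.query("Which artist has the most albums?")
print(response)
```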
1 change: 1 addition & 0 deletions llama_hub/llama_packs/vanna/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Init params."""