This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 738
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into loader-894/ArangoDB-integration
- Loading branch information
Showing
16 changed files
with
999 additions
and
601 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Couchbase Loader | ||
|
||
This loader loads documents from a Couchbase cluster. | ||
The user specifies a Couchbase client or credentials to initialize the reader. They can specify the SQL++ query to | ||
fetch the relevant docs. | ||
|
||
## Usage | ||
|
||
Here's an example usage of the CouchbaseReader. | ||
|
||
```python | ||
from llama_index import download_loader | ||
import os | ||
|
||
CouchbaseLoader = download_loader('CouchbaseReader') | ||
|
||
connection_string = "couchbase://localhost" # valid Couchbase connection string | ||
db_username = "<valid_database_user_with_read_access_to_bucket_with_data>" | ||
db_password = "<password_for_database_user>" | ||
|
||
# query is a valid SQL++ query that is passed to client.query() | ||
query = """ | ||
SELECT h.* FROM `travel-sample`.inventory.hotel h | ||
WHERE h.country = 'United States' | ||
LIMIT 5 | ||
""" | ||
|
||
reader = CouchbaseLoader( | ||
connection_string=connection_string, | ||
db_username=db_username, | ||
db_password=db_password | ||
) | ||
|
||
# It is also possible to pass an initialized Couchbase client to the document loader | ||
# from couchbase.auth import PasswordAuthenticator # noqa: E402 | ||
# from couchbase.cluster import Cluster # noqa: E402 | ||
# from couchbase.options import ClusterOptions # noqa: E402 | ||
|
||
# auth = PasswordAuthenticator( | ||
# db_username, | ||
# db_password, | ||
# ) | ||
|
||
# couchbase_client = Cluster(connection_string, ClusterOptions(auth)) | ||
# reader = CouchbaseLoader(client=couchbase_client) | ||
|
||
# fields to be written to the document | ||
text_fields=["name", "title", "address", "reviews"] | ||
|
||
# metadata fields to be written to the document's metadata | ||
metadata_fields=["country", "city"] | ||
|
||
documents = reader.load_data(query=query, text_fields=text_fields, metadata_fields=metadata_fields) | ||
``` | ||
|
||
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/run-llama/llama-hub/tree/main) for examples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Init file.""" | ||
from llama_hub.couchbase.base import ( | ||
CouchbaseReader, | ||
) | ||
|
||
__all__ = ["CouchbaseReader"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
"""Couchbase document loader""" | ||
|
||
from typing import Any, Iterable, List, Optional | ||
from llama_index.readers.base import BaseReader | ||
from llama_index.readers.schema.base import Document | ||
|
||
|
||
class CouchbaseReader(BaseReader):
    """Couchbase document loader.

    Loads data from a Couchbase cluster into Document used by LlamaIndex.

    Args:
        client (Optional[Any]): A Couchbase client to use.
            If not provided, the client will be created based on the
            connection_string and database credentials.
        connection_string (Optional[str]): The connection string to the
            Couchbase cluster.
        db_username (Optional[str]): The username to connect to the
            Couchbase cluster.
        db_password (Optional[str]): The password to connect to the
            Couchbase cluster.

    Raises:
        ImportError: If the `couchbase` package is not installed.
        ValueError: If no client is given and the connection string or
            either credential is missing.
    """

    def __init__(
        self,
        client: Optional[Any] = None,
        connection_string: Optional[str] = None,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
    ) -> None:
        """Initialize Couchbase document loader."""
        import_err_msg = "`couchbase` package not found, please run `pip install --upgrade couchbase`"
        # The SDK is imported lazily so the module can be imported without
        # the optional `couchbase` dependency being installed.
        try:
            from couchbase.auth import PasswordAuthenticator
            from couchbase.cluster import Cluster
            from couchbase.options import ClusterOptions
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        if not client:
            if not connection_string or not db_username or not db_password:
                raise ValueError(
                    "You need to pass either a couchbase client or connection_string and credentials must be provided."
                )

            auth = PasswordAuthenticator(
                db_username,
                db_password,
            )
            self._client: Cluster = Cluster(connection_string, ClusterOptions(auth))
        else:
            self._client = client

    def lazy_load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> Iterable[Document]:
        """Load data from the Couchbase cluster lazily.

        This is a generator: validation, the readiness check and the query
        only run once iteration starts.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns of
                each row are written.
            metadata_fields (Optional[List[str]]): The columns to write into
                the `metadata` field of the document. By default (``None``
                or an empty list), no columns are written.

        Yields:
            Document: One document per result row, whose text is the
            selected columns rendered as ``"key: value"`` lines.

        Raises:
            ValueError: If ``query`` is empty.
        """
        from datetime import timedelta

        if not query:
            raise ValueError("Query must be provided.")

        # `None` means "no metadata"; normalising here avoids both a mutable
        # default argument and a TypeError when `load_data` forwards `None`.
        meta_keys = metadata_fields if metadata_fields else []

        # Ensure connection to Couchbase cluster
        self._client.wait_until_ready(timedelta(seconds=5))

        # Run SQL++ Query
        result = self._client.query(query)
        for row in result:
            # A metadata field absent from the row propagates the lookup
            # error raised by `row[field]`, as in the original code.
            metadata = {field: row[field] for field in meta_keys}

            # Select columns per row rather than caching the first row's
            # keys, so rows with differing columns are not truncated when
            # no `text_fields` were requested.
            if text_fields:
                items = ((k, v) for k, v in row.items() if k in text_fields)
            else:
                items = row.items()

            text = "\n".join(f"{k}: {v}" for k, v in items)

            yield Document(text=text, metadata=metadata)

    def load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> List[Document]:
        """Load data from the Couchbase cluster.

        Eagerly collects everything produced by `lazy_load_data`.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns are
                written.
            metadata_fields (Optional[List[str]]): The columns to write into
                the `metadata` field of the document. By default, no columns
                are written.

        Returns:
            List[Document]: One document per row returned by the query.

        Raises:
            ValueError: If ``query`` is empty.
        """
        return list(self.lazy_load_data(query, text_fields, metadata_fields))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
couchbase |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Exa (formerly Metaphor) Tool | ||
|
||
This tool connects to [Exa](https://exa.ai/) to easily enable | ||
your agent to search and get HTML content from the Internet. | ||
|
||
To begin, you need to obtain an API key on the [Exa developer dashboard](https://dashboard.exa.ai). | ||
|
||
## Usage | ||
|
||
This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/exa.ipynb) | ||
|
||
Here's an example usage of the ExaToolSpec. | ||
|
||
```python | ||
from llama_hub.tools.exa import ExaToolSpec | ||
from llama_index.agent import OpenAIAgent | ||
|
||
exa_tool = ExaToolSpec( | ||
api_key='your-key', | ||
) | ||
agent = OpenAIAgent.from_tools(exa_tool.to_tool_list()) | ||
|
||
agent.chat('Can you summarize the news published in the last month on superconductors') | ||
``` | ||
|
||
`search`: Search for a list of articles relating to a natural language query | ||
`retrieve_documents`: Retrieve a list of documents returned from `exa_search`. | ||
`search_and_retrieve_documents`: Combines search and retrieve_documents to directly return a list of documents related to a search | ||
`find_similar`: Find similar documents to a given URL. | ||
`current_date`: Utility for the Agent to get today's date | ||
|
||
This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
## init | ||
from llama_hub.tools.exa.base import ( | ||
ExaToolSpec, | ||
) | ||
|
||
__all__ = ["ExaToolSpec"] |
Oops, something went wrong.