Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Wikipedia tools should return text, not documents (Tools aren't DataLoaders!) #828

Merged
3 commits merged on Jan 4, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 19 additions & 25 deletions llama_hub/tools/wikipedia/base.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,40 @@
"""Wikipedia tool spec."""

from typing import Any, List
from typing import Any, Dict

from llama_index.readers.schema.base import Document
from llama_index.tools.tool_spec.base import BaseToolSpec


class WikipediaToolSpec(BaseToolSpec):
"""Wikipedia tool spec.

Currently a simple wrapper around the data loader.

"""
Specifies two tools for querying information from Wikipedia.
"""

spec_functions = ["load_data", "search_data"]

def load_data(
self, pages: List[str], lang: str = "en", **load_kwargs: Any
) -> List[Document]:
"""Load data from wikipedia.
self, page: str, lang: str = "en", **load_kwargs: Dict[str, Any]
) -> str:
"""
Retrieve a Wikipedia page. Useful for learning about a particular concept that isn't private information.

Args:
pages (List[str]): List of pages to read.
lang (str): language of wikipedia texts (default English)
page (str): Title of the page to read.
lang (str): Language of Wikipedia to read. (default: English)
"""
import wikipedia
from wikipedia import PageError

wikipedia.set_lang(lang)
try:
results = []
for page in pages:
wikipedia.set_lang(lang)
page_content = wikipedia.page(
page, **load_kwargs, auto_suggest=False
).content
results.append(Document(text=page_content))
return results
except PageError:
wikipedia_page = wikipedia.page(page, **load_kwargs, auto_suggest=False)
except wikipedia.PageError:
return "Unable to load page. Try searching instead."
return wikipedia_page.content

def search_data(self, query: str, lang: str = "en") -> List[Document]:
"""Searches Wikipedia for pages related to a query. Use this endpoint when load_data returns no results.
def search_data(self, query: str, lang: str = "en") -> str:
"""
Search Wikipedia for a page related to the given query.
Use this tool when `load_data` returns no results.

Args:
query (str): the string to search for
@@ -49,5 +43,5 @@ def search_data(self, query: str, lang: str = "en") -> List[Document]:

pages = wikipedia.search(query)
if len(pages) == 0:
return "Unable to find any details on this search"
return self.load_data(pages, lang)
return "No search results."
return self.load_data(pages[0], lang)