diff --git a/src/backend/tools/web_scrape.py b/src/backend/tools/web_scrape.py index 6c87e5421e..849ac606ed 100644 --- a/src/backend/tools/web_scrape.py +++ b/src/backend/tools/web_scrape.py @@ -76,13 +76,14 @@ async def call( async def handle_response(self, response: aiohttp.ClientResponse, url: str): content_type = response.headers.get("content-type") + results = [] # If URL is a PDF, read contents using helper function if "application/pdf" in content_type: - return { + results.append({ "text": read_pdf(response.content), "url": url, - } + }) elif "text/html" in content_type: content = await response.text() soup = BeautifulSoup(content, "html.parser") @@ -98,6 +99,8 @@ async def handle_response(self, response: aiohttp.ClientResponse, url: str): if title: data["title"] = title - return data + results.append(data) else: raise ValueError(f"Unsupported Content Type using web scrape: {content_type}") + + return results