Skip to content

Commit

Permalink
backend: Fix web scrape issue with results format (#878)
Browse files Browse the repository at this point in the history
Fix web scrape issue
  • Loading branch information
tianjing-li authored Dec 13, 2024
1 parent f370c46 commit 925f3c2
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/backend/tools/web_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,14 @@ async def call(

     async def handle_response(self, response: aiohttp.ClientResponse, url: str):
         content_type = response.headers.get("content-type")
+        results = []

         # If URL is a PDF, read contents using helper function
         if "application/pdf" in content_type:
-            return {
+            results.append({
                 "text": read_pdf(response.content),
                 "url": url,
-            }
+            })
         elif "text/html" in content_type:
             content = await response.text()
             soup = BeautifulSoup(content, "html.parser")
Expand All @@ -98,6 +99,8 @@ async def handle_response(self, response: aiohttp.ClientResponse, url: str):
             if title:
                 data["title"] = title

-            return data
+            results.append(data)
         else:
             raise ValueError(f"Unsupported Content Type using web scrape: {content_type}")
+
+        return results

0 comments on commit 925f3c2

Please sign in to comment.