diff --git a/harambe/core.py b/harambe/core.py index 30e2620..cf80f4f 100644 --- a/harambe/core.py +++ b/harambe/core.py @@ -175,7 +175,7 @@ async def capture_download( res = await asyncio.gather( *[ - o.on_download(download.suggested_filename, content) + o.on_download(download.url, download.suggested_filename, content) for o in self._observers ] ) @@ -189,9 +189,9 @@ async def capture_pdf( from the observer to transform to a usable URL """ pdf_content = await self.page.pdf() - file_name = f"{self.page.url}-screen.pdf" + file_name = f"reworkd_page_snapshot.pdf" res = await asyncio.gather( - *[o.on_download(file_name, pdf_content) for o in self._observers] + *[o.on_download(self.page.url, file_name, pdf_content) for o in self._observers] ) return res[0] diff --git a/harambe/observer.py b/harambe/observer.py index c1fd721..3e86c3c 100644 --- a/harambe/observer.py +++ b/harambe/observer.py @@ -1,5 +1,6 @@ from abc import abstractmethod from typing import Any, Dict, List, Protocol, Tuple, runtime_checkable, TypedDict +from urllib.parse import quote from harambe.tracker import FileDataTracker from harambe.types import URL, Context, Stage @@ -31,7 +32,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": print(f"Downloading file: {filename}") # TODO: use logger return { - "url": f"{download_url}/{filename}", + "url": f"{download_url}/{quote(filename)}", "filename": filename, } @@ -48,7 +49,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": data = { - "url": f"{download_url}/{filename}", + "url": f"{download_url}/{quote(filename)}", "filename": filename, } self._tracker.save_data(data) @@ -69,7 +70,7 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": data = { - "url": f"{download_url}/{filename}", + "url": f"{download_url}/{quote(filename)}", "filename": filename, } self._files.append((filename, content)) diff --git a/pyproject.toml b/pyproject.toml index 053fe31..0a226b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "harambe-sdk" -version = "0.8.2" +version = "0.8.3" description = "Data extraction SDK for Playwright 🐒🍌" authors = ["awtkns "] readme = "README.md"