From c0e218b49aa8e050c38557a10b2b314d3095637c Mon Sep 17 00:00:00 2001 From: khoomeik <32777448+KhoomeiK@users.noreply.github.com> Date: Fri, 15 Mar 2024 17:19:25 -0700 Subject: [PATCH] formatting fix --- harambe/__init__.py | 9 ++++++++- harambe/core.py | 9 +++++++-- harambe/observer.py | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/harambe/__init__.py b/harambe/__init__.py index c8c0543..3b8988b 100644 --- a/harambe/__init__.py +++ b/harambe/__init__.py @@ -2,4 +2,11 @@ from .types import AsyncScraperType, ScrapeResult from .utils import PlaywrightUtils -__all__ = ["ScrapeResult", "SDK", "PlaywrightUtils", "AsyncScraperType", "AsyncScraper", "PAGE_PDF_FILENAME"] +__all__ = [ + "ScrapeResult", + "SDK", + "PlaywrightUtils", + "AsyncScraperType", + "AsyncScraper", + "PAGE_PDF_FILENAME", +] diff --git a/harambe/core.py b/harambe/core.py index c5ce950..35bca23 100644 --- a/harambe/core.py +++ b/harambe/core.py @@ -188,11 +188,16 @@ async def capture_pdf( Capture the current page as a pdf and then apply some download handling logic from the observer to transform to a usable URL """ - await self.page.wait_for_timeout(1000) # Allow for some extra time for the page to load + await self.page.wait_for_timeout( + 1000 + ) # Allow for some extra time for the page to load pdf_content = await self.page.pdf() file_name = PAGE_PDF_FILENAME res = await asyncio.gather( - *[o.on_download(self.page.url, file_name, pdf_content) for o in self._observers] + *[ + o.on_download(self.page.url, file_name, pdf_content) + for o in self._observers + ] ) return res[0] diff --git a/harambe/observer.py b/harambe/observer.py index 3e86c3c..b3564e2 100644 --- a/harambe/observer.py +++ b/harambe/observer.py @@ -17,7 +17,9 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: raise NotImplementedError() @abstractmethod - async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": + async def on_download( + self, download_url: str, filename: str, content: bytes + ) -> "DownloadMeta": raise NotImplementedError() @@ -29,7 +31,9 @@ async def on_save_data(self, data: Dict[str, Any]): async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: print(f"Enqueuing: {url} with context {context}") - async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": + async def on_download( + self, download_url: str, filename: str, content: bytes + ) -> "DownloadMeta": print(f"Downloading file: {filename}") # TODO: use logger return { "url": f"{download_url}/{quote(filename)}", @@ -47,7 +51,9 @@ async def on_save_data(self, data: Dict[str, Any]): async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: self._tracker.save_data({"url": url, "context": context}) - async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": + async def on_download( + self, download_url: str, filename: str, content: bytes + ) -> "DownloadMeta": data = { "url": f"{download_url}/{quote(filename)}", "filename": filename, @@ -68,7 +74,9 @@ async def on_save_data(self, data: Dict[str, Any]): async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None: self._urls.append((url, context)) - async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta": + async def on_download( + self, download_url: str, filename: str, content: bytes + ) -> "DownloadMeta": data = { "url": f"{download_url}/{quote(filename)}", "filename": filename,