Skip to content

Commit

Permalink
formatting fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
KhoomeiK committed Mar 16, 2024
1 parent c2f5603 commit c0e218b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
9 changes: 8 additions & 1 deletion harambe/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,4 +2,11 @@
from .types import AsyncScraperType, ScrapeResult
from .utils import PlaywrightUtils

__all__ = ["ScrapeResult", "SDK", "PlaywrightUtils", "AsyncScraperType", "AsyncScraper", "PAGE_PDF_FILENAME"]
__all__ = [
"ScrapeResult",
"SDK",
"PlaywrightUtils",
"AsyncScraperType",
"AsyncScraper",
"PAGE_PDF_FILENAME",
]
9 changes: 7 additions & 2 deletions harambe/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -188,11 +188,16 @@ async def capture_pdf(
Capture the current page as a pdf and then apply some download handling logic
from the observer to transform to a usable URL
"""
await self.page.wait_for_timeout(1000) # Allow for some extra time for the page to load
await self.page.wait_for_timeout(
1000
) # Allow for some extra time for the page to load
pdf_content = await self.page.pdf()
file_name = PAGE_PDF_FILENAME
res = await asyncio.gather(
*[o.on_download(self.page.url, file_name, pdf_content) for o in self._observers]
*[
o.on_download(self.page.url, file_name, pdf_content)
for o in self._observers
]
)
return res[0]

Expand Down
16 changes: 12 additions & 4 deletions harambe/observer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,9 @@ async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:
raise NotImplementedError()

@abstractmethod
async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
async def on_download(
self, download_url: str, filename: str, content: bytes
) -> "DownloadMeta":
raise NotImplementedError()


Expand All @@ -29,7 +31,9 @@ async def on_save_data(self, data: Dict[str, Any]):
async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:
print(f"Enqueuing: {url} with context {context}")

async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
async def on_download(
self, download_url: str, filename: str, content: bytes
) -> "DownloadMeta":
print(f"Downloading file: {filename}") # TODO: use logger
return {
"url": f"{download_url}/{quote(filename)}",
Expand All @@ -47,7 +51,9 @@ async def on_save_data(self, data: Dict[str, Any]):
async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:
self._tracker.save_data({"url": url, "context": context})

async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
async def on_download(
self, download_url: str, filename: str, content: bytes
) -> "DownloadMeta":
data = {
"url": f"{download_url}/{quote(filename)}",
"filename": filename,
Expand All @@ -68,7 +74,9 @@ async def on_save_data(self, data: Dict[str, Any]):
async def on_queue_url(self, url: URL, context: Dict[str, Any]) -> None:
self._urls.append((url, context))

async def on_download(self, download_url: str, filename: str, content: bytes) -> "DownloadMeta":
async def on_download(
self, download_url: str, filename: str, content: bytes
) -> "DownloadMeta":
data = {
"url": f"{download_url}/{quote(filename)}",
"filename": filename,
Expand Down

0 comments on commit c0e218b

Please sign in to comment.