Skip to content

Commit

Permalink
✨ Download
Browse files Browse the repository at this point in the history
  • Loading branch information
asim-shrestha committed Mar 5, 2024
1 parent 869673d commit e1e1787
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
23 changes: 23 additions & 0 deletions harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
from playwright_stealth import stealth_async

from harambe.downloader import LoggingDownloader, DownloadMeta, Downloader
from harambe.handlers import (
ResourceRequestHandler,
ResourceType,
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__(
observer: Optional[Union[OutputObserver, List[OutputObserver]]] = None,
scraper: Optional[AsyncScraperType] = None,
context: Optional[Context] = None,
downloader: Optional[Downloader] = None,
):
self.page = page
self._id = run_id or uuid.uuid4()
Expand All @@ -64,6 +66,10 @@ def __init__(

self._observers = observer

if not downloader:
downloader = LoggingDownloader()
self._downloader = downloader

async def save_data(self, *data: ScrapeResult) -> None:
"""
Save scraped data. This will be passed to the on_save_data callback.
Expand Down Expand Up @@ -148,6 +154,22 @@ async def capture_url(

return handler.captured_url()

async def capture_download(
    self,
    clickable: ElementHandle,
) -> DownloadMeta:
    """
    Click an element and capture the file download that the click triggers.

    The click is performed while the page is waiting for a download event.
    After the download has finished, it is handed to the configured
    downloader (``self._downloader``) and that handler's result is returned.

    :param clickable: element whose click starts the download
    :return: the metadata produced by the downloader's ``on_download``
    """
    async with self.page.expect_download() as pending:
        await clickable.click()

    triggered = await pending.value
    # Awaiting the path blocks until the download has completed.
    await triggered.path()

    meta = await self._downloader.on_download(triggered)
    return meta

@staticmethod
async def run(
scraper: AsyncScraperType,
Expand Down Expand Up @@ -199,6 +221,7 @@ async def run(
context,
)
except Exception as e:
# TODO: Replace this hard-coded, developer-specific trace path with a configurable location
await ctx.tracing.stop(
path="/Users/awtkns/PycharmProjects/harambe-public/trace.zip"
)
Expand Down
19 changes: 19 additions & 0 deletions harambe/downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import logging
from typing import Protocol, TypedDict

from playwright.async_api import Download


class DownloadMeta(TypedDict):
    """Metadata a downloader returns for a handled download."""

    # Location of the downloaded file, as reported by the downloader.
    url: str
    # File name associated with the download.
    filename: str


class Downloader(Protocol):
    """Interface for objects that handle a Playwright download."""

    async def on_download(self, download: Download) -> DownloadMeta:
        """
        Handle a download and describe the result.

        :param download: the Playwright download to process
        :return: metadata describing the handled file
        """
        raise NotImplementedError


class LoggingDownloader(Downloader):
    """Downloader that only logs each download and reports its local path."""

    async def on_download(self, download: Download) -> DownloadMeta:
        """
        Log the download and return metadata pointing at the local file.

        :param download: the Playwright download to describe
        :return: ``url`` set to the string form of the download's local path
            and ``filename`` set to the download's suggested filename
        """
        # Use the logging framework instead of writing to stdout, so that
        # applications embedding this class control where the message goes.
        logging.getLogger(__name__).info("on_download %s", download)

        local_path = await download.path()
        return {
            "url": str(local_path),
            "filename": download.suggested_filename,
        }

0 comments on commit e1e1787

Please sign in to comment.