diff --git a/core/pyproject.toml b/core/pyproject.toml index 7a3d917..b9adfbf 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harambe-core" -version = "0.44.0" +version = "0.44.1" description = "Core types for harambe SDK 🐒🍌" authors = [ { name = "Adam Watkins", email = "adam@reworkd.ai" } diff --git a/core/uv.lock b/core/uv.lock index b880486..c2c2c1a 100644 --- a/core/uv.lock +++ b/core/uv.lock @@ -141,7 +141,7 @@ wheels = [ [[package]] name = "harambe-core" -version = "0.43.1" +version = "0.44.1" source = { virtual = "." } dependencies = [ { name = "dateparser" }, diff --git a/sdk/harambe/core.py b/sdk/harambe/core.py index 93256c0..cb9d4bd 100644 --- a/sdk/harambe/core.py +++ b/sdk/harambe/core.py @@ -272,7 +272,7 @@ async def capture_html( :return: HTMLMetadata containing download URL, HTML content and inner text. :raises ValueError: If the specified selector doesn't match any element. """ - html, inner_text = await self._get_html(selector, exclude_selectors or []) + html, text = await self._get_html(selector, exclude_selectors or []) downloads = await self._notify_observers( method="on_download", @@ -286,7 +286,7 @@ async def capture_html( "url": downloads[0]["url"], "filename": downloads[0]["filename"], "html": html, - "inner_text": inner_text, + "text": text, } async def _get_html( @@ -303,9 +303,9 @@ async def _get_html( for selector in exclude_selectors: for element_to_remove in soup.select(selector): element_to_remove.decompose() - inner_text = soup.get_text(separator="\n", strip=True) + text = soup.get_text(separator="\n", strip=True) - return str(soup), inner_text + return str(soup), text async def capture_pdf( self, diff --git a/sdk/harambe/observer.py b/sdk/harambe/observer.py index b955400..8f044bc 100644 --- a/sdk/harambe/observer.py +++ b/sdk/harambe/observer.py @@ -33,7 +33,7 @@ class DownloadMeta(TypedDict): class HTMLMetadata(DownloadMeta): html: str - inner_text: str + text: str @runtime_checkable diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index b02720a..bc05d5b 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harambe-sdk" -version = "0.44.0" +version = "0.44.1" description = "Data extraction SDK for Playwright 🐒🍌" authors = [ { name = "Adam Watkins", email = "adam@reworkd.ai" } diff --git a/sdk/test/test_e2e.py b/sdk/test/test_e2e.py index a85d8f5..d3aab77 100644 --- a/sdk/test/test_e2e.py +++ b/sdk/test/test_e2e.py @@ -462,7 +462,7 @@ async def scraper(sdk: SDK, *args, **kwargs): doc_data = observer.data[0] assert "