NODRIVER#

+
+

CLICK HERE FOR DOCS#

This package provides next-level web scraping and browser automation using a relatively simple interface.

    @@ -284,7 +286,7 @@

    NODRIVER to fully customizable everything using the entire array of CDP domains, methods and events available.

    -

    Some features#

    +

    Some features#

    • A blazing fast undetected chrome (-ish) automation library

    • No chromedriver binary or Selenium dependency

    • @@ -303,7 +305,7 @@

      Some features -

      Installation#

      +

      Installation#

      Since it’s a part of undetected-chromedriver, installation goes via

      pip install undetected-chromedriver
       
      @@ -316,7 +318,7 @@

      Installation -

      usage example#

      +

      usage example#

      The aim of this project (just like undetected-chromedriver, long ago) is to keep it short and simple, so you can quickly open an editor or interactive session, type or paste a few lines, and off you go.

      @@ -483,6 +485,7 @@

      Installation
      • NODRIVER
          +
        • CLICK HERE FOR DOCS
          • Some features
            • Installation
            • usage example
            • @@ -530,6 +534,8 @@

              Installation -### Installation +#### Installation Since it’s a part of undetected-chromedriver, installation goes via @@ -78,7 +80,7 @@ pip install nodriver -### usage example +#### usage example The aim of this project (just like undetected-chromedriver, somewhere long ago) is to keep it short and simple, so you can quickly open an editor or interactive session, diff --git a/docs/readme.rst b/docs/readme.rst index 394323e..39cc8ad 100644 --- a/docs/readme.rst +++ b/docs/readme.rst @@ -4,6 +4,8 @@ NODRIVER ################## +`CLICK HERE FOR DOCS `_ +____ **This package provides next level webscraping and browser automation using a relatively simple interface.** diff --git a/example/fetch_domain.py b/example/fetch_domain.py new file mode 100644 index 0000000..533e05e --- /dev/null +++ b/example/fetch_domain.py @@ -0,0 +1,45 @@ +try: + from nodriver import * +except (ModuleNotFoundError, ImportError): + import sys, os + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + from nodriver import * + +import logging +logging.basicConfig(level=logging.INFO) + + +async def request_handler(ev: cdp.fetch.RequestPaused, tab: Tab): + print('\nRequestPaused handler\n', ev, type(ev)) + print('TAB = ', tab) + tab.feed_cdp(cdp.fetch.continue_request(request_id=ev.request_id)) + + +async def main(): + + browser = await start() + + [await browser.get( + 'https://www.google.com', + new_window=True) + for _ in range(10)] + + for tab in browser: + print(tab) + tab.add_handler(cdp.fetch.RequestPaused, request_handler) + await tab.send(cdp.fetch.enable()) + + for tab in browser: + await tab + + for tab in browser: + await tab.activate() + + for tab in reversed(browser): + await tab.activate() + await tab.close() + + browser.stop() + + +browser = loop().run_until_complete(main()) diff --git a/nodriver/core/browser.py b/nodriver/core/browser.py index 2f030af..d7e1670 100644 --- a/nodriver/core/browser.py +++ b/nodriver/core/browser.py @@ -526,6 +526,9 @@ def 
__iter__(self): self._i = self.tabs.index(self.main_tab) return self + def __reversed__(self): + return reversed(list(self.tabs)) + def __next__(self): try: return self.tabs[self._i] diff --git a/nodriver/core/connection.py b/nodriver/core/connection.py index 104f848..6efc6cc 100644 --- a/nodriver/core/connection.py +++ b/nodriver/core/connection.py @@ -255,6 +255,7 @@ def add_handler( :type event_type_or_domain: :param handler: :type handler: + :return: :rtype: """ @@ -317,6 +318,22 @@ async def sleep(self, t: Union[int, float] = 0.25): await self.update_target() await asyncio.sleep(t) + def feed_cdp(self, cdp_obj): + """ + used in specific cases, mostly during cdp.fetch.RequestPaused events, + in which the browser literally blocks. using feed_cdp you can issue + a response without a blocking "await". + + note: this method won't cause a response. + note: this is not an async method, just a regular method! + + :param cdp_obj: + :type cdp_obj: + :return: + :rtype: + """ + asyncio.ensure_future(self.send(cdp_obj)) + async def wait(self, t: Union[int, float] = None): """ waits until the event listener reports idle (no new events received in certain timespan). 
@@ -415,6 +432,7 @@ async def send( except Exception: await self.aclose() + # async def _register_handlers(self): """ ensure that for current (event) handlers, the corresponding @@ -554,7 +572,6 @@ async def listener_loop(self): # thanks to zxsleebu for discovering the memory leak # pop to prevent memory leaks - tx = self.connection.mapper.pop(message["id"]) logger.debug("got answer for %s (message_id:%d)", tx, message["id"]) @@ -590,9 +607,15 @@ async def listener_loop(self): for callback in callbacks: try: if iscoroutinefunction(callback) or iscoroutine(callback): - await callback(event) + try: + await callback(event, self.connection) + except TypeError: + await callback(event) else: - callback(event) + try: + callback(event, self.connection) + except TypeError: + callback(event) except Exception as e: logger.warning( "exception in callback %s for event %s => %s", diff --git a/pyproject.toml b/pyproject.toml index 5065732..a4c2f4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "nodriver" # Required -version = "0.35" # Required +version = "0.36" # Required description = """ @@ -51,10 +51,6 @@ maintainers = [ {name = "UltrafunkAmsterdam", email = "doesnotexist@ultrafunk.nl" } # Optional ] classifiers = [ # Optional - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable "Development Status :: 3 - Alpha", "Intended Audience :: Developers",