From 1bec9a10af0acefba8ca8d5fdcc907ac4e3450dd Mon Sep 17 00:00:00 2001 From: Alexis Deprez Date: Tue, 1 Oct 2024 20:04:09 +0200 Subject: [PATCH] feat: add filtering for interactive elements --- .../lavague/drivers/playwright/base.py | 20 ++++++++++++- .../lavague/drivers/selenium/base.py | 15 ++++++++-- .../lavague/drivers/selenium/javascript.py | 7 +++-- lavague-sdk/lavague/sdk/base_driver.py | 28 +++++++++++++------ 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py index de41024a..3a96a3ea 100644 --- a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py +++ b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py @@ -308,18 +308,36 @@ def scroll_down(self): self.execute_script("window.scrollBy(0, window.innerHeight);") def get_possible_interactions( - self, in_viewport=True, foreground_only=True + self, + in_viewport=True, + foreground_only=True, + types: List[InteractionType] = [ + InteractionType.CLICK, + InteractionType.TYPE, + InteractionType.HOVER, + ], ) -> PossibleInteractionsByXpath: exe: Dict[str, List[str]] = self.execute_script( JS_GET_INTERACTIVES, in_viewport, foreground_only, + False, + [t.name for t in types], ) res = dict() for k, v in exe.items(): res[k] = set(InteractionType[i] for i in v) return res + def get_in_viewport(self): + res: Dict[str, List[str]] = self.driver.execute_script( + JS_GET_INTERACTIVES, + True, + True, + True, + ) + return list(res.keys()) + def get_capability(self) -> str: return """ You are a chrome extension and your goal is to interact with web pages. You have been given a series of HTML snippets and queries. diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py index 39eced76..d40e5a2d 100644 --- a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py @@ -60,8 +60,8 @@ def __init__( get_selenium_driver: Optional[Callable[[], WebDriver]] = None, headless: bool = True, user_data_dir: Optional[str] = None, - width: Optional[int] = 1080, - height: Optional[int] = 1080, + width: Optional[int] = 1096, + height: Optional[int] = 1096, options: Optional[Options] = None, driver: Optional[WebDriver] = None, log_waiting_time=False, @@ -650,12 +650,21 @@ def highlight_nodes( ) def get_possible_interactions( - self, in_viewport=True, foreground_only=True + self, + in_viewport=True, + foreground_only=True, + types: List[InteractionType] = [ + InteractionType.CLICK, + InteractionType.TYPE, + InteractionType.HOVER, + ], ) -> PossibleInteractionsByXpath: exe: Dict[str, List[str]] = self.driver.execute_script( JS_GET_INTERACTIVES, in_viewport, foreground_only, + False, + [t.name for t in types], ) res = dict() for k, v in exe.items(): diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/javascript.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/javascript.py index c6cdd33b..d4f6f4c5 100644 --- a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/javascript.py +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/javascript.py @@ -51,12 +51,13 @@ def get_highlighter_style(color: str = "red", label: bool = False): label.style.position = 'absolute'; label.style.backgroundColor = 'red'; label.style.color = 'white'; - label.style.padding = '0 4px'; + label.style.padding = '0px 6px 2px 4px'; label.style.top = '-12px'; label.style.left = '-12px'; - label.style['font-size'] = '13px'; + label.style['font-size'] = '13pt'; + label.style['font-weight'] = 'bold'; label.style['border-bottom-right-radius'] = '13px'; - label.textContent = i; + label.textContent = i + 1; bb.appendChild(label); """ return set_style diff --git a/lavague-sdk/lavague/sdk/base_driver.py b/lavague-sdk/lavague/sdk/base_driver.py index 2341f642..f7f76575 100644 --- a/lavague-sdk/lavague/sdk/base_driver.py +++ b/lavague-sdk/lavague/sdk/base_driver.py @@ -192,7 +192,14 @@ def get_screenshots_whole_page(self, max_screenshots=30) -> list[str]: @abstractmethod def get_possible_interactions( - self, in_viewport=True, foreground_only=True + self, + in_viewport=True, + foreground_only=True, + types: List[InteractionType] = [ + InteractionType.CLICK, + InteractionType.TYPE, + InteractionType.HOVER, + ], ) -> PossibleInteractionsByXpath: """Get elements that can be interacted with as a dictionary mapped by xpath""" pass @@ -314,8 +321,9 @@ def get_current_screenshot_folder(self) -> Path: def get_screenshot_as_png(self) -> bytes: pass + @abstractmethod def get_shadow_roots(self) -> Dict[str, str]: - return {} + pass def get_nodes(self, xpaths: List[str]) -> List["DOMNode"]: raise NotImplementedError("get_nodes not implemented") @@ -515,7 +523,7 @@ def js_wrap_function_call(fn: str): const windowHeight = (window.innerHeight || document.documentElement.clientHeight); const windowWidth = (window.innerWidth || document.documentElement.clientWidth); -return (function(inViewport, foregroundOnly, nonInteractives) { +return (function(inViewport, foregroundOnly, nonInteractives, filterTypes) { function getInteractions(e) { const tag = e.tagName.toLowerCase(); if (!e.checkVisibility() || (e.hasAttribute('disabled') && !nonInteractives) || e.hasAttribute('readonly') @@ -536,7 +544,7 @@ def js_wrap_function_call(fn: str): function hasEvent(n) { return events[n]?.length || e.hasAttribute('on' + n); } - const evts = []; + let evts = []; if (hasEvent('keydown') || hasEvent('keyup') || hasEvent('keypress') || hasEvent('keydown') || hasEvent('input') || e.isContentEditable || ( (tag === 'input' || tag === 'textarea' || role === 'searchbox' || role === 'input') @@ -553,8 +561,13 @@ def js_wrap_function_call(fn: str): ) { evts.push('CLICK'); } - if (hasEvent('scroll') || hasEvent('wheel')|| e.scrollHeight > e.clientHeight || e.scrollWidth > e.clientWidth) { - //evts.push('SCROLL'); + if ( + (hasEvent('scroll') || hasEvent('wheel') || style.overflow === 'auto' || style.overflow === 'scroll' || style.overflowY === 'auto' || style.overflowY === 'scroll') + && (e.scrollHeight > e.clientHeight || e.scrollWidth > e.clientWidth)) { + evts.push('SCROLL'); + } + if (filterTypes && evts.length) { + evts = evts.filter(t => filterTypes.includes(t)); } if (nonInteractives && evts.length === 0) { evts.push('NONE'); @@ -600,7 +613,6 @@ def js_wrap_function_call(fn: str): } while (pointContainer = pointContainer.parentNode); return []; } - return evts; } @@ -641,7 +653,7 @@ def js_wrap_function_call(fn: str): } traverse(document.body, '/html/body'); return results; -})(arguments?.[0], arguments?.[1], arguments?.[2]); +})(arguments?.[0], arguments?.[1], arguments?.[2], arguments?.[3]); """ JS_WAIT_DOM_IDLE = """