feat: OCR for objects without text and desc

#372: this works for the feed job. On Windows you also have to manually install the Tesseract executable: https://github.com/UB-Mannheim/tesseract/wiki
GramAddict · Feb 25, 2024 · 9cd8261 · 9cd8261
1 parent a66ecd1
commit 9cd8261
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 8 deletions.
diff --git a/GramAddict/core/device_facade.py b/GramAddict/core/device_facade.py
@@ -185,8 +185,11 @@ def stop_screenrecord(self, crash=True):
         if self.deviceV2.screenrecord.stop(crash=crash):
             logger.warning("Screen recorder has been stopped successfully!")
 
-    def screenshot(self, path):
-        self.deviceV2.screenshot(path)
+    def screenshot(self, path=None):
+        if path is None:
+            return self.deviceV2.screenshot()
+        else:
+            self.deviceV2.screenshot(path)
 
     def dump_hierarchy(self, path):
         xml_dump = self.deviceV2.dump_hierarchy()

diff --git a/GramAddict/core/views.py b/GramAddict/core/views.py
@@ -988,24 +988,57 @@ def _check_if_ad_or_hashtag(
     ) -> Tuple[bool, bool, Optional[str]]:
         is_hashtag = False
         is_ad = False
-        real_username = None
         logger.debug("Checking if it's an AD or an hashtag..")
         ad_like_obj = post_owner_obj.sibling(
             resourceId=ResourceID.SECONDARY_LABEL,
         )
-        if post_owner_obj.get_text().startswith("#"):
-            is_hashtag = True
-            logger.debug("Looks like an hashtag, skip.")
+
+        owner_name = post_owner_obj.get_text() or post_owner_obj.get_desc()
+        if not owner_name:
+            logger.info("Can't find the owner name, need to use OCR.")
+            try:
+                owner_name = self.get_text_from_screen(post_owner_obj)
+            except ImportError:
+                logger.error(
+                    "You need to install pytesseract in order to use OCR feature."
+                )
+            if owner_name.startswith("#"):
+                is_hashtag = True
+                logger.debug("Looks like an hashtag, skip.")
         if ad_like_obj.exists():
             sponsored_txt = "Sponsored"
             ad_like_txt = ad_like_obj.get_text() or ad_like_obj.get_desc()
             if ad_like_txt.casefold() == sponsored_txt.casefold():
                 logger.debug("Looks like an AD, skip.")
                 is_ad = True
             elif is_hashtag:
-                real_username = ad_like_obj.get_text().split("•")[0].strip()
+                owner_name = owner_name.split("•")[0].strip()
 
-        return is_ad, is_hashtag, real_username
+        return is_ad, is_hashtag, owner_name
+
+    def get_text_from_screen(self, obj) -> Optional[str]:
+        import pytesseract as pt
+        import platform
+
+        if platform.system() == "Windows":
+            pt.pytesseract.tesseract_cmd = (
+                r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+            )
+
+        screenshot = self.device.screenshot()
+        bounds = obj.ui_info().get("visibleBounds", None)
+        if bounds is None:
+            logger.info("Can't find the bounds of the object.")
+            return None
+        screenshot_cropped = screenshot.crop(
+            [
+                bounds.get("left"),
+                bounds.get("top"),
+                bounds.get("right"),
+                bounds.get("bottom"),
+            ]
+        )
+        return pt.image_to_string(screenshot_cropped).split(" ")[0].rstrip()
 
 
 class LanguageView: