Improve OCR

junalmeida · Jan 4, 2023 · 0878e87 · 0878e87
1 parent b0c8b31
commit 0878e87
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 7 deletions.
diff --git a/meterparser/CHANGELOG.md b/meterparser/CHANGELOG.md
@@ -1,6 +1,7 @@
-### [1.0.2.4]
+### [1.0.2.5]
 
 - Reduce ffmpeg logs, reduce amount of time to seek for a snapshot
+- Improve ocr.space retry logic, add engine 5 as a fallback
 
 ### [1.0.2.3]
 

diff --git a/meterparser/config.yaml b/meterparser/config.yaml
@@ -1,5 +1,5 @@
 name: Meter Parser
-version: 1.0.2.4
+version: 1.0.2.5
 slug: meter-parser
 description: Read meter needles and numbers from a camera snapshot.
 url: https://github.com/junalmeida/homeassistant-addons/tree/main/meterparser

diff --git a/meterparser/src/app/camera.py b/meterparser/src/app/camera.py
@@ -168,6 +168,9 @@ def run(self):
                         self._logger.info("Close reading - current=%s, previous=%s, limit=%s, not updating." % (
                             reading, self._current_reading, upper_limit))
                         self._error_count = 0
+                    elif reading == 0:
+                        # not a valid OCR result; the failure was already logged
+                        self._error_count += 1
                     else:
                         self._logger.error("Invalid reading - current=%s, previous=%s, limit=%s - Value could be too high or less than previous reading." % (
                             reading, self._current_reading, upper_limit))

diff --git a/meterparser/src/app/parsers/image_utils.py b/meterparser/src/app/parsers/image_utils.py
@@ -23,8 +23,8 @@ class Marker:
 def prepare_image(image, entity_id:str, send_image, debug_path: str, first_aruco: int, second_aruco: int):
     debugfile = time.strftime(entity_id + "-%Y-%m-%d_%H-%M-%S")
 
-    image = automatic_brightness_and_contrast(image)[0]
-    image = cv2.bilateralFilter(image,9,75,75)
+    # image = automatic_brightness_and_contrast(image)[0]
+    # image = cv2.bilateralFilter(image,9,75,75)
     image_to_aruco = image.copy()
 
     if send_image is not None:
@@ -204,3 +204,34 @@ def automatic_brightness_and_contrast(image, clip_hist_percent=1):
 
     auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
     return (auto_result, alpha, beta)
+
+def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
+    # Resize `image` to the requested width or height, keeping its aspect
+    # ratio; start by reading the current height and width from image.shape.
+    dim = None
+    (h, w) = image.shape[:2]
+
+    # no target size was given: return the input image itself (no copy
+    # is made)
+    if width is None and height is None:
+        return image
+
+    # only a height was given
+    if width is None:
+        # scale factor from current to target height; the width is scaled
+        # by the same factor and truncated to an int
+        r = height / float(h)
+        dim = (int(w * r), height)
+
+    # otherwise a width was given; height, if also passed, is ignored
+    else:
+        # scale factor from current to target width; the height is scaled
+        # by the same factor and truncated to an int
+        r = width / float(w)
+        dim = (width, int(h * r))
+
+    # resample to `dim`, which is ordered (width, height), using `inter`
+    resized = cv2.resize(image, dim, interpolation = inter)
+
+    # hand back the newly resized image
+    return resized
diff --git a/meterparser/src/app/parsers/parser_digits_ocr_space.py b/meterparser/src/app/parsers/parser_digits_ocr_space.py
@@ -22,6 +22,8 @@
 import numpy as np
 import regex as re
 
+from .image_utils import image_resize
+
 _LOGGER = logging.getLogger(__name__)
 
 
@@ -42,16 +44,26 @@ def parse_digits_ocr_space(
         cv2.imwrite(os.path.join(debug_path, "%s-in.jpg" % debugfile), image)
 
     reading = ocr_space(image, digits_count,
-                        decimals_count, ocr_key, entity_id)
+                        decimals_count, ocr_key, entity_id, engine = 2) # fastest
+    if reading == 0.0:
+        reading = ocr_space(image, digits_count,
+                            decimals_count, ocr_key, entity_id, engine = 5) # slower but better with bad images
     return reading
 
 
 URL_API = "https://api.ocr.space/parse/image"
 
 
-def ocr_space(frame, digits_count, decimals_count, ocr_key, entity_id):
+def ocr_space(frame, digits_count: int, decimals_count: int, ocr_key: str, entity_id : str, engine: int):
     payload = {"apikey": ocr_key, "language": "eng",
-               "scale": "true", "OCREngine": "2", "filetype": "PNG"}
+               "scale": "true", "OCREngine": str(engine), "filetype": "PNG"}
+
+    (fh, fw) = frame.shape[:2]
+
+    if (fh < 32):
+        frame = image_resize(frame, height = 32)
+    elif (fw < 32):
+        frame = image_resize(frame, width = 32)
 
     _LOGGER.debug("OCR image: %s, payload=%s" % (URL_API, payload))
     imencoded = cv2.imencode(".png", frame)[1]