Skip to content

Commit

Permalink
Improve OCR
Browse files Browse the repository at this point in the history
  • Loading branch information
junalmeida committed Jan 4, 2023
1 parent b0c8b31 commit 0878e87
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 7 deletions.
3 changes: 2 additions & 1 deletion meterparser/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
### [1.0.2.4]
### [1.0.2.5]

- Reduce ffmpeg logs, reduce amount of time to seek for a snapshot
- Improve ocr.space retry logic, add engine 5 as a fallback

### [1.0.2.3]

Expand Down
2 changes: 1 addition & 1 deletion meterparser/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: Meter Parser
version: 1.0.2.4
version: 1.0.2.5
slug: meter-parser
description: Read meter needles and numbers from a camera snapshot.
url: https://github.com/junalmeida/homeassistant-addons/tree/main/meterparser
Expand Down
3 changes: 3 additions & 0 deletions meterparser/src/app/camera.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ def run(self):
self._logger.info("Close reading - current=%s, previous=%s, limit=%s, not updating." % (
reading, self._current_reading, upper_limit))
self._error_count = 0
elif reading == 0:
# not a valid OCR result (already logged)
self._error_count += 1
else:
self._logger.error("Invalid reading - current=%s, previous=%s, limit=%s - Value could be too high or less than previous reading." % (
reading, self._current_reading, upper_limit))
Expand Down
35 changes: 33 additions & 2 deletions meterparser/src/app/parsers/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ class Marker:
def prepare_image(image, entity_id:str, send_image, debug_path: str, first_aruco: int, second_aruco: int):
debugfile = time.strftime(entity_id + "-%Y-%m-%d_%H-%M-%S")

image = automatic_brightness_and_contrast(image)[0]
image = cv2.bilateralFilter(image,9,75,75)
# image = automatic_brightness_and_contrast(image)[0]
# image = cv2.bilateralFilter(image,9,75,75)
image_to_aruco = image.copy()

if send_image is not None:
Expand Down Expand Up @@ -204,3 +204,34 @@ def automatic_brightness_and_contrast(image, clip_hist_percent=1):

auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
return (auto_result, alpha, beta)

def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Exactly one of ``width`` / ``height`` is used to derive the scale factor:

    * both ``None``  -> the original image is returned untouched;
    * ``width`` is ``None`` -> scale so the result is ``height`` pixels tall;
    * otherwise -> scale so the result is ``width`` pixels wide. Note that
      ``height`` is ignored in this case, even when it is also supplied.

    Args:
        image: array whose ``shape[:2]`` is read as ``(height, width)``.
        width: target width in pixels, or ``None``.
        height: target height in pixels, or ``None``.
        inter: interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no target size is given.
    """
    # grab the current image size
    (h, w) = image.shape[:2]

    # nothing requested: hand the original back unchanged
    if width is None and height is None:
        return image

    if width is None:
        # scale by the height ratio
        r = height / float(h)
        # int() truncates, so a very wide-and-short or tall-and-thin image
        # could end up with a 0-pixel side; clamp the derived side to 1 so the
        # dimensions passed to cv2.resize are never zero.
        dim = (max(1, int(w * r)), height)
    else:
        # scale by the width ratio (height, if given, is ignored)
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # dim is ordered (width, height), as built above
    return cv2.resize(image, dim, interpolation = inter)
18 changes: 15 additions & 3 deletions meterparser/src/app/parsers/parser_digits_ocr_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import numpy as np
import regex as re

from .image_utils import image_resize

_LOGGER = logging.getLogger(__name__)


Expand All @@ -42,16 +44,26 @@ def parse_digits_ocr_space(
cv2.imwrite(os.path.join(debug_path, "%s-in.jpg" % debugfile), image)

reading = ocr_space(image, digits_count,
decimals_count, ocr_key, entity_id)
decimals_count, ocr_key, entity_id, engine = 2) # fastest
if reading == 0.0:
reading = ocr_space(image, digits_count,
decimals_count, ocr_key, entity_id, engine = 5) # slower but better with bad images
return reading


URL_API = "https://api.ocr.space/parse/image"


def ocr_space(frame, digits_count, decimals_count, ocr_key, entity_id):
def ocr_space(frame, digits_count: int, decimals_count: int, ocr_key: str, entity_id : str, engine: int):
payload = {"apikey": ocr_key, "language": "eng",
"scale": "true", "OCREngine": "2", "filetype": "PNG"}
"scale": "true", "OCREngine": str(engine), "filetype": "PNG"}

(fh, fw) = frame.shape[:2]

if (fh < 32):
frame = image_resize(frame, height = 32)
elif (fw < 32):
frame = image_resize(frame, width = 32)

_LOGGER.debug("OCR image: %s, payload=%s" % (URL_API, payload))
imencoded = cv2.imencode(".png", frame)[1]
Expand Down

0 comments on commit 0878e87

Please sign in to comment.