feat(fix): Fix issue Unable to segment document : CUDA out of memory.…

… Tried to allocate 70.24 GiB #98 Images were not validated to ensure that they meet any type constraint
marieai · Dec 21, 2023 · 4e81e85 · 4e81e85
1 parent 6b66537
commit 4e81e85
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 12 deletions.
diff --git a/im-policy.xml b/im-policy.xml
@@ -59,8 +59,8 @@
   <!-- <policy domain="resource" name="temporary-path" value="/tmp"/> -->
   <policy domain="resource" name="memory" value="32000MiB"/>
   <policy domain="resource" name="map" value="8GiB"/>
-  <policy domain="resource" name="width" value="16KP"/>
-  <policy domain="resource" name="height" value="16KP"/>
+  <policy domain="resource" name="width" value="20KP"/>
+  <policy domain="resource" name="height" value="20KP"/>
   <!-- <policy domain="resource" name="list-length" value="128"/> -->
   <policy domain="resource" name="area" value="8GiB"/>
   <policy domain="resource" name="disk" value="8GiB"/>

diff --git a/marie/executor/classifier/document_classifier_executor.py b/marie/executor/classifier/document_classifier_executor.py
@@ -20,7 +20,7 @@
 from marie.ocr import CoordinateFormat
 from marie.pipe.classification_pipeline import ClassificationPipeline
 from marie.utils.docs import docs_from_asset, frames_from_docs
-from marie.utils.image_utils import hash_frames_fast
+from marie.utils.image_utils import ensure_max_page_size, hash_frames_fast
 from marie.utils.network import get_ip_address
 
 # TODO : Refactor this, as it is a duplicate of the one in text_extraction_executor.py
@@ -129,12 +129,16 @@ def classify(self, docs: DocList[AssetKeyDoc], parameters: dict, *args, **kwargs
         if len(docs) > 1:
             return {"error": "expected single document"}
 
-        doc = docs[0]
         # load documents from specified document asset key
+        doc = docs[0]
         docs = docs_from_asset(doc.asset_key, doc.pages)
 
-        frames = frames_from_docs(docs)
-        frame_len = len(frames)
+        src_frames = frames_from_docs(docs)
+        changed, frames = ensure_max_page_size(src_frames)
+        if changed:
+            self.logger.warning(f"Page size of frames was changed ")
+            for i, (s, f) in enumerate(zip(src_frames, frames)):
+                self.logger.warning(f"Frame[{i}] changed : {s.shape} -> {f.shape}")
 
         if parameters is None or "job_id" not in parameters:
             self.logger.warning(f"Job ID is not present in parameters")
@@ -193,7 +197,7 @@ def classify(self, docs: DocList[AssetKeyDoc], parameters: dict, *args, **kwargs
 
             self.logger.debug(
                 "ref_id, ref_type frames , regions , pms_mode, coordinate_format,"
-                f" checksum: {ref_id}, {ref_type},  {frame_len}, {len(regions)}, {pms_mode},"
+                f" checksum: {ref_id}, {ref_type},  {len(frames)}, {len(regions)}, {pms_mode},"
                 f" {coordinate_format}"
             )
             payload_kwargs = {}

diff --git a/marie/executor/text/text_extraction_executor.py b/marie/executor/text/text_extraction_executor.py
@@ -22,7 +22,7 @@
 from marie.ocr import CoordinateFormat
 from marie.pipe import ExtractPipeline
 from marie.utils.docs import docs_from_asset, frames_from_docs
-from marie.utils.image_utils import hash_frames_fast
+from marie.utils.image_utils import ensure_max_page_size, hash_frames_fast
 from marie.utils.network import get_ip_address
 from marie.utils.types import strtobool
 
@@ -132,12 +132,16 @@ def extract(self, docs: DocList[AssetKeyDoc], parameters: dict, *args, **kwargs)
         if len(docs) > 1:
             return {"error": "expected single document"}
 
-        doc = docs[0]
         # load documents from specified document asset key
+        doc = docs[0]
         docs = docs_from_asset(doc.asset_key, doc.pages)
 
-        frames = frames_from_docs(docs)
-        frame_len = len(frames)
+        src_frames = frames_from_docs(docs)
+        changed, frames = ensure_max_page_size(src_frames)
+        if changed:
+            self.logger.warning(f"Page size of frames was changed ")
+            for i, (s, f) in enumerate(zip(src_frames, frames)):
+                self.logger.warning(f"Frame[{i}] changed : {s.shape} -> {f.shape}")
 
         if parameters is None or "job_id" not in parameters:
             self.logger.warning(f"Job ID is not present in parameters")
@@ -197,7 +201,7 @@ def extract(self, docs: DocList[AssetKeyDoc], parameters: dict, *args, **kwargs)
 
             self.logger.debug(
                 "ref_id, ref_type frames , regions , pms_mode, coordinate_format,"
-                f" checksum: {ref_id}, {ref_type},  {frame_len}, {len(regions)}, {pms_mode},"
+                f" checksum: {ref_id}, {ref_type},  {len(frames)}, {len(regions)}, {pms_mode},"
                 f" {coordinate_format}"
             )
             payload_kwargs = {}

diff --git a/marie/utils/image_utils.py b/marie/utils/image_utils.py
@@ -245,3 +245,67 @@ def crop_to_content(frame: np.ndarray, content_aware=True) -> np.ndarray:
 
     dt = time.time() - start
     return cropped
+
+
def ensure_max_page_size(
    frames: List[np.ndarray], max_page_size: Tuple[int, int] = (3300, 2550)
) -> Tuple[bool, List[np.ndarray]]:
    """
    Ensure frames do not exceed the max page size. Downscale if necessary, considering the orientation.

    The two values of ``max_page_size`` are interpreted as a long-side limit and a
    short-side limit, so the order in which they are given does not matter. A portrait
    (or square) frame may be at most ``short x long`` (width x height) and a landscape
    frame at most ``long x short``. Frames that already fit are returned as the very
    same objects; oversized frames are downscaled with their aspect ratio preserved.

    EXAMPLE USAGE
    .. code-block:: python
        frames = [cv2.imread(image_path) for image_path in list_of_image_paths]
        changed, resized_frames = ensure_max_page_size(frames)

    :param frames: List of image frames; only ``frame.shape[:2]`` (height, width) is inspected.
    :param max_page_size: The two page-size limits in pixels (any order). The default
        presumably corresponds to a US Letter page at 300 DPI - confirm with callers.
    :return: (changed, frames) - 'changed' indicates if any resizing was done; 'frames' are the possibly resized frames.
    :raises ValueError: If either limit in ``max_page_size`` is not positive.
    """

    # Order-independent limits: a (3300, 2550) and a (2550, 3300) tuple mean the same page.
    short_side, long_side = sorted(max_page_size)
    if short_side <= 0:
        raise ValueError(
            f"max_page_size values must be positive, got : {max_page_size}"
        )

    resized_frames = []
    changed = False

    for frame in frames:
        height, width = frame.shape[:2]

        # The long-side limit applies along the frame's longer axis.
        if width > height:
            # Landscape orientation
            max_width, max_height = long_side, short_side
        else:
            # Portrait (or square) orientation
            max_width, max_height = short_side, long_side

        # Empty frames cannot be scaled, and frames within the limits need no work.
        is_empty = width == 0 or height == 0
        if is_empty or (width <= max_width and height <= max_height):
            resized_frames.append(frame)
            continue

        changed = True

        # Pin the dimension that overshoots its limit the most (compared via
        # cross-multiplication to stay in integer arithmetic) and derive the other
        # from the aspect ratio; the derived side can then never exceed its limit.
        if width * max_height >= height * max_width:
            new_width = max_width
            new_height = int(max_width * height // width)
        else:
            new_height = max_height
            new_width = int(max_height * width // height)

        # Extreme aspect ratios could otherwise truncate a side to 0 pixels.
        new_width = max(1, int(new_width))
        new_height = max(1, int(new_height))

        # cv2.resize takes the target size as (width, height).
        resized_frames.append(
            cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
        )

    return changed, resized_frames