fix dataset problems release 2.7 #11646

Open · wants to merge 3 commits into base: release/2.7
34 changes: 25 additions & 9 deletions ppocr/data/simple_dataset.py
@@ -57,6 +57,9 @@ def __init__(self, config, mode, logger, seed=None):
self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx",
2)
self.need_reset = True in [x < 1 for x in ratio_list]

self.img_cache = {}
self.data_cache = {}

def set_epoch_as_seed(self, seed, dataset_config):
if self.mode == 'train':
@@ -124,18 +127,26 @@ def get_ext_data(self):
label = substr[1]
img_path = os.path.join(self.data_dir, file_name)
data = {'img_path': img_path, 'label': label}

if not os.path.exists(img_path):
continue
with open(data['img_path'], 'rb') as f:
img = f.read()
if data['img_path'] in self.img_cache:
img = self.img_cache[data['img_path']]
# used by imgaug transform
data['image'] = img
data = transform(data, load_data_ops)

else:
with open(data['img_path'], 'rb') as f:
img = f.read()
data['image'] = img
self.img_cache[data['img_path']] = img
if data['img_path'] in self.data_cache:
data = self.data_cache[data['img_path']]
else:
data = transform(data, load_data_ops)
self.data_cache[data['img_path']] = data

if data is None:
continue
if 'polys' in data.keys():
if data['polys'].shape[1] != 4:
continue
Collaborator:
need to handle this situation

Author:
Hi, this is where we need to discuss.

This condition led to an endless loop and discarded every annotation line with more than 4 points in the detection task.

I have checked the annotation files we generated as well as the official ctw1500 (800M) and ICDAR2015 (80M) datasets used for fine-tuning and evaluation: both contain annotation lines with more than 4 points.

The root cause is that the utility file can only process polygons with a fixed number of points:

assert len(points) == 4, "shape of points must be 4*2"

Hence we deleted the unnecessary constraint (a 4-point bbox annotation is just a special case of a general closed polyline), and training goes smoothly again.
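For readers following along: the reason a rejected sample causes an endless loop rather than just a skipped sample is that get_ext_data resamples until it has collected enough usable entries. A minimal, runnable sketch of the failure mode (sample_and_load is a hypothetical stand-in, and the loop shape assumes PaddleOCR's resample-until-full pattern):

import numpy as np

def sample_and_load():
    # Hypothetical stand-in for "pick a random line, read the image,
    # run the load transforms". Here every annotation is a 14-point
    # CTW1500-style polygon, shaped (num_boxes, num_points, 2).
    return {'polys': np.zeros((1, 14, 2))}

ext_data, ext_data_num, attempts = [], 2, 0
while len(ext_data) < ext_data_num and attempts < 1000:  # cap added for the demo
    attempts += 1
    data = sample_and_load()
    # The deleted check: any sample whose polygons do not have exactly
    # 4 points is thrown away, so nothing is ever appended ...
    if 'polys' in data and data['polys'].shape[1] != 4:
        continue
    ext_data.append(data)

# ... and without the attempts cap, the while-loop would never terminate.
print(len(ext_data), attempts)  # -> 0 1000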

ext_data.append(data)
return ext_data
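One caveat worth noting on this hunk: img_cache and data_cache grow without bound, so a long run over a large dataset eventually holds every raw image and transformed sample in memory. If that becomes a problem, a bounded read-through cache is a natural alternative; a minimal sketch, not part of this PR (class name and capacity are illustrative):

from collections import OrderedDict

class LRUBytesCache:
    """Bounded read-through cache: evicts the least recently used entry."""

    def __init__(self, max_entries=10000):
        self.max_entries = max_entries
        self._store = OrderedDict()

    def get_or_load(self, path):
        if path in self._store:
            self._store.move_to_end(path)      # mark as most recently used
            return self._store[path]
        with open(path, 'rb') as f:
            img = f.read()
        self._store[path] = img
        if len(self._store) > self.max_entries:
            self._store.popitem(last=False)    # drop the least recently used
        return img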

@@ -152,9 +163,14 @@ def __getitem__(self, idx):
data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path):
raise Exception("{} does not exist!".format(img_path))
with open(data['img_path'], 'rb') as f:
img = f.read()
if data['img_path'] in self.img_cache:
img = self.img_cache[data['img_path']]
data['image'] = img
else:
with open(data['img_path'], 'rb') as f:
img = f.read()
data['image'] = img
self.img_cache[data['img_path']] = img
data['ext_data'] = self.get_ext_data()
outs = transform(data, self.ops)
except:
8 changes: 7 additions & 1 deletion ppocr/modeling/necks/db_fpn.py
@@ -29,7 +29,7 @@
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule

import numbers

class DSConv(nn.Layer):
def __init__(self,
@@ -237,6 +237,9 @@ def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

if isinstance(in_channels, numbers.Number):
in_channels = [in_channels]

for i in range(len(in_channels)):
self.ins_conv.append(
RSELayer(
@@ -306,6 +309,9 @@ def __init__(self, in_channels, out_channels, mode='large', **kwargs):
"mode can only be one of ['lite', 'large'], but received {}".
format(mode))

if isinstance(in_channels, numbers.Number):
in_channels = [in_channels]

for i in range(len(in_channels)):
self.ins_conv.append(
nn.Conv2D(
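The numbers.Number guards added above presumably exist so that these necks also accept a scalar in_channels (for example, from a single-output backbone) rather than only a list. The idiom in isolation, as a runnable sketch (the values are illustrative):

import numbers

def normalize_in_channels(in_channels):
    # Accept either a single number (e.g. 96) or a list (e.g. [16, 24, 56, 480]),
    # and always hand back a list so `for i in range(len(in_channels))` works.
    if isinstance(in_channels, numbers.Number):
        in_channels = [in_channels]
    return in_channels

print(normalize_in_channels(96))                 # [96]
print(normalize_in_channels([16, 24, 56, 480]))  # unchanged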
3 changes: 3 additions & 0 deletions ppocr/utils/network.py
@@ -75,6 +75,9 @@ def maybe_download_params(model_path):
else:
url = model_path
tmp_path = os.path.join(MODELS_DIR, url.split('/')[-1])
if os.path.exists(tmp_path) and os.path.isfile(tmp_path):
Collaborator:
can be deleted

Author:
Hi, models from model_path are downloaded to tmp_path, but the download is repeated each time we launch a task on the same machine, which takes about 3-5 minutes. This is unnecessary.

A better way, I think, is to provide an MD5 sum in the model configuration file, so we can check whether the file already in tmp_path is correct.

But any change to the config format in this project requires checking all the other configuration files. That would be too cumbersome.
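For what it's worth, a minimal sketch of that MD5 idea, assuming the expected digest were carried in the config (the helper name is illustrative, not an existing API):

import hashlib
import os

def is_cached_model_valid(tmp_path, expected_md5):
    """Return True if tmp_path exists and matches the expected MD5 digest."""
    if not os.path.isfile(tmp_path):
        return False
    md5 = hashlib.md5()
    with open(tmp_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):  # read in 1 MiB chunks
            md5.update(chunk)
    return md5.hexdigest() == expected_md5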

print(f'[network::maybe_download_params] model has already been downloaded to {tmp_path}')
return tmp_path
print('download {} to {}'.format(url, tmp_path))
os.makedirs(MODELS_DIR, exist_ok=True)
download_with_progressbar(url, tmp_path)
28 changes: 26 additions & 2 deletions tools/infer/utility.py
@@ -26,6 +26,7 @@
import random
from ppocr.utils.logging import get_logger

from shapely.geometry import Polygon

def str2bool(v):
return v.lower() in ("true", "yes", "t", "y", "1")
@@ -609,8 +610,31 @@ def get_rotate_crop_image(img, points):
img_crop = img[top:bottom, left:right, :].copy()
points[:, 0] = points[:, 0] - left
points[:, 1] = points[:, 1] - top
'''
assert len(points) == 4, "shape of points must be 4*2"
Collaborator:

There are situations where more data formats need to be handled, in addition to CTW.

Author:
Yes. But in CTW, ICDAR2015, and our own annotation files alike, we don't assume a bbox consists of a fixed number of points to form a valid polyline.

So we just treat it as a general polyline: 2 points (upper left, bottom right), 3 points (triangles), and closed polylines with more points.
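For concreteness, the replacement logic below reduces any N-point annotation to its axis-aligned bounding quad before cropping; a standalone sketch of that reduction (sample inputs are illustrative):

import numpy as np

def bbox_quad(points):
    # Reduce an N-point annotation (N >= 2), shaped (-1, 2), to the 4-point
    # axis-aligned quad that the rest of get_rotate_crop_image expects.
    points = np.asarray(points).reshape(-1, 2)
    x_min, y_min = points.min(axis=0)
    x_max, y_max = points.max(axis=0)
    return np.array([[x_min, y_min], [x_max, y_min],
                     [x_max, y_max], [x_min, y_max]], dtype=points.dtype)

print(bbox_quad([[0, 0], [10, 5]]))                   # 2-point (two-corner) box
print(bbox_quad(np.random.randint(0, 100, (14, 2))))  # 14-point CTW1500 polygon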

'''
if not isinstance(points, np.ndarray):
raise Exception("points must be a numpy array!")
rank = len(points.shape)
if rank != 2:
points = points.reshape(-1, 2)
# capture bbox of points, the length of points must be > 1
assert len(points) > 1 and len(points) % 2 == 0, "[utility::get_rotate_crop_image] points must contain more than one point and an even number of points"
x_min, y_min = np.min(points, axis=0)
x_max, y_max = np.max(points, axis=0)

points_ = points
points = np.array([
[x_min, y_min],
[x_max, y_min],
[x_max, y_max],
[x_min, y_max]
], dtype=points_.dtype)

area = Polygon(points).area
assert area > 1, "[utility::get_rotate_crop_image] bbox must have an area greater than 1 pixel unit"

# TODO (yiakwy) : remove this unwanted check, since the CTW dataset
# naturally contains polygons with more than 4 points
# assert len(points) == 4, "shape of points must be 4*2"
img_crop_width = int(
max(
np.linalg.norm(points[0] - points[1]),