fix dataset problems release 2.7 #11646

Open · wants to merge 3 commits into base: release/2.7
34 changes: 25 additions & 9 deletions ppocr/data/simple_dataset.py
@@ -57,6 +57,9 @@ def __init__(self, config, mode, logger, seed=None):
self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx",
2)
self.need_reset = True in [x < 1 for x in ratio_list]

self.img_cache = {}
self.data_cache = {}

def set_epoch_as_seed(self, seed, dataset_config):
if self.mode == 'train':
@@ -124,18 +127,26 @@ def get_ext_data(self):
label = substr[1]
img_path = os.path.join(self.data_dir, file_name)
data = {'img_path': img_path, 'label': label}

if not os.path.exists(img_path):
continue
with open(data['img_path'], 'rb') as f:
img = f.read()
if data['img_path'] in self.img_cache:
img = self.img_cache[data['img_path']]
# used by imgaug transform
data['image'] = img
data = transform(data, load_data_ops)

else:
with open(data['img_path'], 'rb') as f:
img = f.read()
data['image'] = img
self.img_cache[data['img_path']] = img
if data['img_path'] in self.data_cache:
data = self.data_cache[data['img_path']]
else:
data = transform(data, load_data_ops)
self.data_cache[data['img_path']] = data

if data is None:
continue
if 'polys' in data.keys():
if data['polys'].shape[1] != 4:
continue
Collaborator:
need to handle this situation

Author:
Hi, this is where we need to discuss.

This condition led to an endless loop and discarded every annotation line with more than 4 points in the detection task.

I have checked the annotation files we generated as well as the official ctw1500 (800M) and ICDAR2015 (80M) datasets used for fine-tuning and evaluation: both contain annotation lines with more than 4 points.

The root cause is that the utility file can only process polygons with a fixed number of points:

assert len(points) == 4, "shape of points must be 4*2"

Hence we deleted the unnecessary constraint (a 4-point bbox annotation is just a special case of a general closed polyline), and training goes smoothly again.
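For readers following along: the reason a rejected sample causes an endless loop rather than just a skipped sample is that get_ext_data resamples until it has collected enough usable entries. A minimal, runnable sketch of the failure mode (sample_and_load is a hypothetical stand-in, and the loop shape assumes PaddleOCR's resample-until-full pattern):

import numpy as np

def sample_and_load():
    # Hypothetical stand-in for "pick a random line, read the image,
    # run the load transforms". Here every annotation is a 14-point
    # CTW1500-style polygon, shaped (num_boxes, num_points, 2).
    return {'polys': np.zeros((1, 14, 2))}

ext_data, ext_data_num, attempts = [], 2, 0
while len(ext_data) < ext_data_num and attempts < 1000:  # cap added for the demo
    attempts += 1
    data = sample_and_load()
    # The deleted check: any sample whose polygons do not have exactly
    # 4 points is thrown away, so nothing is ever appended ...
    if 'polys' in data and data['polys'].shape[1] != 4:
        continue
    ext_data.append(data)

# ... and without the attempts cap, the while-loop would never terminate.
print(len(ext_data), attempts)  # -> 0 1000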

ext_data.append(data)
return ext_data
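One caveat worth noting on this hunk: img_cache and data_cache grow without bound, so a long run over a large dataset eventually holds every raw image and transformed sample in memory. If that becomes a problem, a bounded read-through cache is a natural alternative; a minimal sketch, not part of this PR (class name and capacity are illustrative):

from collections import OrderedDict

class LRUBytesCache:
    """Bounded read-through cache: evicts the least recently used entry."""

    def __init__(self, max_entries=10000):
        self.max_entries = max_entries
        self._store = OrderedDict()

    def get_or_load(self, path):
        if path in self._store:
            self._store.move_to_end(path)      # mark as most recently used
            return self._store[path]
        with open(path, 'rb') as f:
            img = f.read()
        self._store[path] = img
        if len(self._store) > self.max_entries:
            self._store.popitem(last=False)    # drop the least recently used
        return img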

@@ -152,9 +163,14 @@ def __getitem__(self, idx):
data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path):
raise Exception("{} does not exist!".format(img_path))
with open(data['img_path'], 'rb') as f:
img = f.read()
if data['img_path'] in self.img_cache:
img = self.img_cache[data['img_path']]
data['image'] = img
else:
with open(data['img_path'], 'rb') as f:
img = f.read()
data['image'] = img
self.img_cache[data['img_path']] = img
data['ext_data'] = self.get_ext_data()
outs = transform(data, self.ops)
except:
8 changes: 7 additions & 1 deletion ppocr/modeling/necks/db_fpn.py
@@ -29,7 +29,7 @@
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule

import numbers

class DSConv(nn.Layer):
def __init__(self,
@@ -237,6 +237,9 @@ def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

if isinstance(in_channels, numbers.Number):
in_channels = [in_channels]

for i in range(len(in_channels)):
self.ins_conv.append(
RSELayer(
@@ -306,6 +309,9 @@ def __init__(self, in_channels, out_channels, mode='large', **kwargs):
"mode can only be one of ['lite', 'large'], but received {}".
format(mode))

if isinstance(in_channels, numbers.Number):
in_channels = [in_channels]

for i in range(len(in_channels)):
self.ins_conv.append(
nn.Conv2D(
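The numbers.Number guards added above presumably exist so that these necks also accept a scalar in_channels (for example, from a single-output backbone) rather than only a list. The idiom in isolation, as a runnable sketch (the values are illustrative):

import numbers

def normalize_in_channels(in_channels):
    # Accept either a single number (e.g. 96) or a list (e.g. [16, 24, 56, 480]),
    # and always hand back a list so `for i in range(len(in_channels))` works.
    if isinstance(in_channels, numbers.Number):
        in_channels = [in_channels]
    return in_channels

print(normalize_in_channels(96))                 # [96]
print(normalize_in_channels([16, 24, 56, 480]))  # unchanged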
3 changes: 3 additions & 0 deletions ppocr/utils/network.py
@@ -75,6 +75,9 @@ def maybe_download_params(model_path):
else:
url = model_path
tmp_path = os.path.join(MODELS_DIR, url.split('/')[-1])
if os.path.exists(tmp_path) and os.path.isfile(tmp_path):
Collaborator:
can be deleted

Author:
Hi, models from model_path are downloaded to tmp_path, but the download is repeated each time we launch a task on the same machine, which takes about 3-5 minutes. This is unnecessary.

A better way, I think, is to provide an MD5 sum in the model configuration file, so we can check whether the file already in tmp_path is correct.

But any change to the config format in this project requires checking all the other configuration files. That would be too cumbersome.
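For what it's worth, a minimal sketch of that MD5 idea, assuming the expected digest were carried in the config (the helper name is illustrative, not an existing API):

import hashlib
import os

def is_cached_model_valid(tmp_path, expected_md5):
    """Return True if tmp_path exists and matches the expected MD5 digest."""
    if not os.path.isfile(tmp_path):
        return False
    md5 = hashlib.md5()
    with open(tmp_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):  # read in 1 MiB chunks
            md5.update(chunk)
    return md5.hexdigest() == expected_md5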

print(f'[network::maybe_download_params] model has already been downloaded to {tmp_path}')
return tmp_path
print('download {} to {}'.format(url, tmp_path))
os.makedirs(MODELS_DIR, exist_ok=True)
download_with_progressbar(url, tmp_path)
28 changes: 26 additions & 2 deletions tools/infer/utility.py
@@ -26,6 +26,7 @@
import random
from ppocr.utils.logging import get_logger

from shapely.geometry import Polygon

def str2bool(v):
return v.lower() in ("true", "yes", "t", "y", "1")
@@ -609,8 +610,31 @@ def get_rotate_crop_image(img, points):
img_crop = img[top:bottom, left:right, :].copy()
points[:, 0] = points[:, 0] - left
points[:, 1] = points[:, 1] - top
'''
assert len(points) == 4, "shape of points must be 4*2"
Collaborator:

There are situations where more data formats need to be handled, in addition to CTW.

Author:
Yes. But in CTW, ICDAR2015, and our own annotation files alike, we don't assume a bbox consists of a fixed number of points to form a valid polyline.

So we just treat it as a general polyline: 2 points (upper left, bottom right), 3 points (triangles), and closed polylines with more points.
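For concreteness, the replacement logic below reduces any N-point annotation to its axis-aligned bounding quad before cropping; a standalone sketch of that reduction (sample inputs are illustrative):

import numpy as np

def bbox_quad(points):
    # Reduce an N-point annotation (N >= 2), shaped (-1, 2), to the 4-point
    # axis-aligned quad that the rest of get_rotate_crop_image expects.
    points = np.asarray(points).reshape(-1, 2)
    x_min, y_min = points.min(axis=0)
    x_max, y_max = points.max(axis=0)
    return np.array([[x_min, y_min], [x_max, y_min],
                     [x_max, y_max], [x_min, y_max]], dtype=points.dtype)

print(bbox_quad([[0, 0], [10, 5]]))                   # 2-point (two-corner) box
print(bbox_quad(np.random.randint(0, 100, (14, 2))))  # 14-point CTW1500 polygon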

'''
if not isinstance(points, np.ndarray):
raise Exception("points must be a numpy array!")
rank = len(points.shape)
if rank != 2:
points = points.reshape(-1, 2)
# capture bbox of points, the length of points must be > 1
assert len(points) > 1 and len(points) % 2 == 0, "[utility::get_rotate_crop_image] points must contain more than one point and an even number of points"
x_min, y_min = np.min(points, axis=0)
x_max, y_max = np.max(points, axis=0)

points_ = points
points = np.array([
[x_min, y_min],
[x_max, y_min],
[x_max, y_max],
[x_min, y_max]
], dtype=points_.dtype)

area = Polygon(points).area
assert area > 1, "[utility::get_rotate_crop_image] bbox must have an area greater than 1 pixel unit"

# TODO (yiakwy) : remove this unwanted check, since the CTW dataset
# naturally contains polygons with more than 4 points
# assert len(points) == 4, "shape of points must be 4*2"
img_crop_width = int(
max(
np.linalg.norm(points[0] - points[1]),