Merge pull request #10 from RapidAI/add_slanet_plus_model

Add slanet plus model
RapidAI · Oct 17, 2024 · 09dfa28
2 parents 4196924 + 70cbbbc
commit 09dfa28
Show file tree

Hide file tree

Showing 17 changed files with 24 additions and 1,099 deletions.
diff --git a/.github/workflows/publish_whl.yml b/.github/workflows/publish_whl.yml
@@ -56,7 +56,7 @@ jobs:
           ZIP_NAME=${RESOURCES_URL##*/}
           DIR_NAME=${ZIP_NAME%.*}
           unzip $ZIP_NAME
-          mv $DIR_NAME/en_ppstructure_mobile_v2_SLANet.onnx rapid_table/models/
+          mv $DIR_NAME/slanet-plus.onnx rapid_table/models/
           python setup.py bdist_wheel ${{ github.event.head_commit.message }}
 
       - name: Publish distribution 📦 to PyPI

diff --git a/README.md b/README.md
@@ -19,13 +19,13 @@ RapidTable库是专门用来文档类图像的表格结构还原，结合RapidOC
 
 目前支持两种类别的表格识别模型：中文和英文表格识别模型，具体可参见下面表格：
 
-slanet_plus是paddlex内置的SLANet升级版模型，准确率有大幅提升，但paddle2onnx暂时不支持转换
+slanet_plus是paddlex内置的SLANet升级版模型，准确率有大幅提升
 
   |      模型类型      |                  模型名称                  | 模型大小 |
   |:--------------:|:--------------------------------------:| :------: |
   |       英文       | `en_ppstructure_mobile_v2_SLANet.onnx` |   7.3M   |
   |       中文       | `ch_ppstructure_mobile_v2_SLANet.onnx` |   7.4M   |
-  | slanet_plus 中文 |          `inference.pdmodel`           |   7.4M   |
+  | slanet_plus 中文 |          `slanet-plus.onnx`           |   6.8M   |
 
 
 模型来源：[PaddleOCR 表格识别](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/table/README_ch.md)
@@ -45,38 +45,33 @@ RapidTable是整理自PP-Structure中表格识别部分而来。由于PP-Structu
 
 ### 安装
 
-由于模型较小，预先将英文表格识别模型(`en_ppstructure_mobile_v2_SLANet.onnx`)打包进了whl包内，如果做英文表格识别，可直接安装使用。
+由于模型较小，预先将slanet-plus表格识别模型(`slanet-plus.onnx`)打包进了whl包内。
 
 > ⚠️注意：`rapid_table>=v0.1.0`之后，不再将`rapidocr_onnxruntime`依赖强制打包到`rapid_table`中。使用前，需要自行安装`rapidocr_onnxruntime`包。
 
 ```bash
 pip install rapidocr_onnxruntime
 pip install rapid_table
-# 安装会引入paddlepaddle cpu 3.0.0b0
-#pip install slanet_plus_table
 ```
 
 ### 使用方式
 
 #### python脚本运行
 
-RapidTable类提供model_path参数，可以自行指定上述2个模型，默认是`en_ppstructure_mobile_v2_SLANet.onnx`。举例如下：
+RapidTable类提供model_path参数，可以自行指定上表中的模型，默认是`slanet-plus.onnx`。举例如下：
 
 ```python
-table_engine = RapidTable(model_path='ch_ppstructure_mobile_v2_SLANet.onnx')
-#table_engine = SLANetPlus()
+table_engine = RapidTable()
 ```
 
 完整示例：
 
 ```python
 from pathlib import Path
 
-from rapid_table import RapidTable
 from rapid_table import RapidTable, VisTable
 
 table_engine = RapidTable()
-#table_engine = SLANetPlus()
 ocr_engine = RapidOCR()
 viser = VisTable()
 

diff --git a/rapid_table/main.py b/rapid_table/main.py
@@ -19,12 +19,13 @@
 
 
 class RapidTable:
-    def __init__(self, model_path: Optional[str] = None):
+    def __init__(self, model_path: Optional[str] = None, model_type: str = None):
         if model_path is None:
             model_path = str(
-                root_dir / "models" / "en_ppstructure_mobile_v2_SLANet.onnx"
+                root_dir / "models" / "slanet-plus.onnx"
             )
-
+            model_type = "slanet-plus"
+        self.model_type = model_type
         self.load_img = LoadImage()
         self.table_structure = TableStructurer(model_path)
         self.table_matcher = TableMatch()
@@ -54,6 +55,9 @@ def __call__(
         dt_boxes, rec_res = self.get_boxes_recs(ocr_result, h, w)
 
         pred_structures, pred_bboxes, _ = self.table_structure(copy.deepcopy(img))
+        # 适配slanet-plus模型输出的box缩放还原
+        if self.model_type == "slanet-plus":
+            pred_bboxes = self.adapt_slanet_plus(img, pred_bboxes)
         pred_html = self.table_matcher(pred_structures, pred_bboxes, dt_boxes, rec_res)
 
         elapse = time.time() - s
@@ -76,7 +80,15 @@ def get_boxes_recs(
             r_boxes.append(box)
         dt_boxes = np.array(r_boxes)
         return dt_boxes, rec_res
-
+    def adapt_slanet_plus(self, img: np.ndarray, pred_bboxes: np.ndarray) -> np.ndarray:  # Rescale pred_bboxes in place by factors derived from img's height/width; returns the same array object.
+        h, w = img.shape[:2]  # first two dimensions of img, used as height and width
+        resized = 488  # NOTE(review): presumably the side length the slanet-plus model resizes its input to — confirm
+        ratio = min(resized / h, resized / w)  # the smaller per-axis scale factor, i.e. the one determined by the longer side
+        w_ratio = resized / (w * ratio)  # equals 1 when w is the longer side, otherwise h / w
+        h_ratio = resized / (h * ratio)  # equals 1 when h is the longer side, otherwise w / h
+        pred_bboxes[:, 0::2] *= w_ratio  # even-indexed columns (presumably x coordinates — confirm) are scaled in place
+        pred_bboxes[:, 1::2] *= h_ratio  # odd-indexed columns (presumably y coordinates — confirm) are scaled in place
+        return pred_bboxes
 
 def main():
     parser = argparse.ArgumentParser()

diff --git a/rapid_table/utils.py b/rapid_table/utils.py
@@ -114,7 +114,7 @@ def __call__(
         return drawed_img
 
     def insert_border_style(self, table_html_str: str):
-        style_res = """<style>td {border-left: 1px solid;border-bottom:1px solid;}
+        style_res = """<meta charset="UTF-8"><style>td {border-left: 1px solid;border-bottom:1px solid;}
                     table, th {border-top:1px solid;font-size: 10px;
                     border-collapse: collapse;border-right: 1px solid;}
                     </style>"""

diff --git a/setup.py b/setup.py
@@ -53,7 +53,7 @@ def get_readme():
         f"{MODULE_NAME}.table_matcher",
         f"{MODULE_NAME}.table_structure",
     ],
-    package_data={"": ["en_ppstructure_mobile_v2_SLANet.onnx"]},
+    package_data={"": ["slanet-plus.onnx"]},
     keywords=["ppstructure,table,rapidocr,rapid_table"],
     classifiers=[
         "Programming Language :: Python :: 3.6",

diff --git a/slanet_plus_table/__init__.py b/slanet_plus_table/__init__.py
diff --git a/slanet_plus_table/main.py b/slanet_plus_table/main.py
diff --git a/slanet_plus_table/models/inference.yml b/slanet_plus_table/models/inference.yml
diff --git a/slanet_plus_table/requirements.txt b/slanet_plus_table/requirements.txt
diff --git a/slanet_plus_table/setup.py b/slanet_plus_table/setup.py