瀏覽代碼

add pipelines of single module, seal recognition and table recognition. (#2650)

* add the new architecture of pipelines

* add the new architecture of pipelines

* add explanatory note

* add explanatory note

* fix some modules name

* add pipelines of single module, seal recognition and table recognition.
dyning 11 月之前
父節點
當前提交
4b9b74797e

+ 2 - 2
api_examples/pipelines/test_doc_preprocessor.py

@@ -16,8 +16,8 @@ from paddlex import create_pipeline
 
 pipeline = create_pipeline(pipeline="doc_preprocessor")
 
-test_img_path = "./test_demo_imgs/img_rot180_demo.jpg"
-# test_img_path = "./test_demo_imgs/doc_distort_test.jpg"
+test_img_path = "./test_samples/img_rot180_demo.jpg"
+# test_img_path = "./test_samples/doc_distort_test.jpg"
 
 output = pipeline.predict(
     test_img_path, use_doc_orientation_classify=True, use_doc_unwarping=True

+ 24 - 0
api_examples/pipelines/test_image_classification.py

@@ -0,0 +1,24 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="image_classification")
+# Run the pipeline on one sample image; the returned results are iterated below.
+output = pipeline.predict("./test_samples/general_image_classification_001.jpg")
+for res in output:
+    print(res)
+    res.print()  ## print the structured prediction output
+    res.save_to_img("./output/")  ## save the result visualization image
+    res.save_to_json("./output/")  ## save the structured prediction output

+ 2 - 6
api_examples/pipelines/test_layout_parsing.py

@@ -17,7 +17,7 @@ from paddlex import create_pipeline
 pipeline = create_pipeline(pipeline="layout_parsing")
 
 output = pipeline.predict(
-    "./test_demo_imgs/test_layout_parsing.jpg",
+    "./test_samples/test_layout_parsing.jpg",
     use_doc_orientation_classify=True,
     use_doc_unwarping=True,
     use_common_ocr=True,
@@ -25,10 +25,6 @@ output = pipeline.predict(
     use_table_recognition=True,
 )
 
-# output = pipeline("./test_demo_imgs/demo_paper.png")
-# output = pipeline("./test_demo_imgs/table_recognition.jpg")
-# output = pipeline.predict("./test_demo_imgs/seal_text_det.png")
-# output = pipeline.predict("./test_demo_imgs/img_rot180_demo.jpg")
 for res in output:
-    # print(res)
+    print(res)
     res.save_results("./output")

+ 2 - 2
api_examples/pipelines/test_ocr.py

@@ -16,9 +16,9 @@ from paddlex import create_pipeline
 
 pipeline = create_pipeline(pipeline="OCR")
 
-# output = pipeline.predict("./test_demo_imgs/general_ocr_002.png")
+# output = pipeline.predict("./test_samples/general_ocr_002.png")
 
-output = pipeline.predict("./test_demo_imgs/seal_text_det.png")
+output = pipeline.predict("./test_samples/seal_text_det.png")
 for res in output:
     print(res)
     res.save_to_img("./output")

+ 1 - 1
api_examples/pipelines/test_pp_chatocrv3.py

@@ -16,7 +16,7 @@ from paddlex import create_pipeline
 
 pipeline = create_pipeline(pipeline="PP-ChatOCRv3-doc")
 
-img_path = "./test_demo_imgs/vehicle_certificate-1.png"
+img_path = "./test_samples/vehicle_certificate-1.png"
 key_list = ["驾驶室准乘人数"]
 
 

+ 21 - 0
api_examples/pipelines/test_seal_recognition.py

@@ -0,0 +1,21 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="seal_recognition")  # example script: run the "seal_recognition" pipeline on one sample image
+output = pipeline.predict("./test_samples/seal_text_det.png")
+for res in output:
+    print(res)
+    res.save_results("./output")  # presumably writes the result files under ./output — confirm against the result class

+ 2 - 4
api_examples/pipelines/test_table_recognition.py

@@ -16,9 +16,7 @@ from paddlex import create_pipeline
 
 pipeline = create_pipeline(pipeline="table_recognition")
 
-output = pipeline("./test_imgs/table_recognition.jpg")
+output = pipeline("./test_samples/table_recognition.jpg")
 for res in output:
     print(res)
-    res.save_to_img("./output/")  ## 保存img格式结果
-    res.save_to_xlsx("./output/")  ## 保存表格格式结果
-    res.save_to_html("./output/")  ## 保存html结果
+    res.save_results("./output/")

+ 0 - 2
paddlex/configs/pipelines/OCR.yaml

@@ -5,7 +5,6 @@ pipeline_name: OCR
 ####### Config for Common OCR
 ##############################################
 
-input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_001.png
 text_type: general
 
 SubModules:
@@ -24,7 +23,6 @@ SubModules:
 ####### Config for Seal OCR
 ##############################################
 
-# input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/seal_text_det.png
 # text_type: seal
 
 # SubModules:

+ 0 - 1
paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml

@@ -1,6 +1,5 @@
 
 pipeline_name: PP-ChatOCRv3-doc
-input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/demo_paper.png
 
 SubModules:
   LLM_Chat:

+ 1 - 2
paddlex/configs/pipelines/doc_preprocessor.yaml

@@ -1,7 +1,6 @@
 
 pipeline_name: doc_preprocessor
-input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/img_rot180_demo.jpg
-#input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/doc_test.jpg
+
 use_doc_orientation_classify: True
 use_doc_unwarping: True
 

+ 9 - 0
paddlex/configs/pipelines/image_classification.yaml

@@ -0,0 +1,9 @@
+
+pipeline_name: image_classification
+
+SubModules:
+  ImageClassification:
+    module_name: image_classification
+    model_name: PP-LCNet_x0_5
+    model_dir: null
+    batch_size: 4    

+ 1 - 1
paddlex/configs/pipelines/layout_parsing.yaml

@@ -1,6 +1,6 @@
 
 pipeline_name: layout_parsing
-input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/demo_paper.png
+
 use_doc_preprocessor: True
 use_general_ocr: True
 use_seal_recognition: True

+ 42 - 0
paddlex/configs/pipelines/seal_recognition.yaml

@@ -0,0 +1,42 @@
+
+pipeline_name: seal_recognition
+
+use_doc_preprocessor: True
+
+SubModules:
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: RT-DETR-H_layout_3cls
+    model_dir: null
+    batch_size: 1
+
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 1
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null
+        batch_size: 1
+  SealOCR:
+    pipeline_name: OCR
+    text_type: seal
+    SubModules:
+      TextDetection:
+        module_name: seal_text_detection
+        model_name: PP-OCRv4_server_seal_det
+        model_dir: null
+        batch_size: 1    
+      TextRecognition:
+        module_name: text_recognition
+        model_name: PP-OCRv4_server_rec
+        model_dir: null
+        batch_size: 1

+ 48 - 0
paddlex/configs/pipelines/table_recognition.yaml

@@ -0,0 +1,48 @@
+
+pipeline_name: table_recognition
+
+use_doc_preprocessor: True
+
+SubModules:
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: RT-DETR-H_layout_3cls
+    model_dir: null
+    batch_size: 1
+
+  TableStructureRecognition:
+    module_name: table_structure_recognition
+    model_name: SLANet_plus
+    model_dir: null
+    batch_size: 1
+
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 1
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null
+        batch_size: 1
+  GeneralOCR:
+    pipeline_name: OCR
+    text_type: general
+    SubModules:
+      TextDetection:
+        module_name: text_detection
+        model_name: PP-OCRv4_server_det
+        model_dir: null
+        batch_size: 1    
+      TextRecognition:
+        module_name: text_recognition
+        model_name: PP-OCRv4_server_rec
+        model_dir: null
+        batch_size: 1

+ 1 - 23
paddlex/inference/pipelines_new/__init__.py

@@ -12,29 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# from .single_model_pipeline import (
-#     _SingleModelPipeline,
-#     ImageClassification,
-#     ObjectDetection,
-#     InstanceSegmentation,
-#     SemanticSegmentation,
-#     TSFc,
-#     TSAd,
-#     TSCls,
-#     MultiLableImageClas,
-#     SmallObjDet,
-#     AnomalyDetection,
-# )
-# from .ocr import OCRPipeline
-# from .formula_recognition import FormulaRecognitionPipeline
-# from .table_recognition import TableRecPipeline
-# from .face_recognition import FaceRecPipeline
-# from .seal_recognition import SealOCRPipeline
-# from .ppchatocrv3 import PPChatOCRPipeline
-# from .layout_parsing import LayoutParsingPipeline
-# from .pp_shitu_v2 import ShiTuV2Pipeline
-# from .attribute_recognition import AttributeRecPipeline
-
 from pathlib import Path
 from typing import Any, Dict, Optional
 from .base import BasePipeline
@@ -46,6 +23,7 @@ from .ocr import OCRPipeline
 from .doc_preprocessor import DocPreprocessorPipeline
 from .layout_parsing import LayoutParsingPipeline
 from .pp_chatocrv3_doc import PP_ChatOCRv3_doc_Pipeline
+from .image_classification import ImageClassificationPipeline
 
 
 def get_pipeline_path(pipeline_name: str) -> str:

+ 15 - 0
paddlex/inference/pipelines_new/image_classification/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline import ImageClassificationPipeline

+ 77 - 0
paddlex/inference/pipelines_new/image_classification/pipeline.py

@@ -0,0 +1,77 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Optional
+import numpy as np
+
+from ...common.reader import ReadImage
+from ...common.batch_sampler import ImageBatchSampler
+from ...utils.pp_option import PaddlePredictorOption
+from ..base import BasePipeline
+from ...models_new.image_classification.result import TopkResult
+from ...results import TopkResult
+
+
+class ImageClassificationPipeline(BasePipeline):
+    """Image Classification Pipeline"""
+    # Pipeline name this class is associated with; presumably used by the pipeline registry/lookup — confirm in BasePipeline.
+    entities = "image_classification"
+
+    def __init__(
+        self,
+        config: Dict,
+        device: str = None,
+        pp_option: PaddlePredictorOption = None,
+        use_hpip: bool = False,
+        hpi_params: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """
+        Initializes the class with given configurations and options.
+
+        Args:
+            config (Dict): Configuration dictionary containing model and other parameters.
+            device (str): The device to run the prediction on. Default is None.
+            pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
+            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            hpi_params (Optional[Dict[str, Any]]): HPIP specific parameters. Default is None.
+        """
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_params=hpi_params
+        )
+        # The model is built from the "ImageClassification" entry under "SubModules"; a missing key raises KeyError here.
+        image_classification_model_config = config["SubModules"]["ImageClassification"]
+        self.image_classification_model = self.create_model(
+            image_classification_model_config
+        )
+        batch_size = image_classification_model_config["batch_size"]  # the same batch size drives the batch sampler below
+        self.batch_sampler = ImageBatchSampler(batch_size=batch_size)
+        self.img_reader = ReadImage(format="BGR")  # reader is configured with format="BGR"
+
+    def predict(
+        self, input: str | list[str] | np.ndarray | list[np.ndarray], **kwargs
+    ) -> TopkResult:
+        """Predicts image classification results for the given input.
+
+        Args:
+            input (str | list[str] | np.ndarray | list[np.ndarray]): The input image(s) or path(s) to the images.
+            **kwargs: Additional keyword arguments that can be passed to the function.
+
+        Returns:
+            TopkResult: The predicted top k results.
+        """
+        # NOTE(review): the body yields, so calling predict() returns a generator rather than a single TopkResult as annotated; **kwargs is not used.
+        for img_id, batch_data in enumerate(self.batch_sampler(input)):  # img_id is currently unused
+            batch_imgs = self.img_reader(batch_data)
+            for topk_single_result in self.image_classification_model(batch_imgs):
+                yield topk_single_result  # each item produced by the model for the batch is passed through unchanged

+ 59 - 14
paddlex/inference/pipelines_new/layout_parsing/pipeline.py

@@ -36,7 +36,7 @@ from ...results import DetResult
 class LayoutParsingPipeline(BasePipeline):
     """Layout Parsing Pipeline"""
 
-    entities = "layout_parsing"
+    entities = ["layout_parsing", "seal_recognition", "table_recognition"]
 
     def __init__(
         self,
@@ -66,6 +66,42 @@ class LayoutParsingPipeline(BasePipeline):
 
         self._crop_by_boxes = CropByBoxes()
 
+    def set_used_models_flag(self, config: Dict) -> None:
+        """
+        Set the flags for which models to use based on the configuration.
+
+        Args:
+            config (Dict): A dictionary containing configuration settings.
+
+        Returns:
+            None
+        """
+        pipeline_name = config["pipeline_name"]
+
+        self.pipeline_name = pipeline_name
+        # Start with every optional stage disabled; the config / pipeline name below switches stages on.
+        self.use_doc_preprocessor = False
+        self.use_general_ocr = False
+        self.use_seal_recognition = False
+        self.use_table_recognition = False
+        # The doc-preprocessor switch is read from config regardless of the pipeline name.
+        if "use_doc_preprocessor" in config:
+            self.use_doc_preprocessor = config["use_doc_preprocessor"]
+        # "layout_parsing" takes the remaining three switches from config; absent keys leave the flag False.
+        if pipeline_name == "layout_parsing":
+            if "use_general_ocr" in config:
+                self.use_general_ocr = config["use_general_ocr"]
+            if "use_seal_recognition" in config:
+                self.use_seal_recognition = config["use_seal_recognition"]
+            if "use_table_recognition" in config:
+                self.use_table_recognition = config["use_table_recognition"]
+        # "seal_recognition" forces seal recognition on; the general-OCR and table flags stay False.
+        elif pipeline_name == "seal_recognition":
+            self.use_seal_recognition = True
+        # "table_recognition" forces table recognition on; the general-OCR and seal flags stay False.
+        elif pipeline_name == "table_recognition":
+            self.use_table_recognition = True
+
     def inintial_predictor(self, config: Dict) -> None:
         """Initializes the predictor based on the provided configuration.
 
@@ -76,36 +112,25 @@ class LayoutParsingPipeline(BasePipeline):
             None
         """
 
+        self.set_used_models_flag(config)
+
         layout_det_config = config["SubModules"]["LayoutDetection"]
         self.layout_det_model = self.create_model(layout_det_config)
 
-        self.use_doc_preprocessor = False
-        if "use_doc_preprocessor" in config:
-            self.use_doc_preprocessor = config["use_doc_preprocessor"]
-
         if self.use_doc_preprocessor:
             doc_preprocessor_config = config["SubPipelines"]["DocPreprocessor"]
             self.doc_preprocessor_pipeline = self.create_pipeline(
                 doc_preprocessor_config
             )
 
-        self.use_general_ocr = False
-        if "use_general_ocr" in config:
-            self.use_general_ocr = config["use_general_ocr"]
         if self.use_general_ocr:
             general_ocr_config = config["SubPipelines"]["GeneralOCR"]
             self.general_ocr_pipeline = self.create_pipeline(general_ocr_config)
 
-        self.use_seal_recognition = False
-        if "use_seal_recognition" in config:
-            self.use_seal_recognition = config["use_seal_recognition"]
         if self.use_seal_recognition:
             seal_ocr_config = config["SubPipelines"]["SealOCR"]
             self.seal_ocr_pipeline = self.create_pipeline(seal_ocr_config)
 
-        self.use_table_recognition = False
-        if "use_table_recognition" in config:
-            self.use_table_recognition = config["use_table_recognition"]
         if self.use_table_recognition:
             table_structure_config = config["SubModules"]["TableStructureRecognition"]
             self.table_structure_model = self.create_model(table_structure_config)
@@ -171,6 +196,24 @@ class LayoutParsingPipeline(BasePipeline):
 
         return True
 
+    def convert_input_params(self, input_params: Dict) -> None:
+        """
+        Convert input parameters based on the pipeline name.
+
+        Args:
+            input_params (Dict): The input parameters dictionary.
+
+        Returns:
+            None
+        """
+        if self.pipeline_name == "seal_recognition":  # seal-only pipeline: override the other two switches to off
+            input_params["use_general_ocr"] = False
+            input_params["use_table_recognition"] = False
+        elif self.pipeline_name == "table_recognition":  # table-only pipeline: override the other two switches to off
+            input_params["use_general_ocr"] = False
+            input_params["use_seal_recognition"] = False
+        return  # input_params is modified in place; other pipeline names leave it untouched
+
     def predict(
         self,
         input: str | list[str] | np.ndarray | list[np.ndarray],
@@ -211,6 +254,8 @@ class LayoutParsingPipeline(BasePipeline):
             "use_table_recognition": use_table_recognition,
         }
 
+        self.convert_input_params(input_params)
+
         if use_doc_orientation_classify or use_doc_unwarping:
             input_params["use_doc_preprocessor"] = True
         else: