10 달 전 · 4474a183fa
--- a/api_examples/pipelines/test_layout_parsing_v2.py
+++ b/api_examples/pipelines/test_layout_parsing_v2.py
@@ -0,0 +1,35 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from paddlex import create_pipeline
			
 
				+
			
 
				+pipeline = create_pipeline(pipeline="layout_parsing_v2")
			
 
				+
			
 
				+output = pipeline.predict(
			
 
				+    "./test_samples/demo_paper.png",
			
 
				+    use_doc_orientation_classify=False,
			
 
				+    use_doc_unwarping=False,
			
 
				+    use_common_ocr=True,
			
 
				+    use_seal_recognition=True,
			
 
				+    use_table_recognition=True,
			
 
				+)
			
 
				+
			
 
				+for res in output:
			
 
				+    res.print()
			
 
				+    res.save_to_img("./output")
			
 
				+    res.save_to_json("./output")
			
 
				+    res.save_to_xlsx("./output")
			
 
				+    res.save_to_html("./output")
			
 
				+    res.save_to_markdown("./output")
			
 
				+    res.save_to_pdf_order("./output")
			
--- a/paddlex/configs/pipelines/layout_parsing_v2.yaml
+++ b/paddlex/configs/pipelines/layout_parsing_v2.yaml
@@ -0,0 +1,132 @@
 
				+
			
 
				+pipeline_name: layout_parsing_v2
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_general_ocr: True
			
 
				+use_seal_recognition: True
			
 
				+use_table_recognition: True
			
 
				+use_formula_recognition: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayout-L
			
 
				+    model_dir: null
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: False
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv4_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 960
			
 
				+        limit_type: max
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.6
			
 
				+        unclip_ratio: 2.0
			
 
				+        
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv4_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 1
			
 
				+        score_thresh: 0.0
			
 
				+
			
 
				+  # TableRecognition:
			
 
				+  #   pipeline_name: table_recognition_v2
			
 
				+  #   use_layout_detection: False
			
 
				+  #   use_doc_preprocessor: False
			
 
				+  #   use_ocr_model: True
			
 
				+  #   SubModules:  
			
 
				+  #     TableClassification:
			
 
				+  #       module_name: table_classification
			
 
				+  #       model_name: PP-LCNet_x1_0_table_cls
			
 
				+  #       model_dir: null
			
 
				+
			
 
				+  #     WiredTableStructureRecognition:
			
 
				+  #       module_name: table_structure_recognition
			
 
				+  #       model_name: SLANeXt_wired
			
 
				+  #       model_dir: null
			
 
				+      
			
 
				+  #     WirelessTableStructureRecognition:
			
 
				+  #       module_name: table_structure_recognition
			
 
				+  #       model_name: SLANeXt_wireless
			
 
				+  #       model_dir: null
			
 
				+      
			
 
				+  #     WiredTableCellsDetection:
			
 
				+  #       module_name: table_cells_detection
			
 
				+  #       model_name: RT-DETR-L_wired_table_cell_det
			
 
				+  #       model_dir: null
			
 
				+      
			
 
				+  #     WirelessTableCellsDetection:
			
 
				+  #       module_name: table_cells_detection
			
 
				+  #       model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+  #       model_dir: null
			
 
				+
			
 
				+  TableRecognition:
			
 
				+    pipeline_name: table_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_ocr_model: False
			
 
				+    SubModules:
			
 
				+      TableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANet_plus
			
 
				+        model_dir: null
			
 
				+
			
 
				+  SealRecognition:
			
 
				+    pipeline_name: seal_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubPipelines:
			
 
				+      SealOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        text_type: seal
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: False
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: seal_text_detection
			
 
				+            model_name: PP-OCRv4_server_seal_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 736
			
 
				+            limit_type: min
			
 
				+            thresh: 0.2
			
 
				+            box_thresh: 0.6
			
 
				+            unclip_ratio: 0.5
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv4_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 1
			
 
				+            score_thresh: 0
			
 
				+    
			
 
				+  FormulaRecognition:
			
 
				+    pipeline_name: formula_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubModules:
			
 
				+      FormulaRecognition:
			
 
				+        module_name: formula_recognition
			
 
				+        model_name: PP-FormulaNet-L
			
 
				+        model_dir: null
			
 
				+        batch_size: 5
			
--- a/paddlex/inference/pipelines_new/layout_parsing/__init__.py
+++ b/paddlex/inference/pipelines_new/layout_parsing/__init__.py
@@ -13,3 +13,4 @@
 
				 # limitations under the License.
			
 
				 
			
 
				 from .pipeline import LayoutParsingPipeline
			
 
				+from .pipeline_v2 import LayoutParsingPipelineV2
			
--- a/paddlex/inference/pipelines_new/layout_parsing/pipeline_v2.py
+++ b/paddlex/inference/pipelines_new/layout_parsing/pipeline_v2.py
@@ -40,7 +40,7 @@ from .utils import get_sub_regions_ocr_res
 
				 class LayoutParsingPipelineV2(BasePipeline):
			
 
				     """Layout Parsing Pipeline V2"""
			
 
				 
			
 
				-    entities = ["layout_parsing"]
			
 
				+    entities = ["layout_parsing_v2"]
			
 
				 
			
 
				     def __init__(
			
 
				         self,
			
--- a/paddlex/inference/pipelines_new/layout_parsing/result_v2.py
+++ b/paddlex/inference/pipelines_new/layout_parsing/result_v2.py
@@ -103,7 +103,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				                 formula_region_id = formula_res["formula_region_id"]
			
 
				                 sub_formula_res_dict = formula_res.img
			
 
				                 key = f"formula_res_region{formula_region_id}"
			
 
				-                res_img_dict[key] = sub_formula_res_dict
			
 
				+                res_img_dict[key] = sub_formula_res_dict["res"]
			
 
				 
			
 
				         return res_img_dict