Эх сурвалжийг харах

新增表格识别管道配置文件,包含文档预处理、布局检测和OCR模型设置

zhch158_admin 4 сар өмнө
parent
commit
6a9b08f2a9

+ 82 - 0
zhch/table_recognition_v2-zhch.yaml

@@ -0,0 +1,82 @@
+
+pipeline_name: table_recognition_v2  # identifier of this pipeline configuration
+
+use_doc_preprocessor: True  # NOTE(review): presumably toggles SubPipelines.DocPreprocessor — confirm against the pipeline loader
+use_layout_detection: True  # NOTE(review): presumably toggles SubModules.LayoutDetection — confirm
+use_ocr_model: True  # NOTE(review): presumably toggles SubPipelines.GeneralOCR — confirm
+
+SubModules:  # single-model stages of the pipeline; every entry leaves model_dir as null
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: PP-DocLayout-L
+    model_dir: null  # NOTE(review): null presumably means no local weights path is given — confirm how the loader resolves it
+  
+  TableOrientationClassify:  # uses the same module_name and model_name as SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify
+    module_name: doc_text_orientation
+    model_name: PP-LCNet_x1_0_doc_ori
+    model_dir: null
+  
+  TableClassification:  # NOTE(review): presumably decides wired vs. wireless so the matching models below are used — confirm
+    module_name: table_classification
+    model_name: PP-LCNet_x1_0_table_cls
+    model_dir: null
+
+  WiredTableStructureRecognition:  # wired/wireless structure entries share module_name and differ only in model_name
+    module_name: table_structure_recognition
+    model_name: SLANeXt_wired
+    model_dir: null
+  
+  WirelessTableStructureRecognition:
+    module_name: table_structure_recognition
+    model_name: SLANeXt_wireless
+    model_dir: null
+  
+  WiredTableCellsDetection:  # wired/wireless cell-detection entries share module_name and differ only in model_name
+    module_name: table_cells_detection
+    model_name: RT-DETR-L_wired_table_cell_det
+    model_dir: null
+  
+  WirelessTableCellsDetection:
+    module_name: table_cells_detection
+    model_name: RT-DETR-L_wireless_table_cell_det
+    model_dir: null
+
+SubPipelines:  # nested pipelines, each with its own pipeline_name and SubModules
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: True  # NOTE(review): presumably toggles DocOrientationClassify below — confirm
+    use_doc_unwarping: True  # NOTE(review): presumably toggles DocUnwarping below — confirm
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null
+
+  GeneralOCR:
+    pipeline_name: OCR
+    text_type: general
+    use_doc_preprocessor: False  # False here although the top-level use_doc_preprocessor is True; NOTE(review): presumably avoids preprocessing twice — confirm
+    use_textline_orientation: False
+    SubModules:
+      TextDetection:
+        module_name: text_detection
+        model_name: PP-OCRv4_server_det
+        model_dir: null
+        limit_side_len: 960  # NOTE(review): presumably the resize target for the side selected by limit_type — confirm
+        limit_type: max
+        max_side_limit: 4000
+        thresh: 0.3  # NOTE(review): presumably the pixel-level binarization threshold — confirm
+        box_thresh: 0.4  # NOTE(review): presumably the minimum score for keeping a detected box — confirm
+        unclip_ratio: 1.5  # NOTE(review): presumably the expansion factor applied to detected boxes — confirm
+        
+      TextRecognition:
+        module_name: text_recognition
+        model_name: PP-OCRv4_server_rec_doc
+        model_dir: null
+        batch_size: 1
+        score_thresh: 0  # NOTE(review): a threshold of 0 presumably keeps every recognition result — confirm