瀏覽代碼

fix: 更新表格识别配置,禁用文档预处理器并升级OCR模型版本

zhch158_admin 3 月之前
父節點
當前提交
7322820770
共有 1 個文件被更改,包括 53 次插入和 8 次删除
  1. 53 8
      zhch/table_recognition_v2-zhch.yaml

+ 53 - 8
zhch/table_recognition_v2-zhch.yaml

@@ -1,16 +1,61 @@
 
 pipeline_name: table_recognition_v2
 
-use_doc_preprocessor: True
+use_doc_preprocessor: False
 use_layout_detection: True
 use_ocr_model: True
 
 SubModules:
   LayoutDetection:
     module_name: layout_detection
-    model_name: PP-DocLayout-L
+    model_name: PP-DocLayout_plus-L
     model_dir: null
-  
+    batch_size: 1
+    threshold: 
+      0: 0.3  # paragraph_title
+      1: 0.5  # image
+      2: 0.4  # text
+      3: 0.5  # number
+      4: 0.5  # abstract
+      5: 0.5  # content
+      6: 0.5  # figure_table_chart_title
+      7: 0.3  # formula
+      8: 0.5  # table
+      9: 0.5  # reference
+      10: 0.5 # doc_title
+      11: 0.5 # footnote
+      12: 0.5 # header
+      13: 0.5 # algorithm
+      14: 0.5 # footer
+      15: 0.45 # seal
+      16: 0.5 # chart
+      17: 0.5 # formula_number
+      18: 0.5 # aside_text
+      19: 0.5 # reference_content
+    layout_nms: True
+    layout_unclip_ratio: [1.0, 1.0] 
+    layout_merge_bboxes_mode: 
+      0: "large"  # paragraph_title
+      1: "large"  # image
+      2: "union"  # text
+      3: "union"  # number
+      4: "union"  # abstract
+      5: "union"  # content
+      6: "union"  # figure_table_chart_title
+      7: "large"  # formula
+      8: "union"  # table
+      9: "union"  # reference
+      10: "union" # doc_title
+      11: "union" # footnote
+      12: "union" # header
+      13: "union" # algorithm
+      14: "union" # footer
+      15: "union" # seal
+      16: "large" # chart
+      17: "union" # formula_number
+      18: "union" # aside_text
+      19: "union" # reference_content  
+
   TableOrientationClassify:
     module_name: doc_text_orientation
     model_name: PP-LCNet_x1_0_doc_ori
@@ -44,8 +89,8 @@ SubModules:
 SubPipelines:
   DocPreprocessor:
     pipeline_name: doc_preprocessor
-    use_doc_orientation_classify: True
-    use_doc_unwarping: True
+    use_doc_orientation_classify: False
+    use_doc_unwarping: False
     SubModules:
       DocOrientationClassify:
         module_name: doc_text_orientation
@@ -65,9 +110,9 @@ SubPipelines:
     SubModules:
       TextDetection:
         module_name: text_detection
-        model_name: PP-OCRv4_server_det
+        model_name: PP-OCRv5_server_det
         model_dir: null
-        limit_side_len: 960
+        limit_side_len: 1600
         limit_type: max
         max_side_limit: 4000
         thresh: 0.3
@@ -76,7 +121,7 @@ SubPipelines:
         
       TextRecognition:
         module_name: text_recognition
-        model_name: PP-OCRv4_server_rec_doc
+        model_name: PP-OCRv5_server_rec
         model_dir: null
         batch_size: 1
         score_thresh: 0