1 bulan lalu · f9cf323e46
--- a/config.yaml
+++ b/config.yaml
@@ -60,17 +60,35 @@ ocr:
 
				       parsing_results_field: "parsing_res_list"
			
 
				       text_field: "block_content"
			
 
				       bbox_field: "block_bbox"
			
 
				+      rec_texts_field: "overall_ocr_res.rec_texts" # 针对表格中的文字块
			
 
				+      rec_boxes_field: "overall_ocr_res.rec_boxes" # 针对表格中的文字块
			
 
				       category_field: "block_label"
			
 
				       confidence_field: "confidence"
			
 
				       # 旋转处理配置
			
 
				       rotation:
			
 
				         coordinates_are_pre_rotated: true  # 坐标已经是预旋转的
			
 
				       
			
 
				+    table_recognition_v2:  # field mapping for PaddleOCR Table Recognition V2 result JSON
			
 
				+      name: "TableRecognitionV2"
			
 
				+      description: "PaddleOCR Table Recognition V2"
			
 
				+      json_structure: "object"
			
 
				+      parsing_results_field: "table_res_list"
			
 
				+      text_field: "pred_html"
			
 
				+      bbox_field: "cell_box_list"            # the earlier spelling "cell_box_listox" was a typo
			
 
				+      rec_texts_field: "table_ocr_pred.rec_texts" # for text blocks inside tables
			
 
				+      rec_boxes_field: "table_ocr_pred.rec_boxes" # for text blocks inside tables
			
 
				+      category_field: "type"
			
 
				+      confidence_field: "confidence"
			
 
				+      rotation:
			
 
				+        coordinates_are_pre_rotated: true
			
 
				+  
			
 
				   # Rules for auto-detecting the tool type
			
 
				   auto_detection:
			
 
				     enabled: true
			
 
				     rules:
			
 
				-      - field_exists: "parsing_res_list"  # if this field exists, classify as ppstructv3
			
 
				+      - field_exists: "table_res_list"  # NOTE(review): PP-StructureV3 JSON may also contain table_res_list; if rules are first-match this could shadow the ppstructv3 rule below - confirm
			
 
				+        tool_type: "table_recognition_v2"
			
 
				+      - field_exists: "parsing_res_list"  # if this field exists, classify as ppstructv3
			
 
				         tool_type: "ppstructv3"
			
 
				       - json_is_array: true  # if the JSON is an array, classify as dots_ocr
			
 
				         tool_type: "dots_ocr"
			
@@ -87,7 +105,13 @@ data_sources:
 
				     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results"
			
 
				     src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
			
 
				     description: "A用户使用PPStructV3的单元格扫描结果"
			
 
				-    
			
 
				+
			
 
				+  - name: "A用户_单元格扫描流水"  # NOTE(review): confirm this name is unique among data_sources entries
			
 
				+    ocr_tool: "table_recognition_v2"  # matches the table_recognition_v2 tool profile key
			
 
				+    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/table_recognition_v2_Results"
			
 
				+    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"  # same image directory as the preceding PPStructV3 entry - presumably intentional, confirm
			
 
				+    description: "A用户使用Table Recognition V2的单元格扫描结果"
			
 
				+        
			
 
				   - name: "A用户_单元格图片合成"
			
 
				     ocr_tool: "dots_ocr"
			
 
				     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格图片合成/data_DotsOCR_Results"