|
|
@@ -60,17 +60,35 @@ ocr:
|
|
|
parsing_results_field: "parsing_res_list"
|
|
|
text_field: "block_content"
|
|
|
bbox_field: "block_bbox"
|
|
|
+ rec_texts_field: "overall_ocr_res.rec_texts" # for text blocks inside tables
|
|
|
+ rec_boxes_field: "overall_ocr_res.rec_boxes" # for text blocks inside tables
|
|
|
category_field: "block_label"
|
|
|
confidence_field: "confidence"
|
|
|
# 旋转处理配置
|
|
|
rotation:
|
|
|
coordinates_are_pre_rotated: true # 坐标已经是预旋转的
|
|
|
|
|
|
+ table_recognition_v2:  # field mapping for PaddleOCR Table Recognition V2 results
|
|
|
+ name: "TableRecognitionV2"
|
|
|
+ description: "PaddleOCR Table Recognition V2"
|
|
|
+ json_structure: "object"
|
|
|
+ parsing_results_field: "table_res_list"
|
|
|
+ text_field: "pred_html"
|
|
|
+ bbox_field: "cell_box_list" # the earlier value "cell_box_listox" was a typo
|
|
|
+ rec_texts_field: "table_ocr_pred.rec_texts" # for text blocks inside tables
|
|
|
+ rec_boxes_field: "table_ocr_pred.rec_boxes" # for text blocks inside tables
|
|
|
+ category_field: "type"
|
|
|
+ confidence_field: "confidence"
|
|
|
+ rotation:
|
|
|
+ coordinates_are_pre_rotated: true  # coordinates are already pre-rotated
|
|
|
+
|
|
|
# 自动检测工具类型的规则
|
|
|
auto_detection:
|
|
|
enabled: true
|
|
|
rules:
|
|
|
- - field_exists: "parsing_res_list" # 如果存在该字段,判断为ppstructv3
|
|
|
+ - field_exists: "table_res_list"  # if this field exists, classify the result as table_recognition_v2
|
|
|
+ tool_type: "table_recognition_v2"  # NOTE(review): PP-StructureV3 results may also contain table_res_list; if rules are first-match this could shadow the ppstructv3 rule below - confirm
|
|
|
+ - field_exists: "parsing_res_list"
|
|
|
tool_type: "ppstructv3"
|
|
|
- json_is_array: true # 如果JSON是数组,判断为dots_ocr
|
|
|
tool_type: "dots_ocr"
|
|
|
@@ -87,7 +105,13 @@ data_sources:
|
|
|
ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results"
|
|
|
src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
|
|
|
description: "A用户使用PPStructV3的单元格扫描结果"
|
|
|
-
|
|
|
+
|
|
|
+ - name: "A用户_单元格扫描流水"
|
|
|
+ ocr_tool: "table_recognition_v2"  # presumably selects the table_recognition_v2 tool mapping defined in this file - confirm
|
|
|
+ ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/table_recognition_v2_Results"
|
|
|
+ src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"  # same image directory as the PPStructV3 entry above
|
|
|
+ description: "A用户使用Table Recognition V2的单元格扫描结果"
|
|
|
+
|
|
|
- name: "A用户_单元格图片合成"
|
|
|
ocr_tool: "dots_ocr"
|
|
|
ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格图片合成/data_DotsOCR_Results"
|