config.yaml 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  # Configuration file for the OCR validation tool
  # Visual style settings
  styles:
    font_size: 8
    # Named colours as hex strings; quoted because a bare "#" starts a comment.
    # Note: `secondary` and `warning` currently share the same value.
    colors:
      primary: "#0288d1"
      secondary: "#ff9800"
      success: "#4caf50"
      error: "#f44336"
      warning: "#ff9800"
      background: "#fafafa"
      text: "#333333"
    layout:
      default_zoom: 1.0
      default_height: 800
      sidebar_width: 1
      content_width: 0.65
  # User-interface settings
  ui:
    page_title: "OCR可视化校验工具"
    page_icon: "🔍"
    layout: "wide"
    sidebar_state: "expanded"
  # OCR data settings
  # NOTE(review): nesting was reconstructed from a flattened listing; confirm
  # that orientation_detection, tools and auto_detection belong under `ocr`
  # rather than at the top level.
  ocr:
    min_text_length: 2
    default_confidence: 1.0
    exclude_texts: ["Picture", ""]

    # Image orientation detection settings
    orientation_detection:
      enabled: true
      confidence_threshold: 0.3  # confidence threshold
      methods: ["opencv_analysis"]  # detection methods
      cache_results: true  # cache detection results

    # Per-tool settings: which JSON fields hold text, boxes, categories, etc.
    tools:
      dots_ocr:
        name: "Dots OCR"
        description: "专业VLM OCR"
        json_structure: "array"  # the JSON root is an array
        text_field: "text"
        bbox_field: "bbox"
        category_field: "category"
        confidence_field: "confidence"
        # Rotation handling
        rotation:
          coordinates_are_pre_rotated: false  # coordinates are not pre-rotated

      ppstructv3:
        name: "PPStructV3"
        description: "PaddleOCR PP-StructureV3"
        json_structure: "object"  # the JSON root is an object
        parsing_results_field: "parsing_res_list"
        text_field: "block_content"
        bbox_field: "block_bbox"
        rec_texts_field: "overall_ocr_res.rec_texts"  # text blocks inside tables
        rec_boxes_field: "overall_ocr_res.rec_boxes"  # text blocks inside tables
        category_field: "block_label"
        confidence_field: "confidence"
        # Rotation handling
        rotation:
          coordinates_are_pre_rotated: true  # coordinates are already pre-rotated

      table_recognition_v2:
        name: "TableRecognitionV2"
        description: "PaddleOCR Table Recognition V2"
        json_structure: "object"
        parsing_results_field: "table_res_list"
        text_field: "pred_html"
        bbox_field: "cell_box_list"  # the earlier value "cell_box_listox" was a typo
        rec_texts_field: "table_ocr_pred.rec_texts"  # text blocks inside tables
        rec_boxes_field: "table_ocr_pred.rec_boxes"  # text blocks inside tables
        category_field: "type"
        confidence_field: "confidence"
        rotation:
          coordinates_are_pre_rotated: true

      mineru:
        name: "MinerU"
        description: "MinerU OCR"
        json_structure: "array"  # the JSON root is an array
        text_field: "text"
        bbox_field: "bbox"
        category_field: "type"
        confidence_field: "confidence"
        # Table-related fields
        table_body_field: "table_body"
        table_cells_field: "table_cells"
        img_path_field: "img_path"
        # Rotation handling
        rotation:
          coordinates_are_pre_rotated: false

    # Rules for auto-detecting the tool type. The per-rule comments describe an
    # intended order of table_recognition_v2 > ppstructv3 > mineru > dots_ocr.
    # NOTE(review): the `priority` values (4, 2, 1, 3) are not monotonic in that
    # order in either direction - confirm how the consumer sorts `priority`
    # and renumber to match the intended order.
    auto_detection:
      enabled: true
      rules:
      # Table Recognition V2 - intended highest priority
      - tool_type: "table_recognition_v2"
        conditions:
        - type: "field_exists"
          field: "table_res_list"
        - type: "field_not_exists"
          field: "parsing_res_list"
        priority: 4
      # PPStructV3 - intended second priority
      - tool_type: "ppstructv3"
        conditions:
        - type: "field_exists"
          field: "parsing_res_list"
        - type: "field_exists"
          field: "doc_preprocessor_res"
        priority: 2
      # MinerU - intended third priority
      - tool_type: "mineru"
        conditions:
        - type: "field_exists"
          field: "page_idx"
        - type: "field_exists"
          field: "type"
        - type: "json_structure"
          structure: "array"
        priority: 1
      # Dots OCR - intended lowest priority (default)
      - tool_type: "dots_ocr"
        conditions:
        - type: "json_structure"
          structure: "array"
        - type: "field_exists"
          field: "category"
        priority: 3
  # Data sources. Each entry gives a display name, the OCR tool key, the OCR
  # output directory, the source image directory and a description.
  # All paths are absolute and rooted at /Users/zhch158, i.e. machine-specific.
  data_sources:
  # Dataset group: 德_内蒙古银行照 (five result sets)
  - name: "德_内蒙古银行照"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results/德_内蒙古银行照"
    description: "德_内蒙古银行照使用PPStructV3的图片合成结果"
  - name: "德_内蒙古银行照_PaddleOCR_VL_cell_bbox"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/PaddleOCR_VL_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/PaddleOCR_VL_Results/德_内蒙古银行照"
    description: "德_内蒙古银行照使用PaddleOCR VLM的图片合成结果"
  - name: "德_内蒙古银行照"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results/德_内蒙古银行照"
    description: "德_内蒙古银行照使用Mineru的图片合成结果"
  - name: "德_内蒙古银行照_mineru_cell_bbox"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results_cell_bbox"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results/德_内蒙古银行照"
    description: "德_内蒙古银行照使用Mineru的图片合成结果, 整合PaddleOCR坐标"
  - name: "德_内蒙古银行照"
    ocr_tool: "dots_ocr"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_DotsOCR_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_DotsOCR_Results/德_内蒙古银行照"
    description: "德_内蒙古银行照使用Dots OCR的图片合成结果"
  # Dataset group: 对公_招商银行图 (five result sets)
  - name: "对公_招商银行图"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results/对公_招商银行图"
    description: "对公_招商银行图使用PPStructV3的图片合成结果"
  - name: "对公_招商银行图_PaddleOCR_VL_cell_bbox"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/PaddleOCR_VL_Results/对公_招商银行图"
    description: "对公_招商银行图使用PaddleOCR VLM的图片合成结果"
  - name: "对公_招商银行图"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results/对公_招商银行图"
    description: "对公_招商银行图使用Mineru的图片合成结果"
  - name: "对公_招商银行图_mineru_cell_bbox"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results_cell_bbox"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results/对公_招商银行图"
    description: "对公_招商银行图使用Mineru的图片合成结果, 整合PaddleOCR坐标"
  - name: "对公_招商银行图"
    ocr_tool: "dots_ocr"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_DotsOCR_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_DotsOCR_Results/对公_招商银行图"
    description: "对公_招商银行图使用Dots OCR的图片合成结果"
  # Dataset group: A用户_单元格扫描流水 (six result sets)
  - name: "A用户_单元格扫描流水"
    ocr_tool: "dots_ocr"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_DotsOCR_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_DotsOCR_Results/A用户_单元格扫描流水"
    description: "A用户使用Dots OCR的单元格扫描结果"
  # The bare name goes with the PPStructureV3 results and the
  # _PaddleOCR_VL_cell_bbox name with the PaddleOCR_VL results, matching the
  # directories and descriptions of these two entries.
  - name: "A用户_单元格扫描流水"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
    description: "A用户使用PPStructV3的单元格扫描结果"
  - name: "A用户_单元格扫描流水_PaddleOCR_VL_cell_bbox"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/PaddleOCR_VL_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/PaddleOCR_VL_Results/A用户_单元格扫描流水"
    description: "A用户使用PaddleOCR VLM的单元格扫描结果"
  - name: "A用户_单元格扫描流水"
    ocr_tool: "table_recognition_v2"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/table_recognition_v2_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
    description: "A用户使用Table Recognition V2的单元格扫描结果"
  - name: "A用户_单元格扫描流水"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/mineru-vlm-2.5.3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水.img"
    description: "A用户使用Mineru的识别结果"
  - name: "A用户_单元格扫描流水_mineru_cell_bbox"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/mineru-vlm-2.5.3_Results_cell_bbox"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水.img"
    description: "A用户使用Mineru VLM识别结果, 整合PaddleOCR坐标"
  # Dataset group: B用户_扫描流水 (five result sets)
  - name: "B用户_扫描流水"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results/B用户_扫描流水"
    description: "B用户使用PPStructV3的扫描结果"
  - name: "B用户_扫描流水_PaddleOCR_VL_cell_bbox"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/PaddleOCR_VL_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/PaddleOCR_VL_Results/B用户_扫描流水"
    description: "B用户使用PaddleOCR VLM的扫描结果"
  - name: "B用户_扫描流水"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru-vlm-2.5.3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.img"
    description: "B用户使用Mineru的扫描结果"
  - name: "B用户_扫描流水_mineru_cell_bbox"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru-vlm-2.5.3_Results_cell_bbox"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.img"
    description: "B用户使用Mineru的扫描结果, 整合PaddleOCR坐标"
  - name: "B用户_扫描流水"
    ocr_tool: "dots_ocr"
    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_DotsOCR_Results"
    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_DotsOCR_Results/B用户_扫描流水"
    description: "B用户使用Dots OCR的扫描结果"
  # Dataset group: 至远彩色_2023年报 (five result sets)
  - name: "至远彩色_2023年报"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/2023年度报告母公司"
    description: "至远彩色使用PPStructV3的2023年报"
  - name: "至远彩色_2023年报_PaddleOCR_VL_cell_bbox"
    ocr_tool: "ppstructv3"
    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/PaddleOCR_VL_Results"
    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/PaddleOCR_VL_Results/2023年度报告母公司"
    description: "至远彩色使用PaddleOCR VLM的2023年报"
  - name: "至远彩色_2023年报"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results"
    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results/2023年度报告母公司"
    description: "至远彩色使用mineru的2023年报"
  - name: "至远彩色_2023年报_mineru_cell_bbox"
    ocr_tool: "mineru"
    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results_cell_bbox"
    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results/2023年度报告母公司"
    description: "至远彩色使用mineru的2023年报, 整合PaddleOCR坐标"
  - name: "至远彩色_2023年报"
    ocr_tool: "dots_ocr"
    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results"
    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司"
    description: "至远彩色使用Dots OCR的2023年报"
  # Path for pre-validation result files
  pre_validation:
    out_dir: "./output/pre_validation/"