---
# table_line_generator.yaml
#
# Settings grouped under `table_editor`, followed by the list of data sources.
#
# NOTE(review): this file was recovered from a copy in which every line carried
# a "N. " line-number prefix and all indentation had been flattened. Key names
# and values are unchanged; the nesting below is a reconstruction. Confirm
# against the loader that `defaults` and `line_colors` belong under `output`
# and that `data_sources` belongs under `table_editor`.
table_editor:
  # NOTE(review): units are not stated in this file (presumably pixels).
  viewport:
    width: 1200
    height: 600

  display:
    # Initial zoom and the bounds/increment for zoom changes.
    default_zoom: 0.70
    zoom_min: 0.20
    zoom_max: 2.0
    zoom_step: 0.1
    # Initial line width and its allowed range.
    default_line_width: 1
    line_width_min: 1
    line_width_max: 5
    show_line_numbers: true

  # Output settings; each data source below carries its own `output` mapping
  # with the same three keys (directory, structure_suffix, image_suffix).
  output:
    directory: "output/table_structures"
    structure_suffix: "_structure.json"
    image_suffix: ".png"

    defaults:
      save_structure: true
      save_image: true
      # Same string as the first `line_colors` entry's `name`; presumably
      # resolved by name -- TODO confirm.
      line_color: "黑色"

    # Named colors with their RGB triples.
    line_colors:
      - name: "黑色"  # black
        rgb: [0, 0, 0]
      - name: "蓝色"  # blue
        rgb: [0, 0, 255]
      - name: "红色"  # red
        rgb: [255, 0, 0]

  # NOTE(review): placeholders appear both as `{{name}}` and `{{ name }}` in
  # the entries below; confirm the template renderer accepts both spellings.
  # NOTE(review): `base_dir` is an absolute path under one user's home
  # directory; it will not resolve on other machines.
  data_sources:
    - name: "B用户_扫描流水"
      tool: "mineru"  # same format as MinerU
      base_dir: "/Users/zhch158/workspace/data/流水分析"
      json_dir: "{{name}}/mineru_vllm_results_cell_bbox"
      image_dir: "{{name}}/mineru_vllm_results/{{name}}"
      # `{{ name }}_page_(?P<page>\d{3})\.json` matches the JSON file names.
      # The template variable {{ name }} is replaced with the actual prefix at
      # run time; `_page_` is a fixed literal. The named capture group
      # (?P<page>\d{3}) requires the page number to be exactly three digits
      # and stores it in the `page` group.
      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
      # Same name as the `page` capture group in the two patterns above.
      sort_key: "page"
      output:
        directory: "{{base_dir}}/{{ name }}.wiredtable"
        structure_suffix: "_structure.json"
        image_suffix: ".png"

    - name: "康强_北京农村商业银行"
      tool: "ppstructv3"
      base_dir: "/Users/zhch158/workspace/data/流水分析"
      json_dir: "{{name}}/ppstructurev3_client_results"
      image_dir: "{{name}}/ppstructurev3_client_results/{{name}}"
      # `{{ name }}_page_(?P<page>\d{3})\.json` matches the JSON file names.
      # The template variable {{ name }} is replaced with the actual prefix at
      # run time; `_page_` is a fixed literal. The named capture group
      # (?P<page>\d{3}) requires the page number to be exactly three digits
      # and stores it in the `page` group.
      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
      # Same name as the `page` capture group in the two patterns above.
      sort_key: "page"
      output:
        directory: "{{base_dir}}/{{ name }}.wiredtable"
        structure_suffix: "_structure.json"
        image_suffix: ".png"