---
# Table editor configuration.
#
# NOTE(review): the nesting below was rebuilt from a flattened copy of this
# file in which every line had lost its indentation. Two placements could not
# be determined from the text alone and must be confirmed against the code
# that loads this file:
#   * `defaults` and `line_colors` are assumed to live under `output`;
#   * `data_sources` is assumed to live under `table_editor`.
table_editor:
  viewport:
    width: 1200
    height: 600

  display:
    default_zoom: 0.70
    zoom_min: 0.20
    zoom_max: 2.0
    zoom_step: 0.1
    default_line_width: 1
    line_width_min: 1
    line_width_max: 5
    show_line_numbers: true

  output:
    directory: "output/table_structures"
    structure_suffix: "_structure.json"
    image_suffix: ".png"
    defaults:
      save_structure: true
      save_image: true
      # Must match one of the `name` values listed in `line_colors`.
      line_color: "黑色"
    line_colors:
      - name: "黑色"
        rgb: [0, 0, 0]
      - name: "蓝色"
        rgb: [0, 0, 255]
      - name: "红色"
        rgb: [255, 0, 0]

  data_sources:
    - name: "B用户_扫描流水"
      tool: "mineru"  # same format as MinerU
      # NOTE(review): absolute, machine-specific path.
      base_dir: "/Users/zhch158/workspace/data/流水分析"
      json_dir: "{{name}}/mineru_vllm_results_cell_bbox"
      image_dir: "{{name}}/mineru_vllm_results/{{name}}"
      # JSON file names are matched with
      #   {{ name }}_page_(?P<page>\d{3})\.json
      # The template variable {{ name }} is replaced with the actual prefix at
      # run time; `_page_` is a fixed literal. The named capture group
      # (?P<page>\d{3}) requires the page number to be exactly three digits
      # and stores it in the `page` group.
      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
      sort_key: "page"
      output:
        directory: "{{base_dir}}/{{ name }}.wiredtable"
        structure_suffix: "_structure.json"
        image_suffix: ".png"

    - name: "康强_北京农村商业银行"
      tool: "ppstructv3"
      # NOTE(review): absolute, machine-specific path.
      base_dir: "/Users/zhch158/workspace/data/流水分析"
      json_dir: "{{name}}/ppstructurev3_client_results"
      image_dir: "{{name}}/ppstructurev3_client_results/{{name}}"
      # JSON file names are matched with
      #   {{ name }}_page_(?P<page>\d{3})\.json
      # The template variable {{ name }} is replaced with the actual prefix at
      # run time; `_page_` is a fixed literal. The named capture group
      # (?P<page>\d{3}) requires the page number to be exactly three digits
      # and stores it in the `page` group.
      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
      sort_key: "page"
      output:
        directory: "{{base_dir}}/{{ name }}.wiredtable"
        structure_suffix: "_structure.json"
        image_suffix: ".png"