| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Layout-order configuration.
#
# The dict has two sections: values set by hand (thresholds and groups of
# layout label names), followed by placeholder entries that start out empty.
# NOTE(review): the placeholders are described as "auto-completed"; the code
# that fills them is not in this block -- confirm against the consumer.
layout_order_config = {
    # ---- Manually configured items ----
    "line_height_iou_threshold": 0.4,  # For line segmentation of OCR results
    "title_area_max_block_threshold": 0.3,  # update paragraph_title -> doc_title
    "block_label_match_iou_threshold": 0.1,
    "block_title_match_iou_threshold": 0.1,
    "doc_title_labels": ["doc_title"],  # document title
    "paragraph_title_labels": ["paragraph_title"],  # paragraph title
    # Visual content: image, table, chart, figure.
    # ("seal" is not listed here; it is grouped under "unordered_labels".)
    "vision_labels": [
        "image",
        "table",
        "chart",
        "figure",
    ],
    # Titles attached to tables, charts and figures.
    "vision_title_labels": ["table_title", "chart_title", "figure_title"],
    "unordered_labels": [
        "aside_text",
        "seal",
        "number",
        "formula_number",
    ],
    "text_labels": ["text"],
    "header_labels": ["header", "header_image"],
    "footer_labels": ["footer", "footer_image", "footnote"],
    "visualize_index_labels": [
        "text",
        "formula",
        "algorithm",
        "reference",
        "content",
        "abstract",
        "paragraph_title",
        "doc_title",
        "table_title",
        "chart_title",
        "figure_title",
        "image",
        "table",
        "chart",
        "figure",
    ],
    # ---- Auto-completed items (all start empty) ----
    "layout_to_ocr_mapping": {},
    "all_layout_region_box": [],  # region boxes
    "doc_title_block_idxes": [],
    "paragraph_title_block_idxes": [],
    "text_title_labels": [],  # doc_title_labels + paragraph_title_labels
    "text_title_block_idxes": [],
    "vision_block_idxes": [],
    "vision_title_block_idxes": [],
    "vision_footnote_block_idxes": [],
    "text_block_idxes": [],
    "header_block_idxes": [],
    "footer_block_idxes": [],
    "unordered_block_idxes": [],
}
|