# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

  14. layout_order_config = {
  15. # 人工配置项
  16. "line_height_iou_threshold": 0.4, # For line segmentation of OCR results
  17. "title_area_max_block_threshold": 0.3, # update paragraph_title -> doc_title
  18. "block_label_match_iou_threshold": 0.1,
  19. "block_title_match_iou_threshold": 0.1,
  20. "doc_title_labels": ["doc_title"], # 文档标题
  21. "paragraph_title_labels": ["paragraph_title"], # 段落标题
  22. "vision_labels": [
  23. "image",
  24. "table",
  25. "chart",
  26. "figure",
  27. ], # 图、表、印章、图表、图
  28. "vision_title_labels": ["table_title", "chart_title", "figure_title"], # 图表标题
  29. "unordered_labels": [
  30. "aside_text",
  31. "seal",
  32. "number",
  33. "formula_number",
  34. ],
  35. "text_labels": ["text"],
  36. "header_labels": ["header", "header_image"],
  37. "footer_labels": ["footer", "footer_image", "footnote"],
  38. "visualize_index_labels": [
  39. "text",
  40. "formula",
  41. "algorithm",
  42. "reference",
  43. "content",
  44. "abstract",
  45. "paragraph_title",
  46. "doc_title",
  47. "table_title",
  48. "chart_title",
  49. "figure_title",
  50. "image",
  51. "table",
  52. "chart",
  53. "figure",
  54. ],
  55. # 自动补全配置项
  56. "layout_to_ocr_mapping": {},
  57. "all_layout_region_box": [], # 区域box
  58. "doc_title_block_idxes": [],
  59. "paragraph_title_block_idxes": [],
  60. "text_title_labels": [], # doc_title_labels+paragraph_title_labels
  61. "text_title_block_idxes": [],
  62. "vision_block_idxes": [],
  63. "vision_title_block_idxes": [],
  64. "vision_footnote_block_idxes": [],
  65. "text_block_idxes": [],
  66. "header_block_idxes": [],
  67. "footer_block_idxes": [],
  68. "unordered_block_idxes": [],
  69. }