setting.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. parameters_config = {
  15. "page": {},
  16. "region": {
  17. "match_block_overlap_ratio_threshold": 0.8,
  18. "split_block_overlap_ratio_threshold": 0.4,
  19. },
  20. "block": {
  21. "title_conversion_area_ratio_threshold": 0.3, # update paragraph_title -> doc_title
  22. },
  23. "line": {
  24. "line_height_iou_threshold": 0.6, # For line segmentation of OCR results
  25. "delimiter_map": {
  26. "doc_title": " ",
  27. "content": "\n",
  28. },
  29. },
  30. "word": {
  31. "delimiter": " ",
  32. },
  33. "order": {
  34. "block_label_match_iou_threshold": 0.1,
  35. "block_title_match_iou_threshold": 0.1,
  36. },
  37. }
  38. block_label_mapping = {
  39. "doc_title_labels": ["doc_title"], # 文档标题
  40. "paragraph_title_labels": [
  41. "paragraph_title",
  42. "abstract_title",
  43. "reference_title",
  44. "content_title",
  45. ], # 段落标题
  46. "vision_labels": [
  47. "image",
  48. "table",
  49. "chart",
  50. "flowchart",
  51. "figure",
  52. ], # 图、表、印章、图表、图
  53. "vision_title_labels": ["table_title", "chart_title", "figure_title"], # 图表标题
  54. "unordered_labels": [
  55. "aside_text",
  56. "seal",
  57. "number",
  58. "formula_number",
  59. ],
  60. "text_labels": ["text"],
  61. "header_labels": ["header", "header_image"],
  62. "footer_labels": ["footer", "footer_image", "footnote"],
  63. "visualize_index_labels": [
  64. "text",
  65. "formula",
  66. "algorithm",
  67. "reference",
  68. "content",
  69. "abstract",
  70. "paragraph_title",
  71. "doc_title",
  72. "table_title",
  73. "chart_title",
  74. "figure_title",
  75. "image",
  76. "table",
  77. "chart",
  78. "figure",
  79. "abstract_title",
  80. "refer_title",
  81. "content_title",
  82. "flowchart",
  83. ],
  84. }