---
# table_recognition_v2-zhch.yaml
#
# Configuration for the `table_recognition_v2` pipeline. It declares three
# top-level feature switches, a set of sub-modules (one model each), and two
# nested sub-pipelines that carry their own sub-modules.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (indentation lost, viewer line numbers fused into every line). Confirm the
# hierarchy against the upstream copy of this file before relying on it.

pipeline_name: table_recognition_v2

# Feature switches.
# NOTE(review): presumably these gate DocPreprocessor, LayoutDetection and
# GeneralOCR below, respectively — inferred from the names; confirm against
# the consuming loader.
use_doc_preprocessor: False
use_layout_detection: True
use_ocr_model: True

# Each sub-module names a module type, a model, and an optional model
# directory. `model_dir` is null everywhere in this file.
# NOTE(review): presumably null selects a default model location — confirm.
SubModules:
  LayoutDetection:
    module_name: layout_detection
    model_name: PP-DocLayout_plus-L
    model_dir: null
    batch_size: 1
    # Per-class threshold keyed by integer class id; the trailing comment on
    # each entry names the class.
    threshold:
      0: 0.3  # paragraph_title
      1: 0.5  # image
      2: 0.4  # text
      3: 0.5  # number
      4: 0.5  # abstract
      5: 0.5  # content
      6: 0.5  # figure_table_chart_title
      7: 0.3  # formula
      8: 0.5  # table
      9: 0.5  # reference
      10: 0.5  # doc_title
      11: 0.5  # footnote
      12: 0.5  # header
      13: 0.5  # algorithm
      14: 0.5  # footer
      15: 0.45  # seal
      16: 0.5  # chart
      17: 0.5  # formula_number
      18: 0.5  # aside_text
      19: 0.5  # reference_content
    layout_nms: True
    layout_unclip_ratio: [1.0, 1.0]
    # Per-class box-merge mode, keyed by the same class ids as `threshold`.
    # Only "large" and "union" are used in this file.
    layout_merge_bboxes_mode:
      0: "large"  # paragraph_title
      1: "large"  # image
      2: "union"  # text
      3: "union"  # number
      4: "union"  # abstract
      5: "union"  # content
      6: "union"  # figure_table_chart_title
      7: "large"  # formula
      8: "union"  # table
      9: "union"  # reference
      10: "union"  # doc_title
      11: "union"  # footnote
      12: "union"  # header
      13: "union"  # algorithm
      14: "union"  # footer
      15: "union"  # seal
      16: "large"  # chart
      17: "union"  # formula_number
      18: "union"  # aside_text
      19: "union"  # reference_content

  # Uses the same module and model as
  # SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.
  TableOrientationClassify:
    module_name: doc_text_orientation
    model_name: PP-LCNet_x1_0_doc_ori
    model_dir: null

  TableClassification:
    module_name: table_classification
    model_name: PP-LCNet_x1_0_table_cls
    model_dir: null

  # Structure recognition and cell detection each come as a wired/wireless
  # pair that shares a module_name and differs only in model_name.
  WiredTableStructureRecognition:
    module_name: table_structure_recognition
    model_name: SLANeXt_wired
    model_dir: null

  WirelessTableStructureRecognition:
    module_name: table_structure_recognition
    model_name: SLANeXt_wireless
    model_dir: null

  WiredTableCellsDetection:
    module_name: table_cells_detection
    model_name: RT-DETR-L_wired_table_cell_det
    model_dir: null

  WirelessTableCellsDetection:
    module_name: table_cells_detection
    model_name: RT-DETR-L_wireless_table_cell_det
    model_dir: null

# Nested pipelines; each has its own pipeline_name, switches and SubModules.
SubPipelines:
  DocPreprocessor:
    pipeline_name: doc_preprocessor
    # Both preprocessing steps are switched off here, although their
    # sub-modules are still declared below.
    use_doc_orientation_classify: False
    use_doc_unwarping: False
    SubModules:
      DocOrientationClassify:
        module_name: doc_text_orientation
        model_name: PP-LCNet_x1_0_doc_ori
        model_dir: null

      DocUnwarping:
        module_name: image_unwarping
        model_name: UVDoc
        model_dir: null

  GeneralOCR:
    pipeline_name: OCR
    text_type: general
    use_doc_preprocessor: False
    use_textline_orientation: False
    SubModules:
      TextDetection:
        module_name: text_detection
        model_name: PP-OCRv5_server_det
        model_dir: null
        # NOTE(review): presumably image side-length limits in pixels, with
        # limit_type choosing which side limit_side_len applies to — confirm.
        limit_side_len: 1600
        limit_type: max
        max_side_limit: 4000
        thresh: 0.3
        box_thresh: 0.4
        unclip_ratio: 1.5

      TextRecognition:
        module_name: text_recognition
        model_name: PP-OCRv5_server_rec
        model_dir: null
        batch_size: 1
        # NOTE(review): 0 presumably disables score filtering — confirm.
        score_thresh: 0