---
# layout_parsing.yaml
#
# Configuration for the pipeline named `layout_parsing`.
#
# Structure: `SubModules` maps a role name to a single model entry
# (module_name / model_name / model_dir plus module-specific parameters);
# `SubPipelines` maps a role name to a nested pipeline that may declare its
# own `SubModules` / `SubPipelines`.
#
# NOTE(review): the nesting in this file is derived from the key names (a key
# with no value opens a nested mapping, and the top-level `use_*` switches
# mirror the sibling entries under `SubPipelines`). Confirm against the schema
# expected by the loader before relying on it.

pipeline_name: layout_parsing

# Top-level switches. Each name corresponds to an entry under `SubPipelines`
# below — presumably enabling/disabling that stage; verify against consumer.
use_doc_preprocessor: true
use_seal_recognition: true
use_table_recognition: true
use_formula_recognition: false

SubModules:
  LayoutDetection:
    module_name: layout_detection
    model_name: RT-DETR-H_layout_17cls
    # null model_dir: no explicit local model directory is configured here.
    model_dir: null

SubPipelines:
  DocPreprocessor:
    pipeline_name: doc_preprocessor
    use_doc_orientation_classify: true
    use_doc_unwarping: true
    SubModules:
      DocOrientationClassify:
        module_name: doc_text_orientation
        model_name: PP-LCNet_x1_0_doc_ori
        model_dir: null
      DocUnwarping:
        module_name: image_unwarping
        model_name: UVDoc
        model_dir: null

  GeneralOCR:
    pipeline_name: OCR
    text_type: general
    use_doc_preprocessor: false
    use_textline_orientation: false
    SubModules:
      TextDetection:
        module_name: text_detection
        model_name: PP-OCRv4_server_det
        model_dir: null
        limit_side_len: 960
        limit_type: max
        max_side_limit: 4000
        thresh: 0.3
        box_thresh: 0.6
        unclip_ratio: 1.5
      TextRecognition:
        module_name: text_recognition
        model_name: PP-OCRv4_server_rec
        model_dir: null
        batch_size: 6
        score_thresh: 0

  TableRecognition:
    pipeline_name: table_recognition
    use_layout_detection: false
    use_doc_preprocessor: false
    use_ocr_model: false
    SubModules:
      TableStructureRecognition:
        module_name: table_structure_recognition
        model_name: SLANet_plus
        model_dir: null

  SealRecognition:
    pipeline_name: seal_recognition
    use_layout_detection: false
    use_doc_preprocessor: false
    SubPipelines:
      # Same `OCR` pipeline type as GeneralOCR, configured with
      # text_type `seal` and its own detection model and limits.
      SealOCR:
        pipeline_name: OCR
        text_type: seal
        use_doc_preprocessor: false
        use_textline_orientation: false
        SubModules:
          TextDetection:
            module_name: seal_text_detection
            model_name: PP-OCRv4_server_seal_det
            model_dir: null
            limit_side_len: 736
            limit_type: min
            max_side_limit: 4000
            thresh: 0.2
            box_thresh: 0.6
            unclip_ratio: 0.5
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv4_server_rec
            model_dir: null
            batch_size: 1
            score_thresh: 0

  FormulaRecognition:
    pipeline_name: formula_recognition
    use_layout_detection: false
    use_doc_preprocessor: false
    SubModules:
      FormulaRecognition:
        module_name: formula_recognition
        model_name: PP-FormulaNet-L
        model_dir: null
        batch_size: 5