# PP-StructureV3 pipeline configuration.
pipeline_name: PP-StructureV3

# Pipeline-level batch size; individual sub-modules and sub-pipelines
# declare their own batch_size values further down.
batch_size: 8

# Boolean switches for the optional stages. In this configuration chart
# recognition and region detection are switched off; the others are on.
use_doc_preprocessor: True
use_seal_recognition: True
use_table_recognition: True
use_formula_recognition: True
use_chart_recognition: False
use_region_detection: False
# Modules attached directly to the top-level pipeline.
SubModules:
  LayoutDetection:
    module_name: layout_detection
    model_name: PP-DocLayout-L
    model_dir: null
    batch_size: 8
    # Per-class score threshold, keyed by class id (the class name is given
    # in the trailing comment). Classes 0, 7 and 16 use 0.3; all others 0.5.
    threshold:
      0: 0.3  # paragraph_title
      1: 0.5  # image
      2: 0.5  # text
      3: 0.5  # number
      4: 0.5  # abstract
      5: 0.5  # content
      6: 0.5  # figure_title
      7: 0.3  # formula
      8: 0.5  # table
      9: 0.5  # table_title
      10: 0.5  # reference
      11: 0.5  # doc_title
      12: 0.5  # footnote
      13: 0.5  # header
      14: 0.5  # algorithm
      15: 0.5  # footer
      16: 0.3  # seal
      17: 0.5  # chart_title
      18: 0.5  # chart
      19: 0.5  # formula_number
      20: 0.5  # header_image
      21: 0.5  # footer_image
      22: 0.5  # aside_text
    layout_nms: True
    # Per-class two-element unclip ratio, keyed by class id. Every class is
    # set to [1.0, 1.0] here.
    # NOTE(review): presumably [width, height] scale factors - confirm
    # against the layout detection module documentation.
    layout_unclip_ratio:
      0: [1.0, 1.0]  # paragraph_title
      1: [1.0, 1.0]  # image
      2: [1.0, 1.0]  # text
      3: [1.0, 1.0]  # number
      4: [1.0, 1.0]  # abstract
      5: [1.0, 1.0]  # content
      6: [1.0, 1.0]  # figure_title
      7: [1.0, 1.0]  # formula
      8: [1.0, 1.0]  # table
      9: [1.0, 1.0]  # table_title
      10: [1.0, 1.0]  # reference
      11: [1.0, 1.0]  # doc_title
      12: [1.0, 1.0]  # footnote
      13: [1.0, 1.0]  # header
      14: [1.0, 1.0]  # algorithm
      15: [1.0, 1.0]  # footer
      16: [1.0, 1.0]  # seal
      17: [1.0, 1.0]  # chart_title
      18: [1.0, 1.0]  # chart
      19: [1.0, 1.0]  # formula_number
      20: [1.0, 1.0]  # header_image
      21: [1.0, 1.0]  # footer_image
      22: [1.0, 1.0]  # aside_text
    # Per-class box merge mode, keyed by class id. Classes 0, 1, 7 and 18
    # use "large"; all others use "union".
    layout_merge_bboxes_mode:
      0: "large"  # paragraph_title
      1: "large"  # image
      2: "union"  # text
      3: "union"  # number
      4: "union"  # abstract
      5: "union"  # content
      6: "union"  # figure_title
      7: "large"  # formula
      8: "union"  # table
      9: "union"  # table_title
      10: "union"  # reference
      11: "union"  # doc_title
      12: "union"  # footnote
      13: "union"  # header
      14: "union"  # algorithm
      15: "union"  # footer
      16: "union"  # seal
      17: "union"  # chart_title
      18: "large"  # chart
      19: "union"  # formula_number
      20: "union"  # header_image
      21: "union"  # footer_image
      22: "union"  # aside_text
  ChartRecognition:
    module_name: chart_recognition
    model_name: PP-Chart2Table
    model_dir: null
    batch_size: 1
# Nested pipelines; each entry carries its own SubModules and/or SubPipelines.
SubPipelines:
  # Orientation classification and unwarping are both enabled.
  DocPreprocessor:
    pipeline_name: doc_preprocessor
    batch_size: 8
    use_doc_orientation_classify: True
    use_doc_unwarping: True
    SubModules:
      DocOrientationClassify:
        module_name: doc_text_orientation
        model_name: PP-LCNet_x1_0_doc_ori
        model_dir: null
        batch_size: 8
      DocUnwarping:
        module_name: image_unwarping
        model_name: UVDoc
        model_dir: null

  # General-text OCR: its own doc preprocessor is disabled, text-line
  # orientation is enabled.
  GeneralOCR:
    pipeline_name: OCR
    batch_size: 8
    text_type: general
    use_doc_preprocessor: False
    use_textline_orientation: True
    SubModules:
      TextDetection:
        module_name: text_detection
        model_name: PP-OCRv4_server_det
        model_dir: null
        limit_side_len: 736
        limit_type: min
        max_side_limit: 4000
        thresh: 0.3
        box_thresh: 0.6
        unclip_ratio: 1.5
      TextLineOrientation:
        module_name: textline_orientation
        model_name: PP-LCNet_x0_25_textline_ori
        model_dir: null
        batch_size: 8
      TextRecognition:
        module_name: text_recognition
        model_name: PP-OCRv4_server_rec_doc
        model_dir: null
        batch_size: 8
        score_thresh: 0.0

  # Layout detection, doc preprocessing and the OCR-model switch are all
  # disabled inside this sub-pipeline.
  # NOTE(review): no batch_size is set here or on the nested GeneralOCR,
  # unlike the sibling sub-pipelines - confirm that is intended.
  TableRecognition:
    pipeline_name: table_recognition_v2
    use_layout_detection: False
    use_doc_preprocessor: False
    use_ocr_model: False
    SubModules:
      TableClassification:
        module_name: table_classification
        model_name: PP-LCNet_x1_0_table_cls
        model_dir: null
      WiredTableStructureRecognition:
        module_name: table_structure_recognition
        model_name: SLANeXt_wired
        model_dir: null

      WirelessTableStructureRecognition:
        module_name: table_structure_recognition
        model_name: SLANet_plus
        model_dir: null

      WiredTableCellsDetection:
        module_name: table_cells_detection
        model_name: RT-DETR-L_wired_table_cell_det
        model_dir: null

      WirelessTableCellsDetection:
        module_name: table_cells_detection
        model_name: RT-DETR-L_wireless_table_cell_det
        model_dir: null
    SubPipelines:
      # OCR sub-pipeline nested under TableRecognition. Same models as the
      # top-level GeneralOCR, but box_thresh is 0.4 instead of 0.6.
      GeneralOCR:
        pipeline_name: OCR
        text_type: general
        use_doc_preprocessor: False
        use_textline_orientation: True
        SubModules:
          TextDetection:
            module_name: text_detection
            model_name: PP-OCRv4_server_det
            model_dir: null
            limit_side_len: 736
            limit_type: min
            max_side_limit: 4000
            thresh: 0.3
            box_thresh: 0.4
            unclip_ratio: 1.5
          TextLineOrientation:
            module_name: textline_orientation
            model_name: PP-LCNet_x0_25_textline_ori
            model_dir: null
            batch_size: 8
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv4_server_rec_doc
            model_dir: null
            batch_size: 8
            score_thresh: 0.0

  SealRecognition:
    pipeline_name: seal_recognition
    batch_size: 8
    use_layout_detection: False
    use_doc_preprocessor: False
    SubPipelines:
      # Seal-text OCR: text-line orientation is disabled, and detection uses
      # thresh 0.2 / unclip_ratio 0.5 rather than the 0.3 / 1.5 used by the
      # general OCR entries.
      SealOCR:
        pipeline_name: OCR
        batch_size: 8
        text_type: seal
        use_doc_preprocessor: False
        use_textline_orientation: False
        SubModules:
          TextDetection:
            module_name: seal_text_detection
            model_name: PP-OCRv4_server_seal_det
            model_dir: null
            limit_side_len: 736
            limit_type: min
            max_side_limit: 4000
            thresh: 0.2
            box_thresh: 0.6
            unclip_ratio: 0.5
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv4_server_rec
            model_dir: null
            batch_size: 8
            # NOTE(review): written as integer 0 here, while the other
            # TextRecognition entries use 0.0 - value kept as-is.
            score_thresh: 0

  FormulaRecognition:
    pipeline_name: formula_recognition
    batch_size: 8
    use_layout_detection: False
    use_doc_preprocessor: False
    SubModules:
      FormulaRecognition:
        module_name: formula_recognition
        model_name: PP-FormulaNet-L
        model_dir: null
        batch_size: 8