---
# Pipeline configuration for `PP-StructureV3`.
#
# Layout of this file:
#   * top-level switches (`use_*`) and the pipeline-wide `batch_size`
#   * `SubModules`   - single-model modules configured directly on this pipeline
#   * `SubPipelines` - nested pipelines, each with its own `SubModules` and,
#                      where present, its own nested `SubPipelines`
#
# NOTE(review): the block structure below was restored from a copy whose line
# breaks and indentation had been collapsed. Nesting was derived from key order
# and from the rule that a mapping may not repeat a key - confirm against the
# consumer's schema before relying on it.

pipeline_name: PP-StructureV3

batch_size: 8

use_doc_preprocessor: True
use_seal_recognition: True
use_table_recognition: True
use_formula_recognition: True
use_chart_recognition: False
use_region_detection: True

SubModules:
  LayoutDetection:
    module_name: layout_detection
    model_name: PP-DocLayout_plus-L
    model_dir: null
    batch_size: 8
    # Per-class `threshold` values keyed by integer class id; the trailing
    # comment on each entry names the class.
    threshold:
      0: 0.3  # paragraph_title
      1: 0.5  # image
      2: 0.5  # text
      3: 0.5  # number
      4: 0.5  # abstract
      5: 0.5  # content
      6: 0.5  # figure_title
      7: 0.3  # formula
      8: 0.5  # table
      9: 0.5  # table_title
      10: 0.5  # reference
      11: 0.5  # doc_title
      12: 0.5  # footnote
      13: 0.5  # header
      14: 0.5  # algorithm
      15: 0.5  # footer
      16: 0.3  # seal
      17: 0.5  # chart_title
      18: 0.5  # chart
      19: 0.5  # formula_number
      20: 0.5  # header_image
      21: 0.5  # footer_image
      22: 0.5  # aside_text
    layout_nms: True
    # Per-class two-element ratio, same class ids as `threshold`.
    # Every class currently uses [1.0, 1.0].
    layout_unclip_ratio:
      0: [1.0, 1.0]  # paragraph_title
      1: [1.0, 1.0]  # image
      2: [1.0, 1.0]  # text
      3: [1.0, 1.0]  # number
      4: [1.0, 1.0]  # abstract
      5: [1.0, 1.0]  # content
      6: [1.0, 1.0]  # figure_title
      7: [1.0, 1.0]  # formula
      8: [1.0, 1.0]  # table
      9: [1.0, 1.0]  # table_title
      10: [1.0, 1.0]  # reference
      11: [1.0, 1.0]  # doc_title
      12: [1.0, 1.0]  # footnote
      13: [1.0, 1.0]  # header
      14: [1.0, 1.0]  # algorithm
      15: [1.0, 1.0]  # footer
      16: [1.0, 1.0]  # seal
      17: [1.0, 1.0]  # chart_title
      18: [1.0, 1.0]  # chart
      19: [1.0, 1.0]  # formula_number
      20: [1.0, 1.0]  # header_image
      21: [1.0, 1.0]  # footer_image
      22: [1.0, 1.0]  # aside_text
    # Per-class merge mode, same class ids as `threshold`.
    # Classes 0, 1, 7 and 18 use "large"; all others use "union".
    layout_merge_bboxes_mode:
      0: "large"  # paragraph_title
      1: "large"  # image
      2: "union"  # text
      3: "union"  # number
      4: "union"  # abstract
      5: "union"  # content
      6: "union"  # figure_title
      7: "large"  # formula
      8: "union"  # table
      9: "union"  # table_title
      10: "union"  # reference
      11: "union"  # doc_title
      12: "union"  # footnote
      13: "union"  # header
      14: "union"  # algorithm
      15: "union"  # footer
      16: "union"  # seal
      17: "union"  # chart_title
      18: "large"  # chart
      19: "union"  # formula_number
      20: "union"  # header_image
      21: "union"  # footer_image
      22: "union"  # aside_text

  ChartRecognition:
    module_name: chart_recognition
    model_name: PP-Chart2Table
    model_dir: null
    batch_size: 1

  # Uses the same `module_name` as LayoutDetection with a different model and a
  # single scalar merge mode instead of a per-class mapping.
  RegionDetection:
    module_name: layout_detection
    model_name: PP-DocBlockLayout
    model_dir: null
    layout_nms: True
    layout_merge_bboxes_mode: "small"

SubPipelines:
  DocPreprocessor:
    pipeline_name: doc_preprocessor
    batch_size: 8
    use_doc_orientation_classify: True
    use_doc_unwarping: True
    SubModules:
      DocOrientationClassify:
        module_name: doc_text_orientation
        model_name: PP-LCNet_x1_0_doc_ori
        model_dir: null
        batch_size: 8
      DocUnwarping:
        module_name: image_unwarping
        model_name: UVDoc
        model_dir: null

  GeneralOCR:
    pipeline_name: OCR
    batch_size: 8
    text_type: general
    use_doc_preprocessor: False
    use_textline_orientation: True
    SubModules:
      TextDetection:
        module_name: text_detection
        model_name: PP-OCRv5_server_det
        model_dir: null
        limit_side_len: 736
        limit_type: min
        max_side_limit: 4000
        thresh: 0.3
        box_thresh: 0.6
        unclip_ratio: 1.5
      TextLineOrientation:
        module_name: textline_orientation
        model_name: PP-LCNet_x0_25_textline_ori
        model_dir: null
        batch_size: 8
      TextRecognition:
        module_name: text_recognition
        model_name: PP-OCRv5_server_rec
        model_dir: null
        batch_size: 8
        score_thresh: 0.0

  TableRecognition:
    pipeline_name: table_recognition_v2
    use_layout_detection: False
    use_doc_preprocessor: False
    use_ocr_model: False
    SubModules:
      TableClassification:
        module_name: table_classification
        model_name: PP-LCNet_x1_0_table_cls
        model_dir: null
      WiredTableStructureRecognition:
        module_name: table_structure_recognition
        model_name: SLANeXt_wired
        model_dir: null
      WirelessTableStructureRecognition:
        module_name: table_structure_recognition
        model_name: SLANet_plus
        model_dir: null
      WiredTableCellsDetection:
        module_name: table_cells_detection
        model_name: RT-DETR-L_wired_table_cell_det
        model_dir: null
      WirelessTableCellsDetection:
        module_name: table_cells_detection
        model_name: RT-DETR-L_wireless_table_cell_det
        model_dir: null
      TableOrientationClassify:
        module_name: doc_text_orientation
        model_name: PP-LCNet_x1_0_doc_ori
        model_dir: null
    SubPipelines:
      # OCR pipeline nested inside TableRecognition. It differs from the
      # top-level GeneralOCR in two visible ways: no pipeline-level
      # `batch_size`, and `box_thresh` is 0.4 instead of 0.6.
      # NOTE(review): presumably intentional - confirm.
      GeneralOCR:
        pipeline_name: OCR
        text_type: general
        use_doc_preprocessor: False
        use_textline_orientation: True
        SubModules:
          TextDetection:
            module_name: text_detection
            model_name: PP-OCRv5_server_det
            model_dir: null
            limit_side_len: 736
            limit_type: min
            max_side_limit: 4000
            thresh: 0.3
            box_thresh: 0.4
            unclip_ratio: 1.5
          TextLineOrientation:
            module_name: textline_orientation
            model_name: PP-LCNet_x0_25_textline_ori
            model_dir: null
            batch_size: 8
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv5_server_rec
            model_dir: null
            batch_size: 8
            score_thresh: 0.0

  SealRecognition:
    pipeline_name: seal_recognition
    batch_size: 8
    use_layout_detection: False
    use_doc_preprocessor: False
    SubPipelines:
      SealOCR:
        pipeline_name: OCR
        batch_size: 8
        text_type: seal
        use_doc_preprocessor: False
        use_textline_orientation: False
        SubModules:
          TextDetection:
            module_name: seal_text_detection
            model_name: PP-OCRv4_server_seal_det
            model_dir: null
            limit_side_len: 736
            limit_type: min
            max_side_limit: 4000
            thresh: 0.2
            box_thresh: 0.6
            unclip_ratio: 0.5
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv5_server_rec
            model_dir: null
            batch_size: 8
            # Written as integer `0` here; the other TextRecognition entries
            # in this file use `0.0`. Value kept as found.
            score_thresh: 0

  FormulaRecognition:
    pipeline_name: formula_recognition
    batch_size: 8
    use_layout_detection: False
    use_doc_preprocessor: False
    SubModules:
      FormulaRecognition:
        module_name: formula_recognition
        model_name: PP-FormulaNet_plus-L
        model_dir: null
        batch_size: 8