|
|
@@ -0,0 +1,132 @@
|
|
|
+
|
|
|
+pipeline_name: layout_parsing_v2
|
|
|
+
|
|
|
+use_doc_preprocessor: True
|
|
|
+use_general_ocr: True
|
|
|
+use_seal_recognition: True
|
|
|
+use_table_recognition: True
|
|
|
+use_formula_recognition: True
|
|
|
+
|
|
|
+SubModules:
|
|
|
+ LayoutDetection:
|
|
|
+ module_name: layout_detection
|
|
|
+ model_name: PP-DocLayout-L
|
|
|
+ model_dir: null
|
|
|
+
|
|
|
+SubPipelines:
|
|
|
+ DocPreprocessor:
|
|
|
+ pipeline_name: doc_preprocessor
|
|
|
+ use_doc_orientation_classify: True
|
|
|
+ use_doc_unwarping: True
|
|
|
+ SubModules:
|
|
|
+ DocOrientationClassify:
|
|
|
+ module_name: doc_text_orientation
|
|
|
+ model_name: PP-LCNet_x1_0_doc_ori
|
|
|
+ model_dir: null
|
|
|
+ DocUnwarping:
|
|
|
+ module_name: image_unwarping
|
|
|
+ model_name: UVDoc
|
|
|
+ model_dir: null
|
|
|
+
|
|
|
+ GeneralOCR:
|
|
|
+ pipeline_name: OCR
|
|
|
+ text_type: general
|
|
|
+ use_doc_preprocessor: False
|
|
|
+ use_textline_orientation: False
|
|
|
+ SubModules:
|
|
|
+ TextDetection:
|
|
|
+ module_name: text_detection
|
|
|
+ model_name: PP-OCRv4_server_det
|
|
|
+ model_dir: null
|
|
|
+ limit_side_len: 960
|
|
|
+ limit_type: max
|
|
|
+ thresh: 0.3
|
|
|
+ box_thresh: 0.6
|
|
|
+ unclip_ratio: 2.0
|
|
|
+
|
|
|
+ TextRecognition:
|
|
|
+ module_name: text_recognition
|
|
|
+ model_name: PP-OCRv4_server_rec
|
|
|
+ model_dir: null
|
|
|
+ batch_size: 1
|
|
|
+ score_thresh: 0.0
|
|
|
+
|
|
|
+ # TableRecognition:
|
|
|
+ # pipeline_name: table_recognition_v2
|
|
|
+ # use_layout_detection: False
|
|
|
+ # use_doc_preprocessor: False
|
|
|
+ # use_ocr_model: True
|
|
|
+ # SubModules:
|
|
|
+ # TableClassification:
|
|
|
+ # module_name: table_classification
|
|
|
+ # model_name: PP-LCNet_x1_0_table_cls
|
|
|
+ # model_dir: null
|
|
|
+
|
|
|
+ # WiredTableStructureRecognition:
|
|
|
+ # module_name: table_structure_recognition
|
|
|
+ # model_name: SLANeXt_wired
|
|
|
+ # model_dir: null
|
|
|
+
|
|
|
+ # WirelessTableStructureRecognition:
|
|
|
+ # module_name: table_structure_recognition
|
|
|
+ # model_name: SLANeXt_wireless
|
|
|
+ # model_dir: null
|
|
|
+
|
|
|
+ # WiredTableCellsDetection:
|
|
|
+ # module_name: table_cells_detection
|
|
|
+ # model_name: RT-DETR-L_wired_table_cell_det
|
|
|
+ # model_dir: null
|
|
|
+
|
|
|
+ # WirelessTableCellsDetection:
|
|
|
+ # module_name: table_cells_detection
|
|
|
+ # model_name: RT-DETR-L_wireless_table_cell_det
|
|
|
+ # model_dir: null
|
|
|
+
|
|
|
+ TableRecognition:
|
|
|
+ pipeline_name: table_recognition
|
|
|
+ use_layout_detection: False
|
|
|
+ use_doc_preprocessor: False
|
|
|
+ use_ocr_model: False
|
|
|
+ SubModules:
|
|
|
+ TableStructureRecognition:
|
|
|
+ module_name: table_structure_recognition
|
|
|
+ model_name: SLANet_plus
|
|
|
+ model_dir: null
|
|
|
+
|
|
|
+ SealRecognition:
|
|
|
+ pipeline_name: seal_recognition
|
|
|
+ use_layout_detection: False
|
|
|
+ use_doc_preprocessor: False
|
|
|
+ SubPipelines:
|
|
|
+ SealOCR:
|
|
|
+ pipeline_name: OCR
|
|
|
+ text_type: seal
|
|
|
+ use_doc_preprocessor: False
|
|
|
+ use_textline_orientation: False
|
|
|
+ SubModules:
|
|
|
+ TextDetection:
|
|
|
+ module_name: seal_text_detection
|
|
|
+ model_name: PP-OCRv4_server_seal_det
|
|
|
+ model_dir: null
|
|
|
+ limit_side_len: 736
|
|
|
+ limit_type: min
|
|
|
+ thresh: 0.2
|
|
|
+ box_thresh: 0.6
|
|
|
+ unclip_ratio: 0.5
|
|
|
+ TextRecognition:
|
|
|
+ module_name: text_recognition
|
|
|
+ model_name: PP-OCRv4_server_rec
|
|
|
+ model_dir: null
|
|
|
+ batch_size: 1
|
|
|
+ score_thresh: 0
|
|
|
+
|
|
|
+ FormulaRecognition:
|
|
|
+ pipeline_name: formula_recognition
|
|
|
+ use_layout_detection: False
|
|
|
+ use_doc_preprocessor: False
|
|
|
+ SubModules:
|
|
|
+ FormulaRecognition:
|
|
|
+ module_name: formula_recognition
|
|
|
+ model_name: PP-FormulaNet-L
|
|
|
+ model_dir: null
|
|
|
+ batch_size: 5
|