Эх сурвалжийг харах

support use_textline_orientation for ppchatocrv4 (#4067)

changdazhou 5 сар өмнө
parent
commit
e88330151f

+ 6 - 2
paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml

@@ -177,7 +177,7 @@ SubPipelines:
         pipeline_name: OCR
         text_type: general
         use_doc_preprocessor: False
-        use_textline_orientation: False
+        use_textline_orientation: True
         SubModules:
           TextDetection:
             module_name: text_detection
@@ -189,7 +189,11 @@ SubPipelines:
             thresh: 0.3
             box_thresh: 0.6
             unclip_ratio: 1.5
-            
+          TextLineOrientation:
+            module_name: textline_orientation
+            model_name: PP-LCNet_x0_25_textline_ori 
+            model_dir: null
+            batch_size: 6   
           TextRecognition:
             module_name: text_recognition
             model_name: PP-OCRv4_server_rec_doc

+ 2 - 1
paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py

@@ -545,12 +545,13 @@ def xycut_enhanced(
 
         sorted_blocks_by_pre_cuts.extend(sorted_blocks)
 
-    final_order_res_list = match_unsorted_blocks(
+    final_sorted_blocks = match_unsorted_blocks(
         sorted_blocks_by_pre_cuts,
         unsorted_blocks,
         region=region,
     )
 
+    final_order_res_list.extend(final_sorted_blocks)
     final_order_res_list.extend(footer_blocks)
     final_order_res_list.extend(unordered_blocks)
 

+ 3 - 0
paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py

@@ -249,6 +249,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         input: Union[str, List[str], np.ndarray, List[np.ndarray]],
         use_doc_orientation_classify: Optional[bool] = None,
         use_doc_unwarping: Optional[bool] = None,
+        use_textline_orientation: Optional[bool] = None,
         use_seal_recognition: Optional[bool] = None,
         use_table_recognition: Optional[bool] = None,
         layout_threshold: Optional[Union[float, dict]] = None,
@@ -279,6 +280,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
                                                                         numpy array of an image, or list of numpy arrays.
             use_doc_orientation_classify (bool): Flag to use document orientation classification.
             use_doc_unwarping (bool): Flag to use document unwarping.
+            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
             use_seal_recognition (bool): Flag to use seal recognition.
             use_table_recognition (bool): Flag to use table recognition.
             layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
@@ -320,6 +322,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
             input,
             use_doc_orientation_classify=use_doc_orientation_classify,
             use_doc_unwarping=use_doc_unwarping,
+            use_textline_orientation=use_textline_orientation,
             use_seal_recognition=use_seal_recognition,
             use_table_recognition=use_table_recognition,
             layout_threshold=layout_threshold,