Răsfoiți Sursa

update layout_det_params pipeline (#2911)

Co-authored-by: cuicheng01 <45199522+cuicheng01@users.noreply.github.com>
Sunflower7788 10 luni în urmă
părinte
comite
f48c2a2c6d

+ 5 - 1
paddlex/configs/pipelines/seal_recognition.yaml

@@ -7,8 +7,12 @@ use_layout_detection: True
 SubModules:
   LayoutDetection:
     module_name: layout_detection
-    model_name: RT-DETR-H_layout_3cls
+    model_name: PP-DocLayout-L
     model_dir: null
+    threshold: 0.5
+    layout_nms: True
+    layout_unclip_ratio: 1.0
+    layout_merge_bboxes_mode: "large"
 
 SubPipelines:
   DocPreprocessor:

+ 4 - 5
paddlex/inference/models_new/object_detection/predictor.py

@@ -49,7 +49,7 @@ class DetPredictor(BasicPredictor):
         *args,
         img_size: Optional[Union[int, Tuple[int, int]]] = None,
         threshold: Optional[Union[float, dict]] = None,
-        layout_nms: bool = False,
+        layout_nms: Optional[bool] = None,
         layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
         layout_merge_bboxes_mode: Optional[str] = None,
         **kwargs,
@@ -97,7 +97,6 @@ class DetPredictor(BasicPredictor):
         if layout_merge_bboxes_mode is not None:
             assert layout_merge_bboxes_mode in ["union", "large", "small"], \
                 f"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}"
-
         self.img_size = img_size
         self.threshold = threshold
         self.layout_nms = layout_nms
@@ -201,7 +200,7 @@ class DetPredictor(BasicPredictor):
     def process(self, 
             batch_data: List[Any], 
             threshold: Optional[Union[float, dict]] = None,
-            layout_nms: bool = False,
+            layout_nms: Optional[bool] = None,
             layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
             layout_merge_bboxes_mode: Optional[str] = None,
         ):
@@ -211,7 +210,7 @@ class DetPredictor(BasicPredictor):
         Args:
             batch_data (List[Union[str, np.ndarray], ...]): A batch of input data (e.g., image file paths).
             threshold (Optional[Union[float, dict]], optional): The threshold for filtering out low-confidence predictions.
-            layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to False.
+            layout_nms (Optional[bool], optional): Whether to use layout-aware NMS. Defaults to None.
             layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
             layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None.
 
@@ -327,7 +326,7 @@ class DetPredictor(BasicPredictor):
         if self.threshold is None:
             self.threshold = self.config.get("draw_threshold", 0.5)
         if not self.layout_nms:
-            self.layout_nms = self.config.get("layout_nms", False)
+            self.layout_nms = self.config.get("layout_nms", None)
         if self.layout_unclip_ratio is None:
             self.layout_unclip_ratio = self.config.get("layout_unclip_ratio", None)
         if self.layout_merge_bboxes_mode is None:

+ 2 - 2
paddlex/inference/models_new/object_detection/processors.py

@@ -645,7 +645,7 @@ class DetPostProcess:
             boxes: ndarray, 
             img_size: Tuple[int, int],
             threshold: Union[float, dict], 
-            layout_nms: bool, 
+            layout_nms: Optional[bool],
             layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]], 
             layout_merge_bboxes_mode: Optional[str]
         ) -> Boxes:
@@ -723,7 +723,7 @@ class DetPostProcess:
         batch_outputs: List[dict],
         datas: List[dict],
         threshold: Optional[Union[float, dict]] = None,
-        layout_nms: bool = False,
+        layout_nms: Optional[bool] = None,
         layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
         layout_merge_bboxes_mode: Optional[str] = None,
     ) -> List[Boxes]:

+ 1 - 1
paddlex/inference/pipelines_new/object_detection/pipeline.py

@@ -64,7 +64,7 @@ class ObjectDetectionPipeline(BasePipeline):
         self,
         input: Union[str, List[str], np.ndarray, List[np.ndarray]],
         threshold: Optional[Union[float, dict]] = None,
-        layout_nms: bool = False,
+        layout_nms: Optional[bool] = None,
         layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
         layout_merge_bboxes_mode: Optional[str] = None,
         **kwargs,

+ 23 - 4
paddlex/inference/pipelines_new/seal_recognition/pipeline.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import os, sys
-from typing import Any, Dict, Optional, Union, List
+from typing import Any, Dict, Optional, Union, Tuple, List
 import numpy as np
 import cv2
 from ..base import BasePipeline
@@ -70,8 +70,16 @@ class SealRecognitionPipeline(BasePipeline):
                 "LayoutDetection",
                 {"model_config_error": "config error for layout_det_model!"},
             )
-            self.layout_det_model = self.create_model(layout_det_config)
-
+            layout_kwargs = {}
+            if (threshold := layout_det_config.get("threshold", None)) is not None:
+                layout_kwargs["threshold"] = threshold
+            if (layout_nms := layout_det_config.get("layout_nms", None)) is not None:
+                layout_kwargs["layout_nms"] = layout_nms
+            if (layout_unclip_ratio := layout_det_config.get("layout_unclip_ratio", None)) is not None:
+                layout_kwargs["layout_unclip_ratio"] = layout_unclip_ratio
+            if (layout_merge_bboxes_mode := layout_det_config.get("layout_merge_bboxes_mode", None)) is not None:
+                layout_kwargs["layout_merge_bboxes_mode"] = layout_merge_bboxes_mode
+            self.layout_det_model = self.create_model(layout_det_config, **layout_kwargs)
         seal_ocr_config = config.get("SubPipelines", {}).get(
             "SealOCR", {"pipeline_config_error": "config error for seal_ocr_pipeline!"}
         )
@@ -156,6 +164,10 @@ class SealRecognitionPipeline(BasePipeline):
         use_doc_unwarping: Optional[bool] = None,
         use_layout_detection: Optional[bool] = None,
         layout_det_res: Optional[DetResult] = None,
+        layout_threshold: Optional[Union[float, dict]] = None,
+        layout_nms: Optional[bool] = None,
+        layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
+        layout_merge_bboxes_mode: Optional[str] = None,
         seal_det_limit_side_len: Optional[int] = None,
         seal_det_limit_type: Optional[str] = None,
         seal_det_thresh: Optional[float] = None,
@@ -214,7 +226,14 @@ class SealRecognitionPipeline(BasePipeline):
                 seal_region_id += 1
             else:
                 if model_settings["use_layout_detection"]:
-                    layout_det_res = next(self.layout_det_model(doc_preprocessor_image))
+                    layout_det_res = next(self.layout_det_model(
+                        doc_preprocessor_image,
+                        threshold=layout_threshold,
+                        layout_nms=layout_nms,
+                        layout_unclip_ratio=layout_unclip_ratio,
+                        layout_merge_bboxes_mode=layout_merge_bboxes_mode
+                    )
+                )
 
                 for box_info in layout_det_res["boxes"]:
                     if box_info["label"].lower() in ["seal"]:

+ 1 - 1
paddlex/pipelines/seal_recognition.yaml

@@ -3,7 +3,7 @@ Global:
   input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/seal_text_det.png
 
 Pipeline:
-  layout_model: RT-DETR-H_layout_3cls
+  layout_model: PP-DocLayout-L
   text_det_model: PP-OCRv4_server_seal_det
   text_rec_model: PP-OCRv4_server_rec
   layout_batch_size: 1