Browse Source

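Fix doc_title bug: merge paragraph_title boxes in "large" mode and relabel a lone, large paragraph_title as doc_title when no doc_title is detected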

zhouchangda 8 months ago
parent
commit
7184a62f43

+ 1 - 1
paddlex/configs/pipelines/PP-StructureV3.yaml

@@ -62,7 +62,7 @@ SubModules:
       21: [1.0, 1.0] # footer_image
       22: [1.0, 1.0] # aside_text
     layout_merge_bboxes_mode: 
-      0: "union"  # paragraph_title
+      0: "large"  # paragraph_title
       1: "large"  # image
       2: "union"  # text
       3: "union"  # number

+ 25 - 1
paddlex/inference/pipelines/layout_parsing/utils.py

@@ -575,6 +575,9 @@ def get_single_block_parsing_res(
     single_block_layout_parsing_res = []
     input_img = overall_ocr_res["doc_preprocessor_res"]["output_img"]
     seal_index = 0
+    with_doc_title = False
+    max_block_area = 0.0
+    paragraph_title_indexs = []
 
     layout_det_res_list, _ = _remove_overlap_blocks(
         deepcopy(layout_det_res["boxes"]),
@@ -582,7 +585,7 @@ def get_single_block_parsing_res(
         smaller=True,
     )
 
-    for box_info in layout_det_res_list:
+    for box_idx, box_info in enumerate(layout_det_res_list):
         block_bbox = box_info["coordinate"]
         label = box_info["label"]
         rec_res = {"boxes": [], "rec_texts": [], "rec_labels": [], "flag": False}
@@ -590,6 +593,14 @@ def get_single_block_parsing_res(
         seg_end_coordinate = float("-inf")
         num_of_lines = 1
 
+        if label == "doc_title":
+            with_doc_title = True
+        elif label == "paragraph_title":
+            paragraph_title_indexs.append(box_idx)
+
+        block_area = (block_bbox[2] - block_bbox[0]) * (block_bbox[3] - block_bbox[1])
+        max_block_area = max(max_block_area, block_area)
+
         if label == "table":
             for table_res in table_res_list:
                 if len(table_res["cell_box_list"]) == 0:
@@ -679,9 +690,22 @@ def get_single_block_parsing_res(
                         "seg_start_coordinate": seg_start_coordinate,
                         "seg_end_coordinate": seg_end_coordinate,
                         "num_of_lines": num_of_lines,
+                        "block_area": block_area,
                     },
                 )
 
+    if (
+        not with_doc_title
+        and len(paragraph_title_indexs) == 1
+        and single_block_layout_parsing_res[paragraph_title_indexs[0]].get(
+            "block_area", 0
+        )
+        > max_block_area * 0.3
+    ):
+        single_block_layout_parsing_res[paragraph_title_indexs[0]][
+            "block_label"
+        ] = "doc_title"
+
     if len(layout_det_res_list) == 0:
         for ocr_rec_box, ocr_rec_text in zip(
             overall_ocr_res["rec_boxes"], overall_ocr_res["rec_texts"]