vor 8 Monaten · 66bdd0923e
--- a/paddlex/configs/pipelines/PP-StructureV3.yaml
+++ b/paddlex/configs/pipelines/PP-StructureV3.yaml
@@ -48,10 +48,10 @@ SubPipelines:
 
				         module_name: text_detection
			
 
				         model_name: PP-OCRv4_server_det
			
 
				         model_dir: null
			
 
				-        limit_side_len: 960
			
 
				+        limit_side_len: 1200
			
 
				         limit_type: max
			
 
				         thresh: 0.3
			
 
				-        box_thresh: 0.6
			
 
				+        box_thresh: 0.4
			
 
				         unclip_ratio: 2.0
			
 
				       TextLineOrientation:
			
 
				         module_name: textline_orientation
			
--- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
@@ -256,12 +256,20 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				         matched_ocr_dict = {}
			
 
				         image = np.array(image)
			
 
				         object_boxes = []
			
 
				+        footnote_list = []
			
 
				+        max_bottom_text_coordinate = 0
			
 
				 
			
 
				         for object_box_idx, box_info in enumerate(layout_det_res["boxes"]):
			
 
				             box = box_info["coordinate"]
			
 
				             label = box_info["label"].lower()
			
 
				             object_boxes.append(box)
			
 
				 
			
 
				+            # relabel a footnote as text when its bottom edge lies above the bottom edge of the lowest text box
			
 
				+            if label == "footnote":
			
 
				+                footnote_list.append(object_box_idx)
			
 
				+            if label == "text" and box[3] > max_bottom_text_coordinate:
			
 
				+                max_bottom_text_coordinate = box[3]
			
 
				+
			
 
				             if label not in ["formula", "table", "seal"]:
			
 
				                 _, matched_idxs = get_sub_regions_ocr_res(
			
 
				                     overall_ocr_res, [box], return_match_idx=True
			
@@ -272,6 +280,13 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				                     else:
			
 
				                         matched_ocr_dict[matched_idx].append(object_box_idx)
			
 
				 
			
 
				+        for footnote_idx in footnote_list:
			
 
				+            if (
			
 
				+                layout_det_res["boxes"][footnote_idx]["coordinate"][3]
			
 
				+                < max_bottom_text_coordinate
			
 
				+            ):
			
 
				+                layout_det_res["boxes"][footnote_idx]["label"] = "text"
			
 
				+
			
 
				         already_processed = set()
			
 
				         for matched_idx, layout_box_ids in matched_ocr_dict.items():
			
 
				             if len(layout_box_ids) <= 1:
			
@@ -578,9 +593,12 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				                     table_contents["rec_texts"].append(
			
 
				                         f'<div style="text-align: center;"><img src="{img_path}" alt="Image" /></div>'
			
 
				                     )
			
 
				-                    table_contents["rec_boxes"] = np.vstack(
			
 
				-                        (table_contents["rec_boxes"], img["coordinate"])
			
 
				-                    )
			
 
				+                    if table_contents["rec_boxes"].size == 0:
			
 
				+                        table_contents["rec_boxes"] = np.array([img["coordinate"]])
			
 
				+                    else:
			
 
				+                        table_contents["rec_boxes"] = np.vstack(
			
 
				+                            (table_contents["rec_boxes"], img["coordinate"])
			
 
				+                        )
			
 
				                     table_contents["rec_polys"].append(poly_points)
			
 
				                     table_contents["rec_scores"].append(img["score"])