1 年之前 · 3da5c41115
--- a/magic_pdf/model/pek_sub_modules/self_modify.py
+++ b/magic_pdf/model/pek_sub_modules/self_modify.py
@@ -12,6 +12,7 @@ from paddleocr.ppocr.utils.utility import check_and_read, alpha_to_color, binari
 
				 from paddleocr.tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image, get_minarea_rect_crop
			
 
				 
			
 
				 from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold
			
 
				+from magic_pdf.pre_proc.ocr_dict_merge import merge_spans_to_line
			
 
				 
			
 
				 logger = get_logger()
			
 
				 
			
@@ -162,6 +163,86 @@ def update_det_boxes(dt_boxes, mfd_res):
 
				     return new_dt_boxes
			
 
				 
			
 
				 
			
 
				+def merge_overlapping_spans(spans):
			
 
				+    """
			
 
				+    Merges overlapping spans on the same line.
			
 
				+
			
 
				+    :param spans: A list of span coordinates [(x1, y1, x2, y2), ...]
			
 
				+    :return: A list of merged spans
			
 
				+    """
			
 
				+    # Return an empty list if the input spans list is empty
			
 
				+    if not spans:
			
 
				+        return []
			
 
				+
			
 
				+    # Sort spans by their starting x-coordinate
			
 
				+    spans.sort(key=lambda x: x[0])
			
 
				+
			
 
				+    # Initialize the list of merged spans
			
 
				+    merged = []
			
 
				+    for span in spans:
			
 
				+        # Unpack span coordinates
			
 
				+        x1, y1, x2, y2 = span
			
 
				+        # If the merged list is empty or there's no horizontal overlap, add the span directly
			
 
				+        if not merged or merged[-1][2] < x1:
			
 
				+            merged.append(span)
			
 
				+        else:
			
 
				+            # If there is horizontal overlap, merge the current span with the previous one
			
 
				+            last_span = merged.pop()
			
 
				+            # Update the merged span's top-left corner to the smaller (x1, y1) and bottom-right to the larger (x2, y2)
			
 
				+            x1 = min(last_span[0], x1)
			
 
				+            y1 = min(last_span[1], y1)
			
 
				+            x2 = max(last_span[2], x2)
			
 
				+            y2 = max(last_span[3], y2)
			
 
				+            # Add the merged span back to the list
			
 
				+            merged.append((x1, y1, x2, y2))
			
 
				+
			
 
				+    # Return the list of merged spans
			
 
				+    return merged
			
 
				+
			
 
				+
			
 
				+def merge_det_boxes(dt_boxes):
			
 
				+    """
			
 
				+    Merge detection boxes.
			
 
				+
			
 
				+    This function takes a list of detected bounding boxes, each represented by four corner points.
			
 
				+    The goal is to merge these bounding boxes into larger text regions.
			
 
				+
			
 
				+    Parameters:
			
 
				+    dt_boxes (list): A list containing multiple text detection boxes, where each box is defined by four corner points.
			
 
				+
			
 
				+    Returns:
			
 
				+    list: A list containing the merged text regions, where each region is represented by four corner points.
			
 
				+    """
			
 
				+    # Convert the detection boxes into a dictionary format with bounding boxes and type
			
 
				+    dt_boxes_dict_list = []
			
 
				+    for text_box in dt_boxes:
			
 
				+        text_bbox = points_to_bbox(text_box)
			
 
				+        text_box_dict = {
			
 
				+            'bbox': text_bbox,
			
 
				+            'type': 'text',
			
 
				+        }
			
 
				+        dt_boxes_dict_list.append(text_box_dict)
			
 
				+
			
 
				+    # Merge adjacent text regions into lines
			
 
				+    lines = merge_spans_to_line(dt_boxes_dict_list)
			
 
				+
			
 
				+    # Initialize a new list for storing the merged text regions
			
 
				+    new_dt_boxes = []
			
 
				+    for line in lines:
			
 
				+        line_bbox_list = []
			
 
				+        for span in line:
			
 
				+            line_bbox_list.append(span['bbox'])
			
 
				+
			
 
				+        # Merge overlapping text regions within the same line
			
 
				+        merged_spans = merge_overlapping_spans(line_bbox_list)
			
 
				+
			
 
				+        # Convert the merged text regions back to point format and add them to the new detection box list
			
 
				+        for span in merged_spans:
			
 
				+            new_dt_boxes.append(bbox_to_points(span))
			
 
				+
			
 
				+    return new_dt_boxes
			
 
				+
			
 
				+
			
 
				 class ModifiedPaddleOCR(PaddleOCR):
			
 
				     def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, mfd_res=None, alpha_color=(255, 255, 255)):
			
 
				         """
			
@@ -265,6 +346,9 @@ class ModifiedPaddleOCR(PaddleOCR):
 
				         img_crop_list = []
			
 
				 
			
 
				         dt_boxes = sorted_boxes(dt_boxes)
			
 
				+
			
 
				+        dt_boxes = merge_det_boxes(dt_boxes)
			
 
				+
			
 
				         if mfd_res:
			
 
				             bef = time.time()
			
 
				             dt_boxes = update_det_boxes(dt_boxes, mfd_res)