Przeglądaj źródła

feat(ocr): improve handling of angled text boxes

- Add calculate_is_angle function to detect angled text boxes
- Update update_det_boxes and merge_det_boxes functions to handle angled text boxes
- Replace the calculate_angle_degrees > 0.5 check in get_ocr_result_list with calculate_is_angle, and re-enable merge_det_boxes in ModifiedPaddleOCR
myhloli 1 rok temu
rodzic
commit
4fd966eb35

+ 24 - 3
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py

@@ -71,7 +71,13 @@ def remove_intervals(original, masks):
 
 def update_det_boxes(dt_boxes, mfd_res):
     new_dt_boxes = []
+    angle_boxes_list = []
     for text_box in dt_boxes:
+
+        if calculate_is_angle(text_box):
+            angle_boxes_list.append(text_box)
+            continue
+
         text_bbox = points_to_bbox(text_box)
         masks_list = []
         for mf_box in mfd_res:
@@ -85,6 +91,9 @@ def update_det_boxes(dt_boxes, mfd_res):
             temp_dt_box.append(bbox_to_points([text_remove_mask[0], text_bbox[1], text_remove_mask[1], text_bbox[3]]))
         if len(temp_dt_box) > 0:
             new_dt_boxes.extend(temp_dt_box)
+
+    new_dt_boxes.extend(angle_boxes_list)
+
     return new_dt_boxes
 
 
@@ -143,9 +152,11 @@ def merge_det_boxes(dt_boxes):
     angle_boxes_list = []
     for text_box in dt_boxes:
         text_bbox = points_to_bbox(text_box)
-        if text_bbox[2] <= text_bbox[0] or text_bbox[3] <= text_bbox[1]:
+
+        if calculate_is_angle(text_box):
             angle_boxes_list.append(text_box)
             continue
+
         text_box_dict = {
             'bbox': text_bbox,
             'type': 'text',
@@ -202,8 +213,9 @@ def get_ocr_result_list(ocr_res, useful_list):
 
         p1, p2, p3, p4 = box_ocr_res[0]
         text, score = box_ocr_res[1]
-        average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
-        if average_angle_degrees > 0.5:
+        # average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
+        # if average_angle_degrees > 0.5:
+        if calculate_is_angle(box_ocr_res[0]):
             # logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}")
             # 与x轴的夹角超过0.5度,对边界做一下矫正
             # 计算几何中心
@@ -257,3 +269,12 @@ def calculate_angle_degrees(poly):
     # logger.info(f"average_angle_degrees: {average_angle_degrees}")
     return average_angle_degrees
 
+
+def calculate_is_angle(poly):
+    """Flag a quad as angled when p3.y - p1.y falls outside 0.8x-1.2x its mean side height."""
+    first, second, third, fourth = poly
+    # Mean of the y-extents first->fourth and second->third (presumably the two side edges; confirm point order).
+    mean_side_height = ((fourth[1] - first[1]) + (third[1] - second[1])) / 2
+    # y-distance between the first and third points; matches the side height when the box is not tilted.
+    first_to_third_dy = third[1] - first[1]
+    return not (0.8 * mean_side_height <= first_to_third_dy <= 1.2 * mean_side_height)

+ 2 - 3
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py

@@ -125,9 +125,8 @@ class ModifiedPaddleOCR(PaddleOCR):
 
         dt_boxes = sorted_boxes(dt_boxes)
 
-        # @todo 目前是在bbox层merge,对倾斜文本行的兼容性不佳,需要修改成支持poly的merge
-        # dt_boxes = merge_det_boxes(dt_boxes)
-
+        # merge_det_boxes 和 update_det_boxes 都会把poly转成bbox再转回poly,因此需要过滤所有倾斜程度较大的文本框
+        dt_boxes = merge_det_boxes(dt_boxes)
 
         if mfd_res:
             bef = time.time()