浏览代码

refactor(ocr): remove unused functions and optimize OCR processing loop

- Remove unused function `calculate_angle_degrees`
- Refactor `calculate_is_angle` to be used directly in OCR processing
- Eliminate unnecessary loop index `idx` in OCR processing loops
myhloli 11 月之前
父节点
当前提交
5f4410b469

+ 0 - 28
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py

@@ -1,5 +1,3 @@
-import math
-
 import numpy as np
 import numpy as np
 from loguru import logger
 from loguru import logger
 
 
@@ -252,32 +250,6 @@ def get_ocr_result_list(ocr_res, useful_list):
     return ocr_result_list
     return ocr_result_list
 
 
 
 
-def calculate_angle_degrees(poly):
-    # 定义对角线的顶点
-    diagonal1 = (poly[0], poly[2])
-    diagonal2 = (poly[1], poly[3])
-
-    # 计算对角线的斜率
-    def slope(p1, p2):
-        return (p2[1] - p1[1]) / (p2[0] - p1[0]) if p2[0] != p1[0] else float('inf')
-
-    slope1 = slope(diagonal1[0], diagonal1[1])
-    slope2 = slope(diagonal2[0], diagonal2[1])
-
-    # 计算对角线与x轴的夹角(以弧度为单位)
-    angle1_radians = math.atan(slope1)
-    angle2_radians = math.atan(slope2)
-
-    # 将弧度转换为角度
-    angle1_degrees = math.degrees(angle1_radians)
-    angle2_degrees = math.degrees(angle2_radians)
-
-    # 取两条对角线与x轴夹角的平均值
-    average_angle_degrees = abs((angle1_degrees + angle2_degrees) / 2)
-    # logger.info(f"average_angle_degrees: {average_angle_degrees}")
-    return average_angle_degrees
-
-
 def calculate_is_angle(poly):
 def calculate_is_angle(poly):
     p1, p2, p3, p4 = poly
     p1, p2, p3, p4 = poly
     height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2
     height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2

+ 3 - 3
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py

@@ -63,7 +63,7 @@ class ModifiedPaddleOCR(PaddleOCR):
 
 
         if det and rec:
         if det and rec:
             ocr_res = []
             ocr_res = []
-            for idx, img in enumerate(imgs):
+            for img in imgs:
                 img = preprocess_image(img)
                 img = preprocess_image(img)
                 dt_boxes, rec_res, _ = self.__call__(img, cls, mfd_res=mfd_res)
                 dt_boxes, rec_res, _ = self.__call__(img, cls, mfd_res=mfd_res)
                 if not dt_boxes and not rec_res:
                 if not dt_boxes and not rec_res:
@@ -75,7 +75,7 @@ class ModifiedPaddleOCR(PaddleOCR):
             return ocr_res
             return ocr_res
         elif det and not rec:
         elif det and not rec:
             ocr_res = []
             ocr_res = []
-            for idx, img in enumerate(imgs):
+            for img in imgs:
                 img = preprocess_image(img)
                 img = preprocess_image(img)
                 dt_boxes, elapse = self.text_detector(img)
                 dt_boxes, elapse = self.text_detector(img)
                 if dt_boxes is None:
                 if dt_boxes is None:
@@ -96,7 +96,7 @@ class ModifiedPaddleOCR(PaddleOCR):
         else:
         else:
             ocr_res = []
             ocr_res = []
             cls_res = []
             cls_res = []
-            for idx, img in enumerate(imgs):
+            for img in imgs:
                 if not isinstance(img, list):
                 if not isinstance(img, list):
                     img = preprocess_image(img)
                     img = preprocess_image(img)
                     img = [img]
                     img = [img]