Explorar o código

fix: handle IndirectObject in /Rotate field of PDF pages

Yijie Xu hai 3 meses
pai
achega
f3c933770a
Modificouse 1 ficheiro con 15 adicións e 17 borrados
  1. 15 17
      mineru/utils/draw_bbox.py

+ 15 - 17
mineru/utils/draw_bbox.py

@@ -24,8 +24,8 @@ def cal_canvas_rect(page, bbox):
     actual_width = page_width    # The width of the final PDF display
     actual_height = page_height  # The height of the final PDF display
     
-    rotation = page.get("/Rotate", 0)
-    rotation = rotation % 360
+    rotation_obj = page.get("/Rotate", 0)
+    rotation = int(rotation_obj) % 360  # cast rotation to int to handle IndirectObject
     
     if rotation in [90, 270]:
         # PDF is rotated 90 degrees or 270 degrees, and the width and height need to be swapped
@@ -35,19 +35,18 @@ def cal_canvas_rect(page, bbox):
     rect_w = abs(x1 - x0)
     rect_h = abs(y1 - y0)
     
-    if 270 == rotation:
+    if rotation == 270:
         rect_w, rect_h = rect_h, rect_w
         x0 = actual_height - y1
         y0 = actual_width - x1
-    elif 180 == rotation:
+    elif rotation == 180:
         x0 = page_width - x1
-        y0 = y0
-    elif 90 == rotation:
+        # y0 stays the same
+    elif rotation == 90:
         rect_w, rect_h = rect_h, rect_w
         x0, y0 = y0, x0 
     else:
-        # 0 == rotation:
-        x0 = x0
+        # rotation == 0
         y0 = page_height - y1
     
     rect = [x0, y0, rect_w, rect_h]        
@@ -91,16 +90,16 @@ def draw_bbox_with_number(i, bbox_list, page, c, rgb_config, fill_config, draw_b
         c.setFontSize(size=10)
         
         c.saveState()
-        rotation = page.get("/Rotate", 0)
-        rotation = rotation % 360
-    
-        if 0 == rotation:
+        rotation_obj = page.get("/Rotate", 0)
+        rotation = int(rotation_obj) % 360  # cast rotation to int to handle IndirectObject
+
+        if rotation == 0:
             c.translate(rect[0] + rect[2] + 2, rect[1] + rect[3] - 10)
-        elif 90 == rotation:
+        elif rotation == 90:
             c.translate(rect[0] + 10, rect[1] + rect[3] + 2)
-        elif 180 == rotation:
+        elif rotation == 180:
             c.translate(rect[0] - 2, rect[1] + 10)
-        elif 270 == rotation:
+        elif rotation == 270:
             c.translate(rect[0] + rect[2] - 10, rect[1] - 2)
             
         c.rotate(rotation)
@@ -114,8 +113,7 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
     dropped_bbox_list = []
     tables_list, tables_body_list = [], []
     tables_caption_list, tables_footnote_list = [], []
-    imgs_list, imgs_body_list, imgs_caption_list = [], [], []
-    imgs_footnote_list = []
+    imgs_list, imgs_body_list, imgs_caption_list, imgs_footnote_list = [], [], [], []
     titles_list = []
     texts_list = []
     interequations_list = []