Przeglądaj źródła

fix draw_layout_bbox logic

赵小蒙 1 rok temu
rodzic
commit
60208b1ba0
2 zmienione pliki: 30 dodań i 14 usunięć
  1. magic_pdf/cli/magicpdf.py (+6 -1)
  2. magic_pdf/libs/draw_bbox.py (+24 -13)

+ 6 - 1
magic_pdf/cli/magicpdf.py

@@ -27,6 +27,7 @@ import click
 from loguru import logger
 from pathlib import Path
 
+from magic_pdf.libs.draw_bbox import draw_layout_bbox
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.pipe.OCRPipe import OCRPipe
 from magic_pdf.pipe.TXTPipe import TXTPipe
@@ -56,7 +57,7 @@ def prepare_env(pdf_file_name, method):
     return local_image_dir, local_md_dir
 
 
-def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, md_writer, image_dir):
+def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, md_writer, image_dir, local_md_dir):
     if parse_method == "auto":
         pipe = UNIPipe(pdf_bytes, model_list, image_writer, image_dir, is_debug=True)
     elif parse_method == "txt":
@@ -69,6 +70,8 @@ def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer,
 
     pipe.pipe_classify()
     pipe.pipe_parse()
+    pdf_info = pipe.pdf_mid_data['pdf_info']
+    draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir)
     md_content = pipe.pipe_mk_markdown()
     #part_file_name = datetime.now().strftime("%H-%M-%S")
     md_writer.write(
@@ -144,6 +147,7 @@ def json_command(json, method):
         local_image_rw,
         local_md_rw,
         os.path.basename(local_image_dir),
+        local_md_dir
     )
 
 
@@ -185,6 +189,7 @@ def pdf_command(pdf, model, method):
         local_image_rw,
         local_md_rw,
         os.path.basename(local_image_dir),
+        local_md_dir
     )
 
 

+ 24 - 13
magic_pdf/libs/draw_bbox.py

@@ -2,7 +2,7 @@ from magic_pdf.libs.commons import fitz  # PyMuPDF
 from magic_pdf.libs.ocr_content_type import ContentType
 
 
-def draw_bbox_without_number(i, bbox_list, page, rgb_config):
+def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
     new_rgb = []
     for item in rgb_config:
         item = float(item) / 255
@@ -11,10 +11,13 @@ def draw_bbox_without_number(i, bbox_list, page, rgb_config):
     for bbox in page_data:
         x0, y0, x1, y1 = bbox
         rect_coords = fitz.Rect(x0, y0, x1, y1)  # Define the rectangle
-        page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True)  # Draw the rectangle
+        if fill_config:
+            page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True)  # Draw the rectangle
+        else:
+            page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True)  # Draw the rectangle
 
 
-def draw_bbox_with_number(i, bbox_list, page, rgb_config):
+def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config):
     new_rgb = []
     for item in rgb_config:
         item = float(item) / 255
@@ -23,27 +26,35 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config):
     for j, bbox in enumerate(page_data):
         x0, y0, x1, y1 = bbox
         rect_coords = fitz.Rect(x0, y0, x1, y1)  # Define the rectangle
-        page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True)  # Draw the rectangle
-        page.insert_text((x0, y0), str(j + 1), fontsize=10, color=new_rgb)  # Insert the index at the top left corner of the rectangle
+        if fill_config:
+            page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True)  # Draw the rectangle
+        else:
+            page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True)  # Draw the rectangle
+        page.insert_text((x0, y0+10), str(j + 1), fontsize=10, color=new_rgb)  # Insert the index at the top left corner of the rectangle
 
 
-def draw_layout_bbox(pdf_info_dict, pdf_bytes, out_path):
+def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
     layout_bbox_list = []
+    blocks_bbox_list = []
     dropped_bbox_list = []
-    for page in pdf_info_dict.values():
+    for page in pdf_info:
         page_layout_list = []
         page_dropped_list = []
+        page_blocks_bbox_list = []
         for layout in page['layout_bboxes']:
             page_layout_list.append(layout['layout_bbox'])
         layout_bbox_list.append(page_layout_list)
-        for drop_tag, dropped_bboxes in page['droped_bboxes'].items():
-            for dropped_bbox in dropped_bboxes:
-                page_dropped_list.append(dropped_bbox)
+        for dropped_bbox in page['discarded_blocks']:
+            page_dropped_list.append(dropped_bbox['bbox'])
         dropped_bbox_list.append(page_dropped_list)
+        for block in page['para_blocks']:
+            page_blocks_bbox_list.append(block['bbox'])
+        blocks_bbox_list.append(page_blocks_bbox_list)
     pdf_docs = fitz.open("pdf", pdf_bytes)
     for i, page in enumerate(pdf_docs):
-        draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0])
-        draw_bbox_without_number(i, dropped_bbox_list, page, [0, 255, 0])
+        draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False)
+        draw_bbox_without_number(i, dropped_bbox_list, page, [0, 255, 0], True)
+        draw_bbox_without_number(i, blocks_bbox_list, page, [0, 0, 255], True)
     # Save the PDF
     pdf_docs.save(f"{out_path}/layout.pdf")
 
@@ -55,7 +66,7 @@ def draw_text_bbox(pdf_info_dict, pdf_bytes, out_path):
         page_text_list = []
         page_inline_equation_list = []
         page_interline_equation_list = []
-        for block in page['preproc_blocks']:
+        for block in page['para_blocks']:
             for line in block['lines']:
                 for span in line['spans']:
                     if span['type'] == ContentType.Text: