소스 검색

feat(draw_bbox): add model bbox drawing functionality

Add support for drawing the bounding boxes of the model's detected elements onto the PDF.
This adds a new drawing function (and adjusts the existing drawing module to support it), and
updates the CLI tools and common utilities so that model bbox drawing can be enabled.
myhloli 1 년 전
부모
커밋
c90ee891d6
4개의 변경된 파일, 86개의 추가 및 2개의 삭제
  1. 66 1
      magic_pdf/libs/draw_bbox.py
  2. 14 0
      magic_pdf/libs/ocr_content_type.py
  3. 2 0
      magic_pdf/tools/cli_dev.py
  4. 4 1
      magic_pdf/tools/common.py

+ 66 - 1
magic_pdf/libs/draw_bbox.py

@@ -1,6 +1,7 @@
 from magic_pdf.libs.Constants import CROSS_PAGE
 from magic_pdf.libs.commons import fitz  # PyMuPDF
-from magic_pdf.libs.ocr_content_type import ContentType, BlockType
+from magic_pdf.libs.ocr_content_type import ContentType, BlockType, CategoryId
+from magic_pdf.model.magic_model import MagicModel
 
 
 def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
@@ -225,3 +226,67 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
 
     # Save the PDF
     pdf_docs.save(f"{out_path}/spans.pdf")
+
+
+def drow_model_bbox(model_list: list, pdf_bytes, out_path):
+    dropped_bbox_list = []
+    tables_body_list, tables_caption_list, tables_footnote_list = [], [], []
+    imgs_body_list, imgs_caption_list = [], []
+    titles_list = []
+    texts_list = []
+    interequations_list = []
+    pdf_docs = fitz.open("pdf", pdf_bytes)
+    magic_model = MagicModel(model_list, pdf_docs)
+    for i in range(len(model_list)):
+        page_dropped_list = []
+        tables_body, tables_caption, tables_footnote = [], [], []
+        imgs_body, imgs_caption = [], []
+        titles = []
+        texts = []
+        interequations = []
+        page_info = magic_model.get_model_list(i)
+        layout_dets = page_info["layout_dets"]
+        for layout_det in layout_dets:
+            bbox = layout_det["bbox"]
+            if layout_det["category_id"] == CategoryId.Text:
+                texts.append(bbox)
+            elif layout_det["category_id"] == CategoryId.Title:
+                titles.append(bbox)
+            elif layout_det["category_id"] == CategoryId.TableBody:
+                tables_body.append(bbox)
+            elif layout_det["category_id"] == CategoryId.TableCaption:
+                tables_caption.append(bbox)
+            elif layout_det["category_id"] == CategoryId.TableFootnote:
+                tables_footnote.append(bbox)
+            elif layout_det["category_id"] == CategoryId.ImageBody:
+                imgs_body.append(bbox)
+            elif layout_det["category_id"] == CategoryId.ImageCaption:
+                imgs_caption.append(bbox)
+            elif layout_det["category_id"] == CategoryId.InterlineEquation_YOLO:
+                interequations.append(bbox)
+            elif layout_det["category_id"] == CategoryId.Abandon:
+                page_dropped_list.append(bbox)
+
+        tables_body_list.append(tables_body)
+        tables_caption_list.append(tables_caption)
+        tables_footnote_list.append(tables_footnote)
+        imgs_body_list.append(imgs_body)
+        imgs_caption_list.append(imgs_caption)
+        titles_list.append(titles)
+        texts_list.append(texts)
+        interequations_list.append(interequations)
+        dropped_bbox_list.append(page_dropped_list)
+
+    for i, page in enumerate(pdf_docs):
+        draw_bbox_with_number(i, dropped_bbox_list, page, [158, 158, 158], True) # color !
+        draw_bbox_with_number(i, tables_body_list, page, [204, 204, 0], True)
+        draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102], True)
+        draw_bbox_with_number(i, tables_footnote_list, page, [229, 255, 204], True)
+        draw_bbox_with_number(i, imgs_body_list, page, [153, 255, 51], True)
+        draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255], True)
+        draw_bbox_with_number(i, titles_list, page, [102, 102, 255], True)
+        draw_bbox_with_number(i, texts_list, page, [153, 0, 76], True)
+        draw_bbox_with_number(i, interequations_list, page, [0, 255, 0], True)
+
+    # Save the PDF
+    pdf_docs.save(f"{out_path}/model.pdf")

+ 14 - 0
magic_pdf/libs/ocr_content_type.py

@@ -19,3 +19,17 @@ class BlockType:
     Footnote = "footnote"
     Discarded = "discarded"
 
+
+class CategoryId:  # Integer values compared with a layout detection's "category_id".
+    Title = 0
+    Text = 1
+    Abandon = 2  # drow_model_bbox (libs/draw_bbox.py) collects these as dropped boxes
+    ImageBody = 3
+    ImageCaption = 4
+    TableBody = 5
+    TableCaption = 6
+    TableFootnote = 7
+    InterlineEquation_Layout = 8  # presumably from the layout model -- TODO confirm
+    InlineEquation = 13  # values 9-12 are not defined in this class
+    InterlineEquation_YOLO = 14  # presumably from a YOLO detector -- TODO confirm
+    OcrText = 15

+ 2 - 0
magic_pdf/tools/cli_dev.py

@@ -94,6 +94,7 @@ def jsonl(jsonl, method, output_dir):
         jso["doc_layout_result"],
         method,
         f_dump_content_list=True,
+        f_draw_model_bbox=True,
     )
 
 
@@ -146,6 +147,7 @@ def pdf(pdf, json_data, output_dir, method):
         model_json_list,
         method,
         f_dump_content_list=True,
+        f_draw_model_bbox=True,
     )
 
 

+ 4 - 1
magic_pdf/tools/common.py

@@ -4,7 +4,7 @@ import copy
 import click
 from loguru import logger
 from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
-from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox
+from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox, drow_model_bbox
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.pipe.OCRPipe import OCRPipe
 from magic_pdf.pipe.TXTPipe import TXTPipe
@@ -37,6 +37,7 @@ def do_parse(
     f_dump_orig_pdf=True,
     f_dump_content_list=False,
     f_make_md_mode=MakeMode.MM_MD,
+    f_draw_model_bbox=False,
 ):
     orig_model_list = copy.deepcopy(model_list)
     local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
@@ -73,6 +74,8 @@ def do_parse(
         draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir)
     if f_draw_span_bbox:
         draw_span_bbox(pdf_info, pdf_bytes, local_md_dir)
+    if f_draw_model_bbox:
+        drow_model_bbox(orig_model_list, pdf_bytes, local_md_dir)
 
     md_content = pipe.pipe_mk_markdown(
         image_dir, drop_mode=DropMode.NONE, md_make_mode=f_make_md_mode