Преглед изворног кода

Merge pull request #386 from myhloli/master

feat(draw_bbox): add model bbox drawing functionality
Xiaomeng Zhao пре 1 година
родитељ
комит
fa3475a4d5

+ 66 - 1
magic_pdf/libs/draw_bbox.py

@@ -1,6 +1,7 @@
 from magic_pdf.libs.Constants import CROSS_PAGE
 from magic_pdf.libs.commons import fitz  # PyMuPDF
-from magic_pdf.libs.ocr_content_type import ContentType, BlockType
+from magic_pdf.libs.ocr_content_type import ContentType, BlockType, CategoryId
+from magic_pdf.model.magic_model import MagicModel
 
 
 def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
@@ -225,3 +226,67 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
 
     # Save the PDF
     pdf_docs.save(f"{out_path}/spans.pdf")
+
+
+def drow_model_bbox(model_list: list, pdf_bytes, out_path):
+    """Draw the model's layout detections onto the PDF and save the result.
+
+    The PDF is opened from ``pdf_bytes``; for every page index in
+    ``model_list`` the ``layout_dets`` entries returned by ``MagicModel`` are
+    grouped by their ``category_id``. Detections whose category is not part of
+    the draw plan below are ignored. Each group is then drawn on its page with
+    its own colour, and the annotated document is written to
+    ``{out_path}/model.pdf``.
+
+    Args:
+        model_list: per-page model output; handed to ``MagicModel`` together
+            with the opened document.
+        pdf_bytes: raw bytes of the PDF to annotate.
+        out_path: directory in which ``model.pdf`` is saved.
+    """
+    # (category id, RGB colour) pairs, listed in the order they are drawn.
+    draw_plan = (
+        (CategoryId.Abandon, (158, 158, 158)),
+        (CategoryId.TableBody, (204, 204, 0)),
+        (CategoryId.TableCaption, (255, 255, 102)),
+        (CategoryId.TableFootnote, (229, 255, 204)),
+        (CategoryId.ImageBody, (153, 255, 51)),
+        (CategoryId.ImageCaption, (102, 178, 255)),
+        (CategoryId.Title, (102, 102, 255)),
+        (CategoryId.Text, (153, 0, 76)),
+        (CategoryId.InterlineEquation_YOLO, (0, 255, 0)),
+    )
+
+    document = fitz.open("pdf", pdf_bytes)
+    model = MagicModel(model_list, document)
+
+    # category id -> one list of bboxes per page, indexed by page number.
+    boxes_by_category = {category: [] for category, _ in draw_plan}
+    for page_no in range(len(model_list)):
+        page_buckets = {category: [] for category, _ in draw_plan}
+        for detection in model.get_model_list(page_no)["layout_dets"]:
+            box = detection["bbox"]
+            bucket = page_buckets.get(detection["category_id"])
+            if bucket is not None:
+                bucket.append(box)
+        for category, page_boxes in page_buckets.items():
+            boxes_by_category[category].append(page_boxes)
+
+    for page_no, page in enumerate(document):
+        for category, colour in draw_plan:
+            # A fresh colour list is passed on every call.
+            draw_bbox_with_number(
+                page_no, boxes_by_category[category], page, list(colour), True
+            )
+
+    # Save the PDF
+    document.save(f"{out_path}/model.pdf")

+ 14 - 0
magic_pdf/libs/ocr_content_type.py

@@ -19,3 +19,17 @@ class BlockType:
     Footnote = "footnote"
     Discarded = "discarded"
 
+
+class CategoryId:
+    """Integer ``category_id`` values used to classify layout detections.
+
+    NOTE(review): the numbering presumably mirrors the label ids emitted by
+    the detection models; ids 9-12 are not defined here -- confirm upstream.
+    """
+    Title = 0
+    Text = 1
+    Abandon = 2
+    ImageBody = 3
+    ImageCaption = 4
+    TableBody = 5
+    TableCaption = 6
+    TableFootnote = 7
+    # NOTE(review): two separate ids exist for interline equations; judging by
+    # the suffixes they come from different detectors -- confirm.
+    InterlineEquation_Layout = 8
+    InlineEquation = 13
+    InterlineEquation_YOLO = 14
+    OcrText = 15

+ 2 - 0
magic_pdf/tools/cli_dev.py

@@ -94,6 +94,7 @@ def jsonl(jsonl, method, output_dir):
         jso["doc_layout_result"],
         method,
         f_dump_content_list=True,
+        f_draw_model_bbox=True,
     )
 
 
@@ -146,6 +147,7 @@ def pdf(pdf, json_data, output_dir, method):
         model_json_list,
         method,
         f_dump_content_list=True,
+        f_draw_model_bbox=True,
     )
 
 

+ 4 - 1
magic_pdf/tools/common.py

@@ -4,7 +4,7 @@ import copy
 import click
 from loguru import logger
 from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
-from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox
+from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox, drow_model_bbox
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.pipe.OCRPipe import OCRPipe
 from magic_pdf.pipe.TXTPipe import TXTPipe
@@ -37,6 +37,7 @@ def do_parse(
     f_dump_orig_pdf=True,
     f_dump_content_list=False,
     f_make_md_mode=MakeMode.MM_MD,
+    f_draw_model_bbox=False,
 ):
     orig_model_list = copy.deepcopy(model_list)
     local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
@@ -73,6 +74,8 @@ def do_parse(
         draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir)
     if f_draw_span_bbox:
         draw_span_bbox(pdf_info, pdf_bytes, local_md_dir)
+    if f_draw_model_bbox:
+        drow_model_bbox(orig_model_list, pdf_bytes, local_md_dir)
 
     md_content = pipe.pipe_mk_markdown(
         image_dir, drop_mode=DropMode.NONE, md_make_mode=f_make_md_mode