| 12345678910111213141516171819202122232425 |
- # Copyright (c) Opendatalab. All rights reserved.
- from mineru.utils.pipeline_magic_model import MagicModel
- from mineru.version import __version__
- from mineru.utils.hash_utils import str_md5
def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer, page_index, lang=None, ocr=False):
    """Prepare the per-page inputs needed to turn model output into page info.

    Reads the ``"scale"``, ``"img_pil"`` and ``"img_base64"`` entries of
    ``image_dict``, hashes the base64 image with ``str_md5``, takes the page
    size from ``page.get_size()`` truncated to integers, and wraps
    ``page_model_info`` together with the scale in a ``MagicModel``.

    ``image_writer``, ``page_index``, ``lang`` and ``ocr`` are accepted but
    not used by the code visible here.

    NOTE(review): there is no ``return`` statement, so the function yields
    ``None`` and every local computed below is discarded. The caller appends
    the result to ``"pdf_info"`` -- presumably the rest of the conversion is
    still to be written; confirm.
    """
    img_scale = image_dict["scale"]
    pil_image = image_dict["img_pil"]
    img_md5 = str_md5(image_dict["img_base64"])

    # get_size() must yield exactly two values; each is coerced to int.
    page_w, page_h = (int(dim) for dim in page.get_size())

    model = MagicModel(page_model_info, img_scale)
def result_to_middle_json(model_list, images_list, pdf_doc, image_writer, lang=None, ocr=False):
    """Build the middle-JSON dict for a document from per-page model output.

    For every index ``i`` of ``model_list``, the page ``pdf_doc[i]`` and the
    image entry ``images_list[i]`` are passed, together with ``image_writer``,
    ``lang`` and ``ocr``, to ``page_model_info_to_page_info``. The results are
    collected in page order.

    Returns:
        A dict with the keys ``"pdf_info"`` (list of per-page results),
        ``"_backend"`` and ``"_version_name"`` (the package ``__version__``).
    """
    # NOTE(review): the backend tag is "vlm" although this module imports the
    # pipeline MagicModel -- confirm this is the intended value.
    result = {"pdf_info": [], "_backend":"vlm", "_version_name": __version__}
    collected_pages = result["pdf_info"]

    for idx, model_info in enumerate(model_list):
        # Look up the page before the image entry, as both are indexed by
        # the position of the model output.
        pdf_page = pdf_doc[idx]
        page_images = images_list[idx]
        collected_pages.append(
            page_model_info_to_page_info(
                model_info,
                page_images,
                pdf_page,
                image_writer,
                idx,
                lang=lang,
                ocr=ocr,
            )
        )

    return result
|