model_json_to_middle_json.py 1.1 KB

12345678910111213141516171819202122232425
  1. # Copyright (c) Opendatalab. All rights reserved.
  2. from mineru.utils.pipeline_magic_model import MagicModel
  3. from mineru.version import __version__
  4. from mineru.utils.hash_utils import str_md5
  def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer, page_index, lang=None, ocr=False):
      """Prepare the per-page inputs derived from one page's model output.

      Args:
          page_model_info: Model output for a single page; handed to
              ``MagicModel`` together with the image scale.
          image_dict: Mapping that must provide the keys ``"scale"``,
              ``"img_pil"`` and ``"img_base64"``.
          page: Page object exposing ``get_size()``; its two components are
              converted to ``int``.
          image_writer: Not used by the visible body.
          page_index: Not used by the visible body.
          lang: Not used by the visible body.
          ocr: Not used by the visible body.

      Returns:
          ``None`` -- the visible body contains no ``return`` statement.

      Raises:
          KeyError: If ``image_dict`` lacks one of the keys listed above.
      """
      img_scale = image_dict["scale"]
      pil_image = image_dict["img_pil"]
      # MD5 digest of the base64-encoded page image.
      img_digest = str_md5(image_dict["img_base64"])
      page_w, page_h = (int(dim) for dim in page.get_size())
      model = MagicModel(page_model_info, img_scale)
      # NOTE(review): nothing computed above is returned or stored, so callers
      # receive None -- confirm whether a page-info result is expected here.
      return None
  def result_to_middle_json(model_list, images_list, pdf_doc, image_writer, lang=None, ocr=False):
      """Assemble the middle-JSON dictionary for a whole document.

      Each entry of ``model_list`` is paired, by position, with the page at the
      same index in ``pdf_doc`` and the image entry at the same index in
      ``images_list``; the trio is passed to ``page_model_info_to_page_info``
      and the result is collected in order.

      Args:
          model_list: Per-page model outputs; its length drives the iteration.
          images_list: Per-page image dictionaries, indexed like ``model_list``.
          pdf_doc: Indexable document yielding one page object per index.
          image_writer: Forwarded unchanged to the per-page conversion.
          lang: Forwarded unchanged to the per-page conversion.
          ocr: Forwarded unchanged to the per-page conversion.

      Returns:
          A dict with the keys ``"pdf_info"`` (list of per-page results),
          ``"_backend"`` and ``"_version_name"``.
      """
      pdf_info = []
      for idx, model_info in enumerate(model_list):
          # Index the document before the image list, mirroring the original
          # lookup order.
          pdf_page = pdf_doc[idx]
          page_images = images_list[idx]
          pdf_info.append(
              page_model_info_to_page_info(
                  model_info,
                  page_images,
                  pdf_page,
                  image_writer,
                  idx,
                  lang=lang,
                  ocr=ocr,
              )
          )

      # NOTE(review): the backend tag is "vlm" although this module imports
      # MagicModel from pipeline_magic_model -- confirm the tag is intended.
      return {"pdf_info": pdf_info, "_backend": "vlm", "_version_name": __version__}