# cut_image.py (751 B)
  1. from loguru import logger
  2. from .pdf_image_tools import cut_image
  3. def cut_image_and_table(span, page_pil_img, page_img_md5, page_id, image_writer, scale=2):
  4. def return_path(path_type):
  5. return f"{path_type}/{page_img_md5}"
  6. span_type = span["type"]
  7. if not check_img_bbox(span["bbox"]) or not image_writer:
  8. span["image_path"] = ""
  9. else:
  10. span["image_path"] = cut_image(
  11. span["bbox"], page_id, page_pil_img, return_path=return_path(span_type), image_writer=image_writer, scale=scale
  12. )
  13. return span
  14. def check_img_bbox(bbox) -> bool:
  15. if any([bbox[0] >= bbox[2], bbox[1] >= bbox[3]]):
  16. logger.warning(f"image_bboxes: 错误的box, {bbox}")
  17. return False
  18. return True