utils.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import fitz
  2. import numpy as np
  3. from loguru import logger
  4. from magic_pdf.utils.annotations import ImportPIL
  5. @ImportPIL
  6. def fitz_doc_to_image(doc, dpi=200) -> dict:
  7. """Convert fitz.Document to image, Then convert the image to numpy array.
  8. Args:
  9. doc (_type_): pymudoc page
  10. dpi (int, optional): reset the dpi of dpi. Defaults to 200.
  11. Returns:
  12. dict: {'img': numpy array, 'width': width, 'height': height }
  13. """
  14. from PIL import Image
  15. mat = fitz.Matrix(dpi / 72, dpi / 72)
  16. pm = doc.get_pixmap(matrix=mat, alpha=False)
  17. # If the width or height exceeds 4500 after scaling, do not scale further.
  18. if pm.width > 4500 or pm.height > 4500:
  19. pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
  20. img = Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
  21. img = np.array(img)
  22. img_dict = {'img': img, 'width': pm.width, 'height': pm.height}
  23. return img_dict
  24. @ImportPIL
  25. def load_images_from_pdf(pdf_bytes: bytes, dpi=200, start_page_id=0, end_page_id=None) -> list:
  26. from PIL import Image
  27. images = []
  28. with fitz.open('pdf', pdf_bytes) as doc:
  29. pdf_page_num = doc.page_count
  30. end_page_id = (
  31. end_page_id
  32. if end_page_id is not None and end_page_id >= 0
  33. else pdf_page_num - 1
  34. )
  35. if end_page_id > pdf_page_num - 1:
  36. logger.warning('end_page_id is out of range, use images length')
  37. end_page_id = pdf_page_num - 1
  38. for index in range(0, doc.page_count):
  39. if start_page_id <= index <= end_page_id:
  40. page = doc[index]
  41. mat = fitz.Matrix(dpi / 72, dpi / 72)
  42. pm = page.get_pixmap(matrix=mat, alpha=False)
  43. # If the width or height exceeds 4500 after scaling, do not scale further.
  44. if pm.width > 4500 or pm.height > 4500:
  45. pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
  46. img = Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
  47. img = np.array(img)
  48. img_dict = {'img': img, 'width': pm.width, 'height': pm.height}
  49. else:
  50. img_dict = {'img': [], 'width': 0, 'height': 0}
  51. images.append(img_dict)
  52. return images