utils.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. import fitz
  2. import numpy as np
  3. from loguru import logger
  4. def fitz_doc_to_image(doc, dpi=200) -> dict:
  5. """Convert fitz.Document to image, Then convert the image to numpy array.
  6. Args:
  7. doc (_type_): pymudoc page
  8. dpi (int, optional): reset the dpi of dpi. Defaults to 200.
  9. Returns:
  10. dict: {'img': numpy array, 'width': width, 'height': height }
  11. """
  12. mat = fitz.Matrix(dpi / 72, dpi / 72)
  13. pm = doc.get_pixmap(matrix=mat, alpha=False)
  14. # If the width or height exceeds 4500 after scaling, do not scale further.
  15. if pm.width > 4500 or pm.height > 4500:
  16. pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
  17. # Convert pixmap samples directly to numpy array
  18. img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3)
  19. img_dict = {'img': img, 'width': pm.width, 'height': pm.height}
  20. return img_dict
  21. @ImportPIL
  22. def load_images_from_pdf(pdf_bytes: bytes, dpi=200, start_page_id=0, end_page_id=None) -> list:
  23. images = []
  24. with fitz.open('pdf', pdf_bytes) as doc:
  25. pdf_page_num = doc.page_count
  26. end_page_id = (
  27. end_page_id
  28. if end_page_id is not None and end_page_id >= 0
  29. else pdf_page_num - 1
  30. )
  31. if end_page_id > pdf_page_num - 1:
  32. logger.warning('end_page_id is out of range, use images length')
  33. end_page_id = pdf_page_num - 1
  34. for index in range(0, doc.page_count):
  35. if start_page_id <= index <= end_page_id:
  36. page = doc[index]
  37. mat = fitz.Matrix(dpi / 72, dpi / 72)
  38. pm = page.get_pixmap(matrix=mat, alpha=False)
  39. # If the width or height exceeds 4500 after scaling, do not scale further.
  40. if pm.width > 4500 or pm.height > 4500:
  41. pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
  42. # Convert pixmap samples directly to numpy array
  43. img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3)
  44. img_dict = {'img': img, 'width': pm.width, 'height': pm.height}
  45. else:
  46. img_dict = {'img': [], 'width': 0, 'height': 0}
  47. images.append(img_dict)
  48. return images