# test_dataset.py (545 B)

  1. from magic_pdf.data.dataset import ImageDataset, PymuDocDataset
  2. def test_pymudataset():
  3. with open('tests/test_data/assets/pdfs/test_01.pdf', 'rb') as f:
  4. bits = f.read()
  5. datasets = PymuDocDataset(bits)
  6. assert len(datasets) > 0
  7. assert datasets.get_page(0).get_page_info().h > 100
  8. def test_imagedataset():
  9. with open('tests/test_data/assets/pngs/test_01.png', 'rb') as f:
  10. bits = f.read()
  11. datasets = ImageDataset(bits)
  12. assert len(datasets) == 1
  13. assert datasets.get_page(0).get_page_info().w > 100