test_utils.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import json
  2. import os
  3. import shutil
  4. import tempfile
  5. from magic_pdf.integrations.rag.type import CategoryType
  6. from magic_pdf.integrations.rag.utils import (
  7. convert_middle_json_to_layout_elements, inference)
  8. def test_convert_middle_json_to_layout_elements():
  9. # setup
  10. unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
  11. os.makedirs(unitest_dir, exist_ok=True)
  12. temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
  13. os.makedirs(temp_output_dir, exist_ok=True)
  14. # test
  15. with open('tests/test_integrations/test_rag/assets/middle.json') as f:
  16. json_data = json.load(f)
  17. res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
  18. assert len(res) == 1
  19. assert len(res[0].layout_dets) == 10
  20. assert res[0].layout_dets[0].anno_id == 0
  21. assert res[0].layout_dets[0].category_type == CategoryType.text
  22. assert len(res[0].extra.element_relation) == 3
  23. # teardown
  24. shutil.rmtree(temp_output_dir)
  25. def test_inference():
  26. asset_dir = 'tests/test_integrations/test_rag/assets'
  27. # setup
  28. unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
  29. os.makedirs(unitest_dir, exist_ok=True)
  30. temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
  31. os.makedirs(temp_output_dir, exist_ok=True)
  32. # test
  33. res = inference(
  34. asset_dir + '/one_page_with_table_image.pdf',
  35. temp_output_dir,
  36. 'ocr',
  37. )
  38. assert res is not None
  39. assert len(res) == 1
  40. assert len(res[0].layout_dets) == 10
  41. assert res[0].layout_dets[0].anno_id == 0
  42. assert res[0].layout_dets[0].category_type == CategoryType.text
  43. assert len(res[0].extra.element_relation) == 3
  44. # teardown
  45. shutil.rmtree(temp_output_dir)