test_cli_dev.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. import os
  2. import shutil
  3. import tempfile
  4. from click.testing import CliRunner
  5. from magic_pdf.tools import cli_dev
  6. def test_cli_pdf():
  7. # setup
  8. unitest_dir = '/tmp/magic_pdf/unittest/tools'
  9. filename = 'cli_test_01'
  10. os.makedirs(unitest_dir, exist_ok=True)
  11. temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
  12. # run
  13. runner = CliRunner()
  14. result = runner.invoke(
  15. cli_dev.cli,
  16. [
  17. 'pdf',
  18. '-p',
  19. 'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
  20. '-j',
  21. 'tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json',
  22. '-o',
  23. temp_output_dir,
  24. ],
  25. )
  26. # check
  27. assert result.exit_code == 0
  28. base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
  29. r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
  30. assert r.st_size > 5000
  31. r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
  32. assert r.st_size > 7000
  33. r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
  34. assert r.st_size > 200000
  35. r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
  36. assert r.st_size > 15000
  37. r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
  38. assert r.st_size > 400000
  39. r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
  40. assert r.st_size > 400000
  41. r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
  42. assert r.st_size > 400000
  43. assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
  44. assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
  45. # teardown
  46. shutil.rmtree(temp_output_dir)
  47. def test_cli_jsonl():
  48. # setup
  49. unitest_dir = '/tmp/magic_pdf/unittest/tools'
  50. filename = 'cli_test_01'
  51. os.makedirs(unitest_dir, exist_ok=True)
  52. temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
  53. def mock_read_s3_path(s3path):
  54. with open(s3path, 'rb') as f:
  55. return f.read()
  56. cli_dev.read_s3_path = mock_read_s3_path # mock
  57. # run
  58. runner = CliRunner()
  59. result = runner.invoke(
  60. cli_dev.cli,
  61. [
  62. 'jsonl',
  63. '-j',
  64. 'tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl',
  65. '-o',
  66. temp_output_dir,
  67. ],
  68. )
  69. # check
  70. assert result.exit_code == 0
  71. base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
  72. r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
  73. assert r.st_size > 5000
  74. r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
  75. assert r.st_size > 7000
  76. r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
  77. assert r.st_size > 200000
  78. r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
  79. assert r.st_size > 15000
  80. r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
  81. assert r.st_size > 400000
  82. r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
  83. assert r.st_size > 400000
  84. r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
  85. assert r.st_size > 400000
  86. assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
  87. assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
  88. # teardown
  89. shutil.rmtree(temp_output_dir)