test_cli.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. import os
  2. import shutil
  3. import tempfile
  4. from click.testing import CliRunner
  5. from magic_pdf.tools.cli import cli
  6. def test_cli_pdf():
  7. # setup
  8. unitest_dir = '/tmp/magic_pdf/unittest/tools'
  9. filename = 'cli_test_01'
  10. os.makedirs(unitest_dir, exist_ok=True)
  11. temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
  12. # run
  13. runner = CliRunner()
  14. result = runner.invoke(
  15. cli,
  16. [
  17. '-p',
  18. 'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
  19. '-o',
  20. temp_output_dir,
  21. ],
  22. )
  23. # check
  24. assert result.exit_code == 0
  25. base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
  26. r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
  27. assert r.st_size > 7000
  28. r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
  29. assert r.st_size > 200000
  30. r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
  31. assert r.st_size > 15000
  32. r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
  33. assert r.st_size > 400000
  34. r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
  35. assert r.st_size > 400000
  36. r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
  37. assert r.st_size > 400000
  38. assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
  39. assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
  40. assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
  41. # teardown
  42. shutil.rmtree(temp_output_dir)
  43. def test_cli_path():
  44. # setup
  45. unitest_dir = '/tmp/magic_pdf/unittest/tools'
  46. os.makedirs(unitest_dir, exist_ok=True)
  47. temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
  48. # run
  49. runner = CliRunner()
  50. result = runner.invoke(
  51. cli, ['-p', 'tests/unittest/test_tools/assets/cli/path', '-o', temp_output_dir]
  52. )
  53. # check
  54. assert result.exit_code == 0
  55. filename = 'cli_test_01'
  56. base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
  57. r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
  58. assert r.st_size > 7000
  59. r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
  60. assert r.st_size > 200000
  61. r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
  62. assert r.st_size > 15000
  63. r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
  64. assert r.st_size > 400000
  65. r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
  66. assert r.st_size > 400000
  67. r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
  68. assert r.st_size > 400000
  69. assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
  70. assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
  71. assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
  72. base_output_dir = os.path.join(temp_output_dir, 'cli_test_02/auto')
  73. filename = 'cli_test_02'
  74. r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
  75. assert r.st_size > 5000
  76. r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
  77. assert r.st_size > 200000
  78. r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
  79. assert r.st_size > 15000
  80. r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
  81. assert r.st_size > 400000
  82. r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
  83. assert r.st_size > 400000
  84. r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
  85. assert r.st_size > 400000
  86. assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
  87. assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
  88. assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
  89. # teardown
  90. shutil.rmtree(temp_output_dir)