# test_cli.py
  1. import os
  2. import shutil
  3. import tempfile
  4. from click.testing import CliRunner
  5. from magic_pdf.tools.cli import cli
  def test_cli_pdf():
      """Run the CLI on a single PDF and check the files it writes.

      Invokes ``cli`` with ``-p <pdf>`` and ``-o <temp dir>``, then verifies
      that every expected artifact under ``<temp dir>/cli_test_01/auto``
      exists and exceeds a minimum size, that an ``images`` directory was
      created, and that no ``*_content_list.json`` file was produced.
      """
      # setup
      unitest_dir = '/tmp/magic_pdf/unittest/tools'
      filename = 'cli_test_01'
      os.makedirs(unitest_dir, exist_ok=True)
      # mkdtemp already creates the directory, so no second makedirs is needed.
      temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

      try:
          # run
          runner = CliRunner()
          result = runner.invoke(
              cli,
              [
                  '-p',
                  'tests/test_tools/assets/cli/pdf/cli_test_01.pdf',
                  '-o',
                  temp_output_dir,
              ],
          )

          # check
          # Surface the captured CLI output when the command fails.
          assert result.exit_code == 0, result.output

          base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

          # Output file suffix -> size (bytes) the file must exceed.
          min_sizes = {
              '.md': 7000,
              '_middle.json': 200000,
              '_model.json': 15000,
              '_origin.pdf': 500000,
              '_layout.pdf': 500000,
              '_spans.pdf': 500000,
          }
          for suffix, min_size in min_sizes.items():
              path = os.path.join(base_output_dir, f'{filename}{suffix}')
              # os.stat raises FileNotFoundError if the file is missing.
              size = os.stat(path).st_size
              assert size > min_size, f'{path}: {size} bytes <= {min_size}'

          images_dir = os.path.join(base_output_dir, 'images')
          assert os.path.exists(images_dir)
          assert os.path.isdir(images_dir)

          content_list = os.path.join(base_output_dir,
                                      f'{filename}_content_list.json')
          assert not os.path.exists(content_list)
      finally:
          # teardown: always remove the temp dir, even when a check fails.
          shutil.rmtree(temp_output_dir)
  def test_cli_path():
      """Run the CLI on a directory of PDFs and check each document's output.

      Invokes ``cli`` with ``-p tests/test_tools/assets/cli/path`` and
      ``-o <temp dir>``, then, for both ``cli_test_01`` and ``cli_test_02``,
      verifies that every expected artifact under ``<temp dir>/<name>/auto``
      exists and exceeds a minimum size, that an ``images`` directory was
      created, and that no ``*_content_list.json`` file was produced.
      """
      # setup
      unitest_dir = '/tmp/magic_pdf/unittest/tools'
      os.makedirs(unitest_dir, exist_ok=True)
      # mkdtemp already creates the directory, so no second makedirs is needed.
      temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

      try:
          # run
          runner = CliRunner()
          result = runner.invoke(
              cli,
              ['-p', 'tests/test_tools/assets/cli/path', '-o', temp_output_dir])

          # check
          # Surface the captured CLI output when the command fails.
          assert result.exit_code == 0, result.output

          # Output file suffix -> size (bytes) the file must exceed; these
          # thresholds are shared by both documents.
          common_min_sizes = {
              '_middle.json': 200000,
              '_model.json': 15000,
              '_origin.pdf': 500000,
              '_layout.pdf': 500000,
              '_spans.pdf': 500000,
          }
          # Only the markdown threshold differs between the two documents.
          documents = (('cli_test_01', 7000), ('cli_test_02', 5000))

          for filename, md_min_size in documents:
              base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

              min_sizes = {'.md': md_min_size, **common_min_sizes}
              for suffix, min_size in min_sizes.items():
                  path = os.path.join(base_output_dir, f'{filename}{suffix}')
                  # os.stat raises FileNotFoundError if the file is missing.
                  size = os.stat(path).st_size
                  assert size > min_size, (
                      f'{path}: {size} bytes <= {min_size}')

              images_dir = os.path.join(base_output_dir, 'images')
              assert os.path.exists(images_dir)
              assert os.path.isdir(images_dir)

              content_list = os.path.join(base_output_dir,
                                          f'{filename}_content_list.json')
              assert not os.path.exists(content_list)
      finally:
          # teardown: always remove the temp dir, even when a check fails.
          shutil.rmtree(temp_output_dir)