test_cli.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import tempfile
  2. import os
  3. import shutil
  4. from click.testing import CliRunner
  5. from magic_pdf.tools.cli import cli
  6. def test_cli_pdf():
  7. # setup
  8. unitest_dir = "/tmp/magic_pdf/unittest/tools"
  9. filename = "cli_test_01"
  10. os.makedirs(unitest_dir, exist_ok=True)
  11. temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
  12. # run
  13. runner = CliRunner()
  14. result = runner.invoke(
  15. cli,
  16. [
  17. "-p",
  18. "tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
  19. "-o",
  20. temp_output_dir,
  21. ],
  22. )
  23. # check
  24. assert result.exit_code == 0
  25. base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
  26. r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
  27. assert r.st_size > 7000
  28. r = os.stat(os.path.join(base_output_dir, "middle.json"))
  29. assert r.st_size > 200000
  30. r = os.stat(os.path.join(base_output_dir, "model.json"))
  31. assert r.st_size > 15000
  32. r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
  33. assert r.st_size > 500000
  34. r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
  35. assert r.st_size > 500000
  36. r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
  37. assert r.st_size > 500000
  38. assert os.path.exists(os.path.join(base_output_dir, "images")) is True
  39. assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
  40. assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
  41. # teardown
  42. shutil.rmtree(temp_output_dir)
  43. def test_cli_path():
  44. # setup
  45. unitest_dir = "/tmp/magic_pdf/unittest/tools"
  46. os.makedirs(unitest_dir, exist_ok=True)
  47. temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
  48. # run
  49. runner = CliRunner()
  50. result = runner.invoke(
  51. cli, ["-p", "tests/test_tools/assets/cli/path", "-o", temp_output_dir]
  52. )
  53. # check
  54. assert result.exit_code == 0
  55. filename = "cli_test_01"
  56. base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
  57. r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
  58. assert r.st_size > 7000
  59. r = os.stat(os.path.join(base_output_dir, "middle.json"))
  60. assert r.st_size > 200000
  61. r = os.stat(os.path.join(base_output_dir, "model.json"))
  62. assert r.st_size > 15000
  63. r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
  64. assert r.st_size > 500000
  65. r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
  66. assert r.st_size > 500000
  67. r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
  68. assert r.st_size > 500000
  69. assert os.path.exists(os.path.join(base_output_dir, "images")) is True
  70. assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
  71. assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
  72. base_output_dir = os.path.join(temp_output_dir, "cli_test_02/auto")
  73. filename = "cli_test_02"
  74. r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
  75. assert r.st_size > 5000
  76. r = os.stat(os.path.join(base_output_dir, "middle.json"))
  77. assert r.st_size > 200000
  78. r = os.stat(os.path.join(base_output_dir, "model.json"))
  79. assert r.st_size > 15000
  80. r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
  81. assert r.st_size > 500000
  82. r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
  83. assert r.st_size > 500000
  84. r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
  85. assert r.st_size > 500000
  86. assert os.path.exists(os.path.join(base_output_dir, "images")) is True
  87. assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
  88. assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
  89. # teardown
  90. shutil.rmtree(temp_output_dir)