# test_cli_dev.py: tests for the magic_pdf.tools.cli_dev command-line interface

  1. import tempfile
  2. import os
  3. import shutil
  4. from click.testing import CliRunner
  5. from magic_pdf.tools import cli_dev
  6. def test_cli_pdf():
  7. # setup
  8. unitest_dir = "/tmp/magic_pdf/unittest/tools"
  9. filename = "cli_test_01"
  10. os.makedirs(unitest_dir, exist_ok=True)
  11. temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
  12. # run
  13. runner = CliRunner()
  14. result = runner.invoke(
  15. cli_dev.cli,
  16. [
  17. "pdf",
  18. "-p",
  19. "tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
  20. "-j",
  21. "tests/test_tools/assets/cli_dev/cli_test_01.model.json",
  22. "-o",
  23. temp_output_dir,
  24. ],
  25. )
  26. # check
  27. assert result.exit_code == 0
  28. base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
  29. r = os.stat(os.path.join(base_output_dir, "content_list.json"))
  30. assert r.st_size > 5000
  31. r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
  32. assert r.st_size > 7000
  33. r = os.stat(os.path.join(base_output_dir, "middle.json"))
  34. assert r.st_size > 200000
  35. r = os.stat(os.path.join(base_output_dir, "model.json"))
  36. assert r.st_size > 15000
  37. r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
  38. assert r.st_size > 500000
  39. r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
  40. assert r.st_size > 500000
  41. r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
  42. assert r.st_size > 500000
  43. assert os.path.exists(os.path.join(base_output_dir, "images")) is True
  44. assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
  45. # teardown
  46. shutil.rmtree(temp_output_dir)
  47. def test_cli_jsonl():
  48. # setup
  49. unitest_dir = "/tmp/magic_pdf/unittest/tools"
  50. filename = "cli_test_01"
  51. os.makedirs(unitest_dir, exist_ok=True)
  52. temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
  53. def mock_read_s3_path(s3path):
  54. with open(s3path, "rb") as f:
  55. return f.read()
  56. cli_dev.read_s3_path = mock_read_s3_path # mock
  57. # run
  58. runner = CliRunner()
  59. result = runner.invoke(
  60. cli_dev.cli,
  61. [
  62. "jsonl",
  63. "-j",
  64. "tests/test_tools/assets/cli_dev/cli_test_01.jsonl",
  65. "-o",
  66. temp_output_dir,
  67. ],
  68. )
  69. # check
  70. assert result.exit_code == 0
  71. base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
  72. r = os.stat(os.path.join(base_output_dir, "content_list.json"))
  73. assert r.st_size > 5000
  74. r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
  75. assert r.st_size > 7000
  76. r = os.stat(os.path.join(base_output_dir, "middle.json"))
  77. assert r.st_size > 200000
  78. r = os.stat(os.path.join(base_output_dir, "model.json"))
  79. assert r.st_size > 15000
  80. r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
  81. assert r.st_size > 500000
  82. r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
  83. assert r.st_size > 500000
  84. r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
  85. assert r.st_size > 500000
  86. assert os.path.exists(os.path.join(base_output_dir, "images")) is True
  87. assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
  88. # teardown
  89. shutil.rmtree(temp_output_dir)