test_pdf2text_recogPara_BlockInnerParasProcessor.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. import unittest
  2. from magic_pdf.post_proc.detect_para import BlockTerminationProcessor
  3. # from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import
"""
Run these tests from the `code-clean` directory with either of the following commands:
    python -m tests.test_para.test_pdf2text_recogPara_BlockInnerParasProcessor
or
    pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
"""
  10. class TestIsConsistentLines(unittest.TestCase):
  11. def setUp(self):
  12. self.obj = BlockTerminationProcessor()
  13. def test_consistent_with_prev_line(self):
  14. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  15. prev_line = {"spans": [{"size": 12, "font": "Arial"}]}
  16. next_line = None
  17. consistent_direction = 0
  18. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  19. self.assertTrue(result)
  20. def test_consistent_with_next_line(self):
  21. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  22. prev_line = None
  23. next_line = {"spans": [{"size": 12, "font": "Arial"}]}
  24. consistent_direction = 1
  25. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  26. self.assertTrue(result)
  27. def test_consistent_with_both_lines(self):
  28. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  29. prev_line = {"spans": [{"size": 12, "font": "Arial"}]}
  30. next_line = {"spans": [{"size": 12, "font": "Arial"}]}
  31. consistent_direction = 2
  32. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  33. self.assertTrue(result)
  34. def test_inconsistent_with_prev_line(self):
  35. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  36. prev_line = {"spans": [{"size": 14, "font": "Arial"}]}
  37. next_line = None
  38. consistent_direction = 0
  39. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  40. self.assertFalse(result)
  41. def test_inconsistent_with_next_line(self):
  42. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  43. prev_line = None
  44. next_line = {"spans": [{"size": 14, "font": "Arial"}]}
  45. consistent_direction = 1
  46. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  47. self.assertFalse(result)
  48. def test_inconsistent_with_both_lines(self):
  49. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  50. prev_line = {"spans": [{"size": 14, "font": "Arial"}]}
  51. next_line = {"spans": [{"size": 14, "font": "Arial"}]}
  52. consistent_direction = 2
  53. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  54. self.assertFalse(result)
  55. def test_invalid_consistent_direction(self):
  56. curr_line = {"spans": [{"size": 12, "font": "Arial"}]}
  57. prev_line = None
  58. next_line = None
  59. consistent_direction = 3
  60. result = self.obj._is_consistent_lines(curr_line, prev_line, next_line, consistent_direction)
  61. self.assertFalse(result)
  62. def test_possible_start_of_para(self):
  63. curr_line = {"bbox": (0, 0, 100, 10)}
  64. prev_line = {"bbox": (0, 20, 100, 30)}
  65. next_line = {"bbox": (0, 40, 100, 50)}
  66. X0 = 0
  67. X1 = 100
  68. avg_char_width = 5
  69. avg_font_size = 10
  70. result, _, _ = self.obj._is_possible_start_of_para(
  71. curr_line, prev_line, next_line, X0, X1, avg_char_width, avg_font_size
  72. )
  73. self.assertTrue(result)
  74. def test_not_possible_start_of_para(self):
  75. curr_line = {"bbox": (0, 0, 100, 10)}
  76. prev_line = {"bbox": (0, 20, 100, 30)}
  77. next_line = {"bbox": (0, 40, 100, 50)}
  78. X0 = 0
  79. X1 = 100
  80. avg_char_width = 5
  81. avg_font_size = 10
  82. result, _, _ = self.obj._is_possible_start_of_para(curr_line, prev_line, next_line, X0, X1, avg_char_width, avg_font_size)
  83. self.assertTrue(result)
  84. if __name__ == "__main__":
  85. unittest.main()