test_pdf2text_recogPara_Common.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. import unittest
  2. from magic_pdf.post_proc.detect_para import (
  3. is_bbox_overlap,
  4. is_in_bbox,
  5. is_line_right_aligned_from_neighbors,
  6. is_line_left_aligned_from_neighbors,
  7. )
  8. # from ... pdf2text_recogPara import * # another way to import
  9. """
  10. Run the tests from the code-clean directory with either of the following commands:
  11. python -m tests.test_para.test_pdf2text_recogPara_Common
  12. or
  13. pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_Common.py
  14. """
  15. class TestIsBboxOverlap(unittest.TestCase):
  16. def test_overlap(self):
  17. bbox1 = [0, 0, 10, 10]
  18. bbox2 = [5, 5, 15, 15]
  19. result = is_bbox_overlap(bbox1, bbox2)
  20. self.assertTrue(result)
  21. def test_no_overlap(self):
  22. bbox1 = [0, 0, 10, 10]
  23. bbox2 = [11, 11, 15, 15]
  24. result = is_bbox_overlap(bbox1, bbox2)
  25. self.assertFalse(result)
  26. def test_partial_overlap(self):
  27. bbox1 = [0, 0, 10, 10]
  28. bbox2 = [5, 5, 15, 15]
  29. result = is_bbox_overlap(bbox1, bbox2)
  30. self.assertTrue(result)
  31. def test_same_bbox(self):
  32. bbox1 = [0, 0, 10, 10]
  33. bbox2 = [0, 0, 10, 10]
  34. result = is_bbox_overlap(bbox1, bbox2)
  35. self.assertTrue(result)
  36. # Test is_in_bbox function
  37. class TestIsInBbox(unittest.TestCase):
  38. def test_bbox1_in_bbox2(self):
  39. bbox1 = [0, 0, 10, 10]
  40. bbox2 = [0, 0, 20, 20]
  41. result = is_in_bbox(bbox1, bbox2)
  42. self.assertTrue(result)
  43. def test_bbox1_not_in_bbox2(self):
  44. bbox1 = [0, 0, 30, 30]
  45. bbox2 = [0, 0, 20, 20]
  46. result = is_in_bbox(bbox1, bbox2)
  47. self.assertFalse(result)
  48. def test_bbox1_equal_to_bbox2(self):
  49. bbox1 = [0, 0, 20, 20]
  50. bbox2 = [0, 0, 20, 20]
  51. result = is_in_bbox(bbox1, bbox2)
  52. self.assertTrue(result)
  53. def test_bbox1_partially_in_bbox2(self):
  54. bbox1 = [10, 10, 30, 30]
  55. bbox2 = [0, 0, 20, 20]
  56. result = is_in_bbox(bbox1, bbox2)
  57. self.assertFalse(result)
  58. # Test is_line_right_aligned_from_neighbors function
  59. class TestIsLineRightAlignedFromNeighbors(unittest.TestCase):
  60. def test_right_aligned_with_prev_line(self):
  61. curr_line_bbox = [0, 0, 100, 100]
  62. prev_line_bbox = [0, 0, 90, 100]
  63. next_line_bbox = None
  64. avg_char_width = 10
  65. direction = 0
  66. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  67. self.assertFalse(result)
  68. def test_right_aligned_with_next_line(self):
  69. curr_line_bbox = [0, 0, 100, 100]
  70. prev_line_bbox = None
  71. next_line_bbox = [0, 0, 110, 100]
  72. avg_char_width = 10
  73. direction = 1
  74. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  75. self.assertFalse(result)
  76. def test_right_aligned_with_both_lines(self):
  77. curr_line_bbox = [0, 0, 100, 100]
  78. prev_line_bbox = [0, 0, 90, 100]
  79. next_line_bbox = [0, 0, 110, 100]
  80. avg_char_width = 10
  81. direction = 2
  82. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  83. self.assertFalse(result)
  84. def test_not_right_aligned_with_prev_line(self):
  85. curr_line_bbox = [0, 0, 100, 100]
  86. prev_line_bbox = [0, 0, 80, 100]
  87. next_line_bbox = None
  88. avg_char_width = 10
  89. direction = 0
  90. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  91. self.assertFalse(result)
  92. def test_not_right_aligned_with_next_line(self):
  93. curr_line_bbox = [0, 0, 100, 100]
  94. prev_line_bbox = None
  95. next_line_bbox = [0, 0, 120, 100]
  96. avg_char_width = 10
  97. direction = 1
  98. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  99. self.assertFalse(result)
  100. def test_not_right_aligned_with_both_lines(self):
  101. curr_line_bbox = [0, 0, 100, 100]
  102. prev_line_bbox = [0, 0, 80, 100]
  103. next_line_bbox = [0, 0, 120, 100]
  104. avg_char_width = 10
  105. direction = 2
  106. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  107. self.assertFalse(result)
  108. def test_invalid_direction(self):
  109. curr_line_bbox = [0, 0, 100, 100]
  110. prev_line_bbox = None
  111. next_line_bbox = None
  112. avg_char_width = 10
  113. direction = 3
  114. result = is_line_right_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  115. self.assertFalse(result)
  116. # Test is_line_left_aligned_from_neighbors function
  117. class TestIsLineLeftAlignedFromNeighbors(unittest.TestCase):
  118. def test_left_aligned_with_prev_line(self):
  119. curr_line_bbox = [10, 20, 30, 40]
  120. prev_line_bbox = [5, 20, 30, 40]
  121. next_line_bbox = None
  122. avg_char_width = 5.0
  123. direction = 0
  124. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  125. self.assertFalse(result)
  126. def test_left_aligned_with_next_line(self):
  127. curr_line_bbox = [10, 20, 30, 40]
  128. prev_line_bbox = None
  129. next_line_bbox = [15, 20, 30, 40]
  130. avg_char_width = 5.0
  131. direction = 1
  132. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  133. self.assertFalse(result)
  134. def test_left_aligned_with_both_lines(self):
  135. curr_line_bbox = [10, 20, 30, 40]
  136. prev_line_bbox = [5, 20, 30, 40]
  137. next_line_bbox = [15, 20, 30, 40]
  138. avg_char_width = 5.0
  139. direction = 2
  140. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  141. self.assertFalse(result)
  142. def test_not_left_aligned_with_prev_line(self):
  143. curr_line_bbox = [10, 20, 30, 40]
  144. prev_line_bbox = [5, 20, 30, 40]
  145. next_line_bbox = None
  146. avg_char_width = 5.0
  147. direction = 0
  148. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  149. self.assertFalse(result)
  150. def test_not_left_aligned_with_next_line(self):
  151. curr_line_bbox = [10, 20, 30, 40]
  152. prev_line_bbox = None
  153. next_line_bbox = [15, 20, 30, 40]
  154. avg_char_width = 5.0
  155. direction = 1
  156. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  157. self.assertFalse(result)
  158. def test_not_left_aligned_with_both_lines(self):
  159. curr_line_bbox = [10, 20, 30, 40]
  160. prev_line_bbox = [5, 20, 30, 40]
  161. next_line_bbox = [15, 20, 30, 40]
  162. avg_char_width = 5.0
  163. direction = 2
  164. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  165. self.assertFalse(result)
  166. def test_invalid_direction(self):
  167. curr_line_bbox = [10, 20, 30, 40]
  168. prev_line_bbox = None
  169. next_line_bbox = None
  170. avg_char_width = 5.0
  171. direction = 3
  172. result = is_line_left_aligned_from_neighbors(curr_line_bbox, prev_line_bbox, next_line_bbox, avg_char_width, direction)
  173. self.assertFalse(result)
  174. if __name__ == "__main__":
  175. unittest.main()