test_unit.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. import pytest
  2. from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in_or_part_overlap_with_area_ratio, _is_in, \
  3. _is_part_overlap, _left_intersect, _right_intersect, _is_vertical_full_overlap, _is_bottom_full_overlap, \
  4. _is_left_overlap, __is_overlaps_y_exceeds_threshold, calculate_iou, calculate_overlap_area_2_minbox_area_ratio, \
  5. calculate_overlap_area_in_bbox1_area_ratio, get_minbox_if_overlap_by_ratio
  6. from magic_pdf.libs.commons import mymax, join_path, get_top_percent_list
  7. from magic_pdf.libs.path_utils import parse_s3path
  8. # 输入一个列表,如果列表空返回0,否则返回最大元素
  9. @pytest.mark.parametrize("list_input, target_num",
  10. [
  11. # ([0, 0, 0, 0], 0),
  12. # ([0], 0),
  13. # ([1, 2, 5, 8, 4], 8),
  14. # ([], 0),
  15. # ([1.1, 7.6, 1.009, 9.9], 9.9),
  16. ([1.0 * 10 ** 2, 3.5 * 10 ** 3, 0.9 * 10 ** 6], 0.9 * 10 ** 6),
  17. ])
  18. def test_list_max(list_input: list, target_num) -> None:
  19. """
  20. list_input: 输入列表元素,元素均为数字类型
  21. """
  22. assert target_num == mymax(list_input)
  23. # 连接多个参数生成路径信息,使用"/"作为连接符,生成的结果需要是一个合法路径
  24. @pytest.mark.parametrize("path_input, target_path", [
  25. # (['https:', '', 'www.baidu.com'], 'https://www.baidu.com'),
  26. # (['https:', 'www.baidu.com'], 'https:/www.baidu.com'),
  27. (['D:', 'file', 'pythonProject', 'demo' + '.py'], 'D:/file/pythonProject/demo.py'),
  28. ])
  29. def test_join_path(path_input: list, target_path: str) -> None:
  30. """
  31. path_input: 输入path的列表,列表元素均为字符串
  32. """
  33. assert target_path == join_path(*path_input)
  34. # 获取列表中前百分之多少的元素
  35. @pytest.mark.parametrize("num_list, percent, target_num_list", [
  36. # ([], 0.75, []),
  37. # ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0.8, [23, 9, 7, 3, 0, -1, -5, -7]),
  38. # ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0, []),
  39. ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11, 28], 0.8, [28, 23, 9, 7, 3, 0, -1, -5])
  40. ])
  41. def test_get_top_percent_list(num_list: list, percent: float, target_num_list: list) -> None:
  42. """
  43. num_list: 数字列表,列表元素为数字
  44. percent: 占比,float, 向下取证
  45. """
  46. assert target_num_list == get_top_percent_list(num_list, percent)
  47. # 输入一个s3路径,返回bucket名字和其余部分(key)
  48. @pytest.mark.parametrize("s3_path, target_data", [
  49. # ("s3://bucket/path/to/my/file.txt", "bucket"),
  50. # ("/path/to/my/file1.txt", "path"),
  51. ("bucket/path/to/my/file2.txt", "bucket"),
  52. # ("file2.txt", "False")
  53. ])
  54. def test_parse_s3path(s3_path: str, target_data: str):
  55. """
  56. s3_path: s3路径
  57. 如果为无效路径,则返回对应的bucket名字和其余部分
  58. 如果为异常路径 例如:file2.txt,则报异常
  59. """
  60. out_keys = parse_s3path(s3_path)
  61. assert target_data == out_keys[0]
  62. # 2个box是否处于包含或者部分重合关系。
  63. # 如果某边界重合算重合。
  64. # 部分边界重合,其他在内部也算包含
  65. @pytest.mark.parametrize("box1, box2, target_bool", [
  66. ((120, 133, 223, 248), (128, 168, 269, 295), True),
  67. # ((137, 53, 245, 157), (134, 11, 200, 147), True), # 部分重合
  68. # ((137, 56, 211, 116), (140, 66, 202, 199), True), # 部分重合
  69. # ((42, 34, 69, 65), (42, 34, 69, 65), True), # 部分重合
  70. # ((39, 63, 87, 106), (37, 66, 85, 109), True), # 部分重合
  71. # ((13, 37, 55, 66), (7, 46, 49, 75), True), # 部分重合
  72. # ((56, 83, 85, 104), (64, 85, 93, 106), True), # 部分重合
  73. # ((12, 53, 48, 94), (14, 53, 50, 94), True), # 部分重合
  74. # ((43, 54, 93, 131), (55, 82, 77, 106), True), # 包含
  75. # ((63, 2, 134, 71), (72, 43, 104, 78), True), # 包含
  76. # ((25, 57, 109, 127), (26, 73, 49, 95), True), # 包含
  77. # ((24, 47, 111, 115), (34, 81, 58, 106), True), # 包含
  78. # ((34, 8, 105, 83), (76, 20, 116, 45), True), # 包含
  79. ])
  80. def test_is_in_or_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  81. """
  82. box1: 坐标数组
  83. box2: 坐标数组
  84. """
  85. assert target_bool == _is_in_or_part_overlap(box1, box2)
  86. # 如果box1在box2内部,返回True
  87. # 如果是部分重合的,则重合面积占box1的比例大于阈值时候返回True
  88. @pytest.mark.parametrize("box1, box2, target_bool", [
  89. # ((35, 28, 108, 90), (47, 60, 83, 96), True), # 包含 box1 up box2, box2 多半,box1少半
  90. # ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2
  91. # ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2
  92. # ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半
  93. # ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半
  94. # ((100, 93, 199, 168), (169, 126, 198, 165), False), # 包含 box2 in box1
  95. # ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1
  96. # ((28, 90, 77, 126), (35, 84, 84, 120), True), # 相交 box1多半,box2多半
  97. # ((37, 6, 69, 52), (28, 3, 60, 49), True), # 相交 box1多半,box2多半
  98. ((94, 29, 133, 60), (84, 30, 123, 61), True), # 相交 box1多半,box2多半
  99. ])
  100. def test_is_in_or_part_overlap_with_area_ratio(box1: tuple, box2: tuple, target_bool: bool) -> None:
  101. out_bool = _is_in_or_part_overlap_with_area_ratio(box1, box2)
  102. assert target_bool == out_bool
  103. # box1在box2内部或者box2在box1内部返回True。如果部分边界重合也算作包含。
  104. @pytest.mark.parametrize("box1, box2, target_bool", [
  105. # ((), (), False),
  106. # ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2
  107. # ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2
  108. # ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离
  109. # ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半
  110. # ((92, 102, 131, 139), (65, 88, 127, 144), False), # 包含 box1 多半
  111. # ((68, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 两边x相切
  112. # ((69, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 一边x相切
  113. ((69, 114, 118, 122), (68, 90, 118, 122), True), # 包含,box1 in box2 一边y相切
  114. # ((100, 93, 199, 168), (169, 126, 198, 165), True), # 包含 box2 in box1 Error
  115. # ((26, 75, 106, 172), (65, 108, 90, 128), True), # 包含 box2 in box1 Error
  116. # ((38, 94, 122, 120), (68, 94, 118, 120), True), # 包含,box2 in box1 两边y相切 Error
  117. # ((68, 34, 118, 158), (68, 94, 118, 120), True), # 包含,box2 in box1 两边x相切 Error
  118. # ((68, 34, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边x相切 Error
  119. # ((27, 94, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边y相切 Error
  120. ])
  121. def test_is_in(box1: tuple, box2: tuple, target_bool: bool) -> None:
  122. assert target_bool == _is_in(box1, box2)
  123. # 仅仅是部分包含关系,返回True,如果是完全包含关系则返回False
  124. @pytest.mark.parametrize("box1, box2, target_bool", [
  125. ((65, 151, 92, 177), (49, 99, 105, 198), False), # 包含 box1 in box2
  126. # ((80, 62, 112, 84), (74, 40, 144, 111), False), # 包含 box1 in box2
  127. # ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离 Error
  128. # ((76, 140, 154, 277), (121, 277, 192, 384), True), # 外相切
  129. # ((65, 88, 127, 144), (92, 102, 131, 139), True), # 包含 box2 多半,box1约一半
  130. # ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半
  131. # ((68, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 两边x相切
  132. # ((69, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 一边x相切
  133. # ((69, 114, 118, 122), (68, 90, 118, 122), False), # 包含,box1 in box2 一边y相切
  134. # ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1 Error
  135. # ((38, 94, 122, 120), (68, 94, 118, 120), False), # 包含,box2 in box1 两边y相切 Error
  136. # ((68, 34, 118, 158), (68, 94, 84, 120), False), # 包含,box2 in box1 一边x相切 Error
  137. ])
  138. def test_is_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  139. assert target_bool == _is_part_overlap(box1, box2)
  140. # left_box右侧是否和right_box左侧有部分重叠
  141. @pytest.mark.parametrize("box1, box2, target_bool", [
  142. # (None, None, False),
  143. # ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  144. # ((121, 149, 184, 289), (172, 130, 230, 268), True), # box1 left bottom box2 相交
  145. # ((172, 130, 230, 268),(121, 149, 184, 289), False), # box2 left bottom box1 相交
  146. # ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  147. # ((117, 53, 222, 176), (174, 142, 298, 276), True), # box1 left top box2 相交
  148. # ((174, 142, 298, 276), (117, 53, 222, 176), False), # box2 left top box1 相交
  149. # ((65, 88, 127, 144), (92, 102, 131, 139), True), # box1 left box2 y:box2 in box1
  150. # ((92, 102, 131, 139), (65, 88, 127, 144), False), # box2 left box1 y:box1 in box2
  151. # ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离
  152. ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 bottom box2 x:box2 in box1
  153. ])
  154. def test_left_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
  155. assert target_bool == _left_intersect(box1, box2)
  156. # left_box左侧是否和right_box右侧部分重叠
  157. @pytest.mark.parametrize("box1, box2, target_bool", [
  158. # (None, None, False),
  159. # ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  160. # ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交
  161. # ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交
  162. # ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  163. # ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交
  164. # ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交
  165. # ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1
  166. # ((92, 102, 131, 139), (65, 88, 127, 144), True), # box2 left box1 y:box1 in box2 Error
  167. ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离
  168. # ((1, 10, 26, 45), (3, 4, 20, 39), False), # box1 bottom box2 x:box2 in box1 Error
  169. ])
  170. def test_right_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
  171. assert target_bool == _right_intersect(box1, box2)
  172. # x方向上:要么box1包含box2, 要么box2包含box1。不能部分包含
  173. # y方向上:box1和box2有重叠
  174. @pytest.mark.parametrize("box1, box2, target_bool", [
  175. # (None, None, False), # Error
  176. # ((35, 28, 108, 90), (47, 60, 83, 96), True), # box1 top box2, x:box2 in box1, y:有重叠
  177. # ((35, 28, 98, 90), (27, 60, 103, 96), True), # box1 top box2, x:box1 in box2, y:有重叠
  178. # ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, x: box2 in box1, y:无重叠
  179. # ((47, 60, 83, 96),(35, 28, 108, 90), True), # box2 top box1, x:box1 in box2, y:有重叠
  180. # ((27, 60, 103, 96), (35, 28, 98, 90), True), # box2 top box1, x:box2 in box1, y:有重叠
  181. # ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, x: box1 in box2, y:无重叠
  182. # ((35, 28, 55, 90), (57, 60, 83, 96), False), # box1 top box2, x:无重叠, y:有重叠
  183. ((47, 60, 63, 96), (65, 28, 108, 90), False), # box2 top box1, x:无重叠, y:有重叠
  184. ])
  185. def test_is_vertical_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  186. assert target_bool == _is_vertical_full_overlap(box1, box2)
  187. # 检查box1下方和box2的上方有轻微的重叠,轻微程度收到y_tolerance的限制
  188. @pytest.mark.parametrize("box1, box2, target_bool", [
  189. # (None, None, False),
  190. # ((35, 28, 108, 90), (47, 89, 83, 116), True), # box1 top box2, y:有重叠
  191. # ((35, 28, 108, 90), (47, 60, 83, 96), False), # box1 top box2, y:有重叠且过多
  192. # ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, y:无重叠
  193. # ((47, 60, 83, 96), (35, 28, 108, 90), False), # box2 top box1, y:有重叠且过多
  194. # ((27, 89, 103, 116), (35, 28, 98, 90), False), # box2 top box1, y:有重叠
  195. ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, y:无重叠
  196. ])
  197. def test_is_bottom_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  198. assert target_bool == _is_bottom_full_overlap(box1, box2)
  199. # 检查box1的左侧是否和box2有重叠
  200. @pytest.mark.parametrize("box1, box2, target_bool", [
  201. # (None, None, False),
  202. # ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  203. # ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交 Error
  204. # ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交 Error
  205. # ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  206. # ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交
  207. # ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交 Error
  208. # ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1 Error
  209. ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 middle bottom box2 x:box2 in box1
  210. ])
  211. def test_is_left_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  212. assert target_bool == _is_left_overlap(box1, box2)
  213. # 查两个bbox在y轴上是否有重叠,并且该重叠区域的高度占两个bbox高度更低的那个超过阈值
  214. @pytest.mark.parametrize("box1, box2, target_bool", [
  215. # (None, None, "Error"), # Error
  216. # ((51, 69, 192, 147), (75, 48, 132, 187), True), # y: box1 in box2
  217. # ((51, 39, 192, 197), (75, 48, 132, 187), True), # y: box2 in box1
  218. # ((88, 81, 222, 173), (60, 221, 123, 358), False), # y: box1 top box2
  219. # ((109, 68, 182, 196), (215, 188, 277, 253), False), # y: box1 top box2 little
  220. # ((109, 68, 182, 196), (215, 78, 277, 253), True), # y: box1 top box2 more
  221. # ((109, 68, 182, 196), (215, 138, 277, 213), False), # y: box1 top box2 more but lower overlap_ratio_threshold
  222. ((109, 68, 182, 196), (215, 138, 277, 203), True), # y: box1 top box2 more and more overlap_ratio_threshold
  223. ])
  224. def test_is_overlaps_y_exceeds_threshold(box1: tuple, box2: tuple, target_bool: bool) -> None:
  225. assert target_bool == __is_overlaps_y_exceeds_threshold(box1, box2)
  226. # Determine the coordinates of the intersection rectangle
  227. @pytest.mark.parametrize("box1, box2, target_num", [
  228. # (None, None, "Error"), # Error
  229. # ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  230. # ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  231. # ((142, 109, 238, 164), (134, 211, 224, 270), 0.0) # 分离
  232. # ((109, 68, 182, 196), (175, 138, 277, 213), 0.024475524475524476) # 相交
  233. # ((56, 90, 170, 219), (103, 212, 171, 304), 0.02288586346557361), # 相交
  234. # ((109, 126, 204, 245), (130, 127, 232, 186), 0.33696071621517326), # 相交
  235. # ((109, 126, 204, 245), (110, 127, 232, 206), 0.5493822593770807), # 相交
  236. ((76, 140, 154, 277), (121, 277, 192, 384), 0.0) # 相切
  237. ])
  238. def test_calculate_iou(box1: tuple, box2: tuple, target_num: float) -> None:
  239. assert target_num == calculate_iou(box1, box2)
  240. # 计算box1和box2的重叠面积占最小面积的box的比例
  241. @pytest.mark.parametrize("box1, box2, target_num", [
  242. # (None, None, "Error"), # Error
  243. # ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离
  244. ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  245. # ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  246. # ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切
  247. # ((109, 126, 204, 245), (110, 127, 232, 206), 0.7704918032786885), # 相交
  248. # ((56, 90, 170, 219), (103, 212, 171, 304), 0.07496803069053709), # 相交
  249. # ((121, 149, 184, 289), (172, 130, 230, 268), 0.17841079460269865), # 相交
  250. # ((51, 69, 192, 147), (75, 48, 132, 187), 0.5611510791366906), # 相交
  251. # ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交
  252. # ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交
  253. # ((109, 126, 204, 245), (130, 127, 232, 186), 0.7254901960784313), # 相交
  254. ])
  255. def test_calculate_overlap_area_2_minbox_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
  256. assert target_num == calculate_overlap_area_2_minbox_area_ratio(box1, box2)
  257. # 计算box1和box2的重叠面积占bbox1的比例
  258. @pytest.mark.parametrize("box1, box2, target_num", [
  259. # (None, None, "Error"), # Error
  260. # ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离
  261. # ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  262. # ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  263. # ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切
  264. # ((142, 109, 238, 164), (134, 164, 224, 270), 0.0), # 相切
  265. # ((109, 126, 204, 245), (110, 127, 232, 206), 0.6568774878372402), # 相交
  266. # ((56, 90, 170, 219), (103, 212, 171, 304), 0.03189174486604107), # 相交
  267. # ((121, 149, 184, 289), (172, 130, 230, 268), 0.1619047619047619), # 相交
  268. # ((51, 69, 192, 147), (75, 48, 132, 187), 0.40425531914893614), # 相交
  269. # ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交
  270. # ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交
  271. ((109, 126, 204, 245), (130, 127, 232, 186), 0.38620079610791685), # 相交
  272. ])
  273. def test_calculate_overlap_area_in_bbox1_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
  274. assert target_num == calculate_overlap_area_in_bbox1_area_ratio(box1, box2)
  275. # 计算两个bbox重叠的面积占最小面积的box的比例,如果比例大于ratio,则返回小的那个bbox,否则返回None
  276. @pytest.mark.parametrize("box1, box2, ratio, target_box", [
  277. # (None, None, 0.8, "Error"), # Error
  278. ((142, 109, 238, 164), (134, 211, 224, 270), 0.0, None), # 分离
  279. # ((109, 126, 204, 245), (110, 127, 232, 206), 0.5, (110, 127, 232, 206)),
  280. # ((56, 90, 170, 219), (103, 212, 171, 304), 0.5, None),
  281. # ((121, 149, 184, 289), (172, 130, 230, 268), 0.5, None),
  282. # ((51, 69, 192, 147), (75, 48, 132, 187), 0.5, (75, 48, 132, 187)),
  283. # ((117, 53, 222, 176), (174, 142, 298, 276), 0.5, None),
  284. # ((102, 60, 233, 203), (70, 190, 220, 319), 0.5, None),
  285. # ((109, 126, 204, 245), (130, 127, 232, 186), 0.5, (130, 127, 232, 186)),
  286. ])
  287. def test_get_minbox_if_overlap_by_ratio(box1: tuple, box2: tuple, ratio: float, target_box: list) -> None:
  288. assert target_box == get_minbox_if_overlap_by_ratio(box1, box2, ratio)