test_unit.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. import pytest
  2. import os
  3. from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in_or_part_overlap_with_area_ratio, _is_in, \
  4. _is_part_overlap, _left_intersect, _right_intersect, _is_vertical_full_overlap, _is_bottom_full_overlap, \
  5. _is_left_overlap, __is_overlaps_y_exceeds_threshold, calculate_iou, calculate_overlap_area_2_minbox_area_ratio, \
  6. calculate_overlap_area_in_bbox1_area_ratio, get_minbox_if_overlap_by_ratio, get_bbox_in_boundry, \
  7. find_top_nearest_text_bbox, find_bottom_nearest_text_bbox, find_left_nearest_text_bbox, \
  8. find_right_nearest_text_bbox, bbox_relative_pos, bbox_distance
  9. from magic_pdf.libs.commons import mymax, join_path, get_top_percent_list
  10. from magic_pdf.libs.config_reader import get_s3_config
  11. from magic_pdf.libs.path_utils import parse_s3path
  12. # 输入一个列表,如果列表空返回0,否则返回最大元素
  13. @pytest.mark.parametrize("list_input, target_num",
  14. [
  15. ([0, 0, 0, 0], 0),
  16. ([0], 0),
  17. ([1, 2, 5, 8, 4], 8),
  18. ([], 0),
  19. ([1.1, 7.6, 1.009, 9.9], 9.9),
  20. ([1.0 * 10 ** 2, 3.5 * 10 ** 3, 0.9 * 10 ** 6], 0.9 * 10 ** 6),
  21. ])
  22. def test_list_max(list_input: list, target_num) -> None:
  23. """
  24. list_input: 输入列表元素,元素均为数字类型
  25. """
  26. assert target_num == mymax(list_input)
  27. # 连接多个参数生成路径信息,使用"/"作为连接符,生成的结果需要是一个合法路径
  28. @pytest.mark.parametrize("path_input, target_path", [
  29. (['https:', '', 'www.baidu.com'], 'https://www.baidu.com'),
  30. (['https:', 'www.baidu.com'], 'https:/www.baidu.com'),
  31. (['D:', 'file', 'pythonProject', 'demo' + '.py'], 'D:/file/pythonProject/demo.py'),
  32. ])
  33. def test_join_path(path_input: list, target_path: str) -> None:
  34. """
  35. path_input: 输入path的列表,列表元素均为字符串
  36. """
  37. assert target_path == join_path(*path_input)
  38. # 获取列表中前百分之多少的元素
  39. @pytest.mark.parametrize("num_list, percent, target_num_list", [
  40. ([], 0.75, []),
  41. ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0.8, [23, 9, 7, 3, 0, -1, -5, -7]),
  42. ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0, []),
  43. ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11, 28], 0.8, [28, 23, 9, 7, 3, 0, -1, -5])
  44. ])
  45. def test_get_top_percent_list(num_list: list, percent: float, target_num_list: list) -> None:
  46. """
  47. num_list: 数字列表,列表元素为数字
  48. percent: 占比,float, 向下取证
  49. """
  50. assert target_num_list == get_top_percent_list(num_list, percent)
  51. # 输入一个s3路径,返回bucket名字和其余部分(key)
  52. @pytest.mark.parametrize("s3_path, target_data", [
  53. ("s3://bucket/path/to/my/file.txt", "bucket"),
  54. ("s3a://bucket1/path/to/my/file2.txt", "bucket1"),
  55. # ("/path/to/my/file1.txt", "path"),
  56. # ("bucket/path/to/my/file2.txt", "bucket"),
  57. ])
  58. def test_parse_s3path(s3_path: str, target_data: str):
  59. """
  60. s3_path: s3路径
  61. 如果为无效路径,则返回对应的bucket名字和其余部分
  62. 如果为异常路径 例如:file2.txt,则报异常
  63. """
  64. bucket_name, key = parse_s3path(s3_path)
  65. assert target_data == bucket_name
  66. # 2个box是否处于包含或者部分重合关系。
  67. # 如果某边界重合算重合。
  68. # 部分边界重合,其他在内部也算包含
  69. @pytest.mark.parametrize("box1, box2, target_bool", [
  70. ((120, 133, 223, 248), (128, 168, 269, 295), True),
  71. ((137, 53, 245, 157), (134, 11, 200, 147), True), # 部分重合
  72. ((137, 56, 211, 116), (140, 66, 202, 199), True), # 部分重合
  73. ((42, 34, 69, 65), (42, 34, 69, 65), True), # 部分重合
  74. ((39, 63, 87, 106), (37, 66, 85, 109), True), # 部分重合
  75. ((13, 37, 55, 66), (7, 46, 49, 75), True), # 部分重合
  76. ((56, 83, 85, 104), (64, 85, 93, 106), True), # 部分重合
  77. ((12, 53, 48, 94), (14, 53, 50, 94), True), # 部分重合
  78. ((43, 54, 93, 131), (55, 82, 77, 106), True), # 包含
  79. ((63, 2, 134, 71), (72, 43, 104, 78), True), # 包含
  80. ((25, 57, 109, 127), (26, 73, 49, 95), True), # 包含
  81. ((24, 47, 111, 115), (34, 81, 58, 106), True), # 包含
  82. ((34, 8, 105, 83), (76, 20, 116, 45), True), # 包含
  83. ])
  84. def test_is_in_or_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  85. """
  86. box1: 坐标数组
  87. box2: 坐标数组
  88. """
  89. assert target_bool == _is_in_or_part_overlap(box1, box2)
  90. # 如果box1在box2内部,返回True
  91. # 如果是部分重合的,则重合面积占box1的比例大于阈值时候返回True
  92. @pytest.mark.parametrize("box1, box2, target_bool", [
  93. ((35, 28, 108, 90), (47, 60, 83, 96), False), # 包含 box1 up box2, box2 多半,box1少半
  94. ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2
  95. ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2
  96. ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半
  97. ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半
  98. ((100, 93, 199, 168), (169, 126, 198, 165), False), # 包含 box2 in box1
  99. ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1
  100. ((28, 90, 77, 126), (35, 84, 84, 120), True), # 相交 box1多半,box2多半
  101. ((37, 6, 69, 52), (28, 3, 60, 49), True), # 相交 box1多半,box2多半
  102. ((94, 29, 133, 60), (84, 30, 123, 61), True), # 相交 box1多半,box2多半
  103. ])
  104. def test_is_in_or_part_overlap_with_area_ratio(box1: tuple, box2: tuple, target_bool: bool) -> None:
  105. out_bool = _is_in_or_part_overlap_with_area_ratio(box1, box2)
  106. assert target_bool == out_bool
  107. # box1在box2内部或者box2在box1内部返回True。如果部分边界重合也算作包含。
  108. @pytest.mark.parametrize("box1, box2, target_bool", [
  109. # ((), (), "Error"), # Error
  110. ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2
  111. ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2
  112. ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离
  113. ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半
  114. ((92, 102, 131, 139), (65, 88, 127, 144), False), # 包含 box1 多半
  115. ((68, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 两边x相切
  116. ((69, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 一边x相切
  117. ((69, 114, 118, 122), (68, 90, 118, 122), True), # 包含,box1 in box2 一边y相切
  118. # ((100, 93, 199, 168), (169, 126, 198, 165), True), # 包含 box2 in box1 Error
  119. # ((26, 75, 106, 172), (65, 108, 90, 128), True), # 包含 box2 in box1 Error
  120. # ((38, 94, 122, 120), (68, 94, 118, 120), True), # 包含,box2 in box1 两边y相切 Error
  121. # ((68, 34, 118, 158), (68, 94, 118, 120), True), # 包含,box2 in box1 两边x相切 Error
  122. # ((68, 34, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边x相切 Error
  123. # ((27, 94, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边y相切 Error
  124. ])
  125. def test_is_in(box1: tuple, box2: tuple, target_bool: bool) -> None:
  126. assert target_bool == _is_in(box1, box2)
  127. # 仅仅是部分包含关系,返回True,如果是完全包含关系则返回False
  128. @pytest.mark.parametrize("box1, box2, target_bool", [
  129. ((65, 151, 92, 177), (49, 99, 105, 198), False), # 包含 box1 in box2
  130. ((80, 62, 112, 84), (74, 40, 144, 111), False), # 包含 box1 in box2
  131. # ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离 Error
  132. ((76, 140, 154, 277), (121, 277, 192, 384), True), # 外相切
  133. ((65, 88, 127, 144), (92, 102, 131, 139), True), # 包含 box2 多半,box1约一半
  134. ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半
  135. ((68, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 两边x相切
  136. ((69, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 一边x相切
  137. ((69, 114, 118, 122), (68, 90, 118, 122), False), # 包含,box1 in box2 一边y相切
  138. # ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1 Error
  139. # ((38, 94, 122, 120), (68, 94, 118, 120), False), # 包含,box2 in box1 两边y相切 Error
  140. # ((68, 34, 118, 158), (68, 94, 84, 120), False), # 包含,box2 in box1 一边x相切 Error
  141. ])
  142. def test_is_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  143. assert target_bool == _is_part_overlap(box1, box2)
  144. # left_box右侧是否和right_box左侧有部分重叠
  145. @pytest.mark.parametrize("box1, box2, target_bool", [
  146. (None, None, False),
  147. ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  148. ((121, 149, 184, 289), (172, 130, 230, 268), True), # box1 left bottom box2 相交
  149. ((172, 130, 230, 268), (121, 149, 184, 289), False), # box2 left bottom box1 相交
  150. ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  151. ((117, 53, 222, 176), (174, 142, 298, 276), True), # box1 left top box2 相交
  152. ((174, 142, 298, 276), (117, 53, 222, 176), False), # box2 left top box1 相交
  153. ((65, 88, 127, 144), (92, 102, 131, 139), True), # box1 left box2 y:box2 in box1
  154. ((92, 102, 131, 139), (65, 88, 127, 144), False), # box2 left box1 y:box1 in box2
  155. ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离
  156. ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 bottom box2 x:box2 in box1
  157. ])
  158. def test_left_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
  159. assert target_bool == _left_intersect(box1, box2)
  160. # left_box左侧是否和right_box右侧部分重叠
  161. @pytest.mark.parametrize("box1, box2, target_bool", [
  162. (None, None, False),
  163. ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  164. ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交
  165. ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交
  166. ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  167. ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交
  168. ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交
  169. ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1
  170. # ((92, 102, 131, 139), (65, 88, 127, 144), True), # box2 left box1 y:box1 in box2 Error
  171. ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离
  172. # ((1, 10, 26, 45), (3, 4, 20, 39), False), # box1 bottom box2 x:box2 in box1 Error
  173. ])
  174. def test_right_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
  175. assert target_bool == _right_intersect(box1, box2)
  176. # x方向上:要么box1包含box2, 要么box2包含box1。不能部分包含
  177. # y方向上:box1和box2有重叠
  178. @pytest.mark.parametrize("box1, box2, target_bool", [
  179. # (None, None, False), # Error
  180. ((35, 28, 108, 90), (47, 60, 83, 96), True), # box1 top box2, x:box2 in box1, y:有重叠
  181. ((35, 28, 98, 90), (27, 60, 103, 96), True), # box1 top box2, x:box1 in box2, y:有重叠
  182. ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, x: box2 in box1, y:无重叠
  183. ((47, 60, 83, 96), (35, 28, 108, 90), True), # box2 top box1, x:box1 in box2, y:有重叠
  184. ((27, 60, 103, 96), (35, 28, 98, 90), True), # box2 top box1, x:box2 in box1, y:有重叠
  185. ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, x: box1 in box2, y:无重叠
  186. ((35, 28, 55, 90), (57, 60, 83, 96), False), # box1 top box2, x:无重叠, y:有重叠
  187. ((47, 60, 63, 96), (65, 28, 108, 90), False), # box2 top box1, x:无重叠, y:有重叠
  188. ])
  189. def test_is_vertical_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  190. assert target_bool == _is_vertical_full_overlap(box1, box2)
  191. # 检查box1下方和box2的上方有轻微的重叠,轻微程度收到y_tolerance的限制
  192. @pytest.mark.parametrize("box1, box2, target_bool", [
  193. (None, None, False),
  194. ((35, 28, 108, 90), (47, 89, 83, 116), True), # box1 top box2, y:有重叠
  195. ((35, 28, 108, 90), (47, 60, 83, 96), False), # box1 top box2, y:有重叠且过多
  196. ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, y:无重叠
  197. ((47, 60, 83, 96), (35, 28, 108, 90), False), # box2 top box1, y:有重叠且过多
  198. ((27, 89, 103, 116), (35, 28, 98, 90), False), # box2 top box1, y:有重叠
  199. ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, y:无重叠
  200. ])
  201. def test_is_bottom_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  202. assert target_bool == _is_bottom_full_overlap(box1, box2)
  203. # 检查box1的左侧是否和box2有重叠
  204. @pytest.mark.parametrize("box1, box2, target_bool", [
  205. (None, None, False),
  206. ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离
  207. # ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交 Error
  208. # ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交 Error
  209. ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离
  210. ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交
  211. # ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交 Error
  212. # ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1 Error
  213. ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 middle bottom box2 x:box2 in box1
  214. ])
  215. def test_is_left_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
  216. assert target_bool == _is_left_overlap(box1, box2)
  217. # 查两个bbox在y轴上是否有重叠,并且该重叠区域的高度占两个bbox高度更低的那个超过阈值
  218. @pytest.mark.parametrize("box1, box2, target_bool", [
  219. # (None, None, "Error"), # Error
  220. ((51, 69, 192, 147), (75, 48, 132, 187), True), # y: box1 in box2
  221. ((51, 39, 192, 197), (75, 48, 132, 187), True), # y: box2 in box1
  222. ((88, 81, 222, 173), (60, 221, 123, 358), False), # y: box1 top box2
  223. ((109, 68, 182, 196), (215, 188, 277, 253), False), # y: box1 top box2 little
  224. ((109, 68, 182, 196), (215, 78, 277, 253), True), # y: box1 top box2 more
  225. ((109, 68, 182, 196), (215, 138, 277, 213), False), # y: box1 top box2 more but lower overlap_ratio_threshold
  226. ((109, 68, 182, 196), (215, 138, 277, 203), True), # y: box1 top box2 more and more overlap_ratio_threshold
  227. ])
  228. def test_is_overlaps_y_exceeds_threshold(box1: tuple, box2: tuple, target_bool: bool) -> None:
  229. assert target_bool == __is_overlaps_y_exceeds_threshold(box1, box2)
  230. # Determine the coordinates of the intersection rectangle
  231. @pytest.mark.parametrize("box1, box2, target_num", [
  232. # (None, None, "Error"), # Error
  233. ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  234. ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  235. ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离
  236. ((109, 68, 182, 196), (175, 138, 277, 213), 0.024475524475524476), # 相交
  237. ((56, 90, 170, 219), (103, 212, 171, 304), 0.02288586346557361), # 相交
  238. ((109, 126, 204, 245), (130, 127, 232, 186), 0.33696071621517326), # 相交
  239. ((109, 126, 204, 245), (110, 127, 232, 206), 0.5493822593770807), # 相交
  240. ((76, 140, 154, 277), (121, 277, 192, 384), 0.0) # 相切
  241. ])
  242. def test_calculate_iou(box1: tuple, box2: tuple, target_num: float) -> None:
  243. assert target_num == calculate_iou(box1, box2)
  244. # 计算box1和box2的重叠面积占最小面积的box的比例
  245. @pytest.mark.parametrize("box1, box2, target_num", [
  246. # (None, None, "Error"), # Error
  247. ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离
  248. ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  249. ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  250. ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切
  251. ((109, 126, 204, 245), (110, 127, 232, 206), 0.7704918032786885), # 相交
  252. ((56, 90, 170, 219), (103, 212, 171, 304), 0.07496803069053709), # 相交
  253. ((121, 149, 184, 289), (172, 130, 230, 268), 0.17841079460269865), # 相交
  254. ((51, 69, 192, 147), (75, 48, 132, 187), 0.5611510791366906), # 相交
  255. ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交
  256. ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交
  257. ((109, 126, 204, 245), (130, 127, 232, 186), 0.7254901960784313), # 相交
  258. ])
  259. def test_calculate_overlap_area_2_minbox_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
  260. assert target_num == calculate_overlap_area_2_minbox_area_ratio(box1, box2)
  261. # 计算box1和box2的重叠面积占bbox1的比例
  262. @pytest.mark.parametrize("box1, box2, target_num", [
  263. # (None, None, "Error"), # Error
  264. ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离
  265. ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离
  266. ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离
  267. ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切
  268. ((142, 109, 238, 164), (134, 164, 224, 270), 0.0), # 相切
  269. ((109, 126, 204, 245), (110, 127, 232, 206), 0.6568774878372402), # 相交
  270. ((56, 90, 170, 219), (103, 212, 171, 304), 0.03189174486604107), # 相交
  271. ((121, 149, 184, 289), (172, 130, 230, 268), 0.1619047619047619), # 相交
  272. ((51, 69, 192, 147), (75, 48, 132, 187), 0.40425531914893614), # 相交
  273. ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交
  274. ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交
  275. ((109, 126, 204, 245), (130, 127, 232, 186), 0.38620079610791685), # 相交
  276. ])
  277. def test_calculate_overlap_area_in_bbox1_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
  278. assert target_num == calculate_overlap_area_in_bbox1_area_ratio(box1, box2)
  279. # 计算两个bbox重叠的面积占最小面积的box的比例,如果比例大于ratio,则返回小的那个bbox,否则返回None
  280. @pytest.mark.parametrize("box1, box2, ratio, target_box", [
  281. # (None, None, 0.8, "Error"), # Error
  282. ((142, 109, 238, 164), (134, 211, 224, 270), 0.0, None), # 分离
  283. ((109, 126, 204, 245), (110, 127, 232, 206), 0.5, (110, 127, 232, 206)),
  284. ((56, 90, 170, 219), (103, 212, 171, 304), 0.5, None),
  285. ((121, 149, 184, 289), (172, 130, 230, 268), 0.5, None),
  286. ((51, 69, 192, 147), (75, 48, 132, 187), 0.5, (75, 48, 132, 187)),
  287. ((117, 53, 222, 176), (174, 142, 298, 276), 0.5, None),
  288. ((102, 60, 233, 203), (70, 190, 220, 319), 0.5, None),
  289. ((109, 126, 204, 245), (130, 127, 232, 186), 0.5, (130, 127, 232, 186)),
  290. ])
  291. def test_get_minbox_if_overlap_by_ratio(box1: tuple, box2: tuple, ratio: float, target_box: list) -> None:
  292. assert target_box == get_minbox_if_overlap_by_ratio(box1, box2, ratio)
  293. # 根据boundry获取在这个范围内的所有的box的列表,完全包含关系
  294. @pytest.mark.parametrize("boxes, boundry, target_boxs", [
  295. # ([], (), "Error"), # Error
  296. ([], (110, 340, 209, 387), []),
  297. ([(142, 109, 238, 164)], (134, 211, 224, 270), []), # 分离
  298. ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 300), [(109, 126, 204, 245), (110, 127, 232, 206)]),
  299. ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 230), [(110, 127, 232, 206)]),
  300. ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
  301. (137, 29, 287, 87)], (80, 90, 249, 200), []),
  302. ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
  303. (137, 29, 287, 87)], (30, 20, 349, 320),
  304. [(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
  305. (137, 29, 287, 87)]),
  306. ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
  307. (137, 29, 287, 87)], (30, 20, 200, 320),
  308. [(81, 280, 123, 315), (46, 99, 133, 148), (33, 156, 97, 211)]),
  309. ])
  310. def test_get_bbox_in_boundry(boxes: list, boundry: tuple, target_boxs: list) -> None:
  311. assert target_boxs == get_bbox_in_boundry(boxes, boundry)
  312. # 寻找上方距离最近的box,margin 4个单位, x方向有重合,y方向最近的
  313. @pytest.mark.parametrize("pymu_blocks, obj_box, target_boxs", [
  314. ([{"bbox": (81, 280, 123, 315)}, {"bbox": (282, 203, 342, 247)}, {"bbox": (183, 100, 300, 155)},
  315. {"bbox": (46, 99, 133, 148)}, {"bbox": (33, 156, 97, 211)},
  316. {"bbox": (137, 29, 287, 87)}], (81, 280, 123, 315), {"bbox": (33, 156, 97, 211)}),
  317. # ([{"bbox": (168, 120, 263, 159)},
  318. # {"bbox": (231, 61, 279, 159)},
  319. # {"bbox": (35, 85, 136, 110)},
  320. # {"bbox": (228, 193, 347, 225)},
  321. # {"bbox": (144, 264, 188, 323)},
  322. # {"bbox": (62, 37, 126, 64)}], (228, 193, 347, 225),
  323. # [{"bbox": (168, 120, 263, 159)}, {"bbox": (231, 61, 279, 159)}]), # y:方向最近的有两个,x: 两个均有重合 Error
  324. ([{"bbox": (35, 85, 136, 159)},
  325. {"bbox": (168, 120, 263, 159)},
  326. {"bbox": (231, 61, 279, 118)},
  327. {"bbox": (228, 193, 347, 225)},
  328. {"bbox": (144, 264, 188, 323)},
  329. {"bbox": (62, 37, 126, 64)}], (228, 193, 347, 225),
  330. {"bbox": (168, 120, 263, 159)},), # y:方向最近的有两个,x:只有一个有重合
  331. ([{"bbox": (239, 115, 379, 167)},
  332. {"bbox": (33, 237, 104, 262)},
  333. {"bbox": (124, 288, 168, 325)},
  334. {"bbox": (242, 291, 379, 340)},
  335. {"bbox": (55, 117, 121, 154)},
  336. {"bbox": (266, 183, 384, 217)}, ], (124, 288, 168, 325), {'bbox': (55, 117, 121, 154)}),
  337. ([{"bbox": (239, 115, 379, 167)},
  338. {"bbox": (33, 237, 104, 262)},
  339. {"bbox": (124, 288, 168, 325)},
  340. {"bbox": (242, 291, 379, 340)},
  341. {"bbox": (55, 117, 119, 154)},
  342. {"bbox": (266, 183, 384, 217)}, ], (124, 288, 168, 325), None), # x没有重合
  343. ([{"bbox": (80, 90, 249, 200)},
  344. {"bbox": (183, 100, 240, 155)}, ], (183, 100, 240, 155), None), # 包含
  345. ])
  346. def test_find_top_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
  347. assert target_boxs == find_top_nearest_text_bbox(pymu_blocks, obj_box)
  348. # 寻找下方距离自己最近的box, x方向有重合,y方向最近的
  349. @pytest.mark.parametrize("pymu_blocks, obj_box, target_boxs", [
  350. ([{"bbox": (165, 96, 300, 114)},
  351. {"bbox": (11, 157, 139, 201)},
  352. {"bbox": (124, 208, 265, 262)},
  353. {"bbox": (124, 283, 248, 306)},
  354. {"bbox": (39, 267, 84, 301)},
  355. {"bbox": (36, 89, 114, 145)}, ], (165, 96, 300, 114), {"bbox": (124, 208, 265, 262)}),
  356. ([{"bbox": (187, 37, 303, 49)},
  357. {"bbox": (2, 227, 90, 283)},
  358. {"bbox": (158, 174, 200, 212)},
  359. {"bbox": (259, 174, 324, 228)},
  360. {"bbox": (205, 61, 316, 97)},
  361. {"bbox": (295, 248, 374, 287)}, ], (205, 61, 316, 97), {"bbox": (259, 174, 324, 228)}), # y有两个最近的, x只有一个重合
  362. # ([{"bbox": (187, 37, 303, 49)},
  363. # {"bbox": (2, 227, 90, 283)},
  364. # {"bbox": (259, 174, 324, 228)},
  365. # {"bbox": (205, 61, 316, 97)},
  366. # {"bbox": (295, 248, 374, 287)},
  367. # {"bbox": (158, 174, 209, 212)}, ], (205, 61, 316, 97),
  368. # [{"bbox": (259, 174, 324, 228)}, {"bbox": (158, 174, 209, 212)}]), # x有重合,y有两个最近的 Error
  369. ([{"bbox": (287, 132, 398, 191)},
  370. {"bbox": (44, 141, 163, 188)},
  371. {"bbox": (132, 191, 240, 241)},
  372. {"bbox": (81, 25, 142, 67)},
  373. {"bbox": (74, 297, 116, 314)},
  374. {"bbox": (77, 84, 224, 107)}, ], (287, 132, 398, 191), None), # x没有重合
  375. ([{"bbox": (80, 90, 249, 200)},
  376. {"bbox": (183, 100, 240, 155)}, ], (183, 100, 240, 155), None), # 包含
  377. ])
  378. def test_find_bottom_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
  379. assert target_boxs == find_bottom_nearest_text_bbox(pymu_blocks, obj_box)
  380. # 寻找左侧距离自己最近的box, y方向有重叠,x方向最近
  381. @pytest.mark.parametrize("pymu_blocks, obj_box, target_boxs", [
  382. ([{"bbox": (80, 90, 249, 200)}, {"bbox": (183, 100, 240, 155)}], (183, 100, 240, 155), None), # 包含
  383. ([{"bbox": (28, 90, 77, 126)}, {"bbox": (35, 84, 84, 120)}], (35, 84, 84, 120), None), # y:重叠,x:重叠大于2
  384. ([{"bbox": (28, 90, 77, 126)}, {"bbox": (75, 84, 134, 120)}], (75, 84, 134, 120), {"bbox": (28, 90, 77, 126)}),
  385. # y:重叠,x:重叠小于等于2
  386. ([{"bbox": (239, 115, 379, 167)},
  387. {"bbox": (33, 237, 104, 262)},
  388. {"bbox": (124, 288, 168, 325)},
  389. {"bbox": (242, 291, 379, 340)},
  390. {"bbox": (55, 113, 161, 154)},
  391. {"bbox": (266, 123, 384, 217)}], (266, 123, 384, 217), {"bbox": (55, 113, 161, 154)}), # y重叠,x left
  392. # ([{"bbox": (136, 219, 268, 240)},
  393. # {"bbox": (169, 115, 268, 181)},
  394. # {"bbox": (33, 237, 104, 262)},
  395. # {"bbox": (124, 288, 168, 325)},
  396. # {"bbox": (55, 117, 161, 154)},
  397. # {"bbox": (266, 183, 384, 217)}], (266, 183, 384, 217),
  398. # [{"bbox": (136, 219, 267, 240)}, {"bbox": (169, 115, 267, 181)}]), # y有重叠,x重叠小于2或者在left Error
  399. ])
  400. def test_find_left_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
  401. assert target_boxs == find_left_nearest_text_bbox(pymu_blocks, obj_box)
  402. # 寻找右侧距离自己最近的box, y方向有重叠,x方向最近
  403. @pytest.mark.parametrize("pymu_blocks, obj_box, target_boxs", [
  404. ([{"bbox": (80, 90, 249, 200)}, {"bbox": (183, 100, 240, 155)}], (183, 100, 240, 155), None), # 包含
  405. ([{"bbox": (28, 90, 77, 126)}, {"bbox": (35, 84, 84, 120)}], (28, 90, 77, 126), None), # y:重叠,x:重叠大于2
  406. ([{"bbox": (28, 90, 77, 126)}, {"bbox": (75, 84, 134, 120)}], (28, 90, 77, 126), {"bbox": (75, 84, 134, 120)}),
  407. # y:重叠,x:重叠小于等于2
  408. ([{"bbox": (239, 115, 379, 167)},
  409. {"bbox": (33, 237, 104, 262)},
  410. {"bbox": (124, 288, 168, 325)},
  411. {"bbox": (242, 291, 379, 340)},
  412. {"bbox": (55, 113, 161, 154)},
  413. {"bbox": (266, 123, 384, 217)}], (55, 113, 161, 154), {"bbox": (239, 115, 379, 167)}), # y重叠,x right
  414. # ([{"bbox": (169, 115, 298, 181)},
  415. # {"bbox": (169, 219, 268, 240)},
  416. # {"bbox": (33, 177, 104, 262)},
  417. # {"bbox": (124, 288, 168, 325)},
  418. # {"bbox": (55, 117, 161, 154)},
  419. # {"bbox": (266, 183, 384, 217)}], (33, 177, 104, 262),
  420. # [{"bbox": (169, 115, 298, 181)}, {"bbox": (169, 219, 268, 240)}]), # y有重叠,x重叠小于2或者在right Error
  421. ])
  422. def test_find_right_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
  423. assert target_boxs == find_right_nearest_text_bbox(pymu_blocks, obj_box)
  424. # 判断两个矩形框的相对位置关系 (left, right, bottom, top)
  425. @pytest.mark.parametrize("box1, box2, target_box", [
  426. # (None, None, "Error"), # Error
  427. ((80, 90, 249, 200), (183, 100, 240, 155), (False, False, False, False)), # 包含
  428. # ((124, 81, 222, 173), (60, 221, 123, 358), (False, True, False, True)), # 分离,右上 Error
  429. ((142, 109, 238, 164), (134, 211, 224, 270), (False, False, False, True)), # 分离,上
  430. # ((51, 69, 192, 147), (205, 198, 282, 297), (True, False, False, True)), # 分离,左上 Error
  431. # ((101, 149, 164, 289), (172, 130, 230, 268), (True, False, False, False)), # 分离,左 Error
  432. # ((69, 196, 124, 285), (130, 127, 232, 186), (True, False, True, False)), # 分离,左下 Error
  433. ((103, 212, 171, 304), (56, 90, 170, 209), (False, False, True, False)), # 分离,下
  434. # ((124, 367, 222, 415), (60, 221, 123, 358), (False, True, True, False)), # 分离,右下 Error
  435. # ((172, 130, 230, 268), (101, 149, 164, 289), (False, True, False, False)), # 分离,右 Error
  436. ])
  437. def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None:
  438. assert target_box == bbox_relative_pos(box1, box2)
  439. # 计算两个矩形框的距离
  440. """
  441. 受bbox_relative_pos方法的影响,左右相反,这里计算结果全部受影响,在错误的基础上计算出了正确的结果
  442. """
  443. @pytest.mark.parametrize("box1, box2, target_num", [
  444. # (None, None, "Error"), # Error
  445. ((80, 90, 249, 200), (183, 100, 240, 155), 0.0), # 包含
  446. ((142, 109, 238, 164), (134, 211, 224, 270), 47.0), # 分离,上
  447. ((103, 212, 171, 304), (56, 90, 170, 209), 3.0), # 分离,下
  448. ((101, 149, 164, 289), (172, 130, 230, 268), 8.0), # 分离,左
  449. ((172, 130, 230, 268), (101, 149, 164, 289), 8.0), # 分离,右
  450. ((80.3, 90.8, 249.0, 200.5), (183.8, 100.6, 240.2, 155.1), 0.0), # 包含
  451. ((142.3, 109.5, 238.9, 164.2), (134.4, 211.2, 224.8, 270.1), 47.0), # 分离,上
  452. ((103.5, 212.6, 171.1, 304.8), (56.1, 90.9, 170.6, 209.2), 3.4), # 分离,下
  453. ((101.1, 149.3, 164.9, 289.0), (172.1, 130.1, 230.5, 268.5), 7.2), # 分离,左
  454. ((172.1, 130.3, 230.1, 268.1), (101.2, 149.9, 164.3, 289.1), 7.8), # 分离,右
  455. ((124.3, 81.1, 222.5, 173.8), (60.3, 221.5, 123.0, 358.9), 47.717711596429254), # 分离,右上
  456. ((51.2, 69.31, 192.5, 147.9), (205.0, 198.1, 282.98, 297.09), 51.73287156151299), # 分离,左上
  457. ((124.3, 367.1, 222.9, 415.7), (60.9, 221.4, 123.2, 358.6), 8.570880934886448), # 分离,右下
  458. ((69.9, 196.2, 124.1, 285.7), (130.0, 127.3, 232.6, 186.1), 11.69700816448377), # 分离,左下
  459. ])
  460. def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None:
  461. assert target_num - bbox_distance(box1, box2) < 1
  462. @pytest.mark.skip(reason="skip")
  463. # 根据bucket_name获取s3配置ak,sk,endpoint
  464. def test_get_s3_config() -> None:
  465. bucket_name = os.getenv('bucket_name')
  466. target_data = os.getenv('target_data')
  467. assert convert_string_to_list(target_data) == list(get_s3_config(bucket_name))
  468. def convert_string_to_list(s):
  469. cleaned_s = s.strip("'")
  470. items = cleaned_s.split(',')
  471. cleaned_items = [item.strip() for item in items]
  472. return cleaned_items