table_line_rec_utils.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. import math
  2. import cv2
  3. import numpy as np
  4. from scipy.spatial import distance as dist
  5. from skimage import measure
  6. def get_table_line(binimg, axis=0, lineW=10):
  7. ##获取表格线
  8. ##axis=0 横线
  9. ##axis=1 竖线
  10. labels = measure.label(binimg > 0, connectivity=2) # 8连通区域标记
  11. regions = measure.regionprops(labels)
  12. if axis == 1:
  13. lineboxes = [
  14. min_area_rect(line.coords)
  15. for line in regions
  16. if line.bbox[2] - line.bbox[0] > lineW
  17. ]
  18. else:
  19. lineboxes = [
  20. min_area_rect(line.coords)
  21. for line in regions
  22. if line.bbox[3] - line.bbox[1] > lineW
  23. ]
  24. return lineboxes
  25. def min_area_rect(coords):
  26. """
  27. 多边形外接矩形
  28. """
  29. rect = cv2.minAreaRect(coords[:, ::-1])
  30. box = cv2.boxPoints(rect)
  31. box = box.reshape((8,)).tolist()
  32. box = image_location_sort_box(box)
  33. x1, y1, x2, y2, x3, y3, x4, y4 = box
  34. w, h = calculate_center_rotate_angle(box)
  35. if w < h:
  36. xmin = (x1 + x2) / 2
  37. xmax = (x3 + x4) / 2
  38. ymin = (y1 + y2) / 2
  39. ymax = (y3 + y4) / 2
  40. else:
  41. xmin = (x1 + x4) / 2
  42. xmax = (x2 + x3) / 2
  43. ymin = (y1 + y4) / 2
  44. ymax = (y2 + y3) / 2
  45. return [xmin, ymin, xmax, ymax]
  46. def image_location_sort_box(box):
  47. x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
  48. pts = (x1, y1), (x2, y2), (x3, y3), (x4, y4)
  49. pts = np.array(pts, dtype="float32")
  50. (x1, y1), (x2, y2), (x3, y3), (x4, y4) = _order_points(pts)
  51. return [x1, y1, x2, y2, x3, y3, x4, y4]
  52. def calculate_center_rotate_angle(box):
  53. x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
  54. w = (
  55. np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
  56. + np.sqrt((x3 - x4) ** 2 + (y3 - y4) ** 2)
  57. ) / 2
  58. h = (
  59. np.sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2)
  60. + np.sqrt((x1 - x4) ** 2 + (y1 - y4) ** 2)
  61. ) / 2
  62. return w, h
  63. def _order_points(pts):
  64. # 根据x坐标对点进行排序
  65. """
  66. ---------------------
  67. 本项目中是为了排序后得到[(xmin,ymin),(xmax,ymin),(xmax,ymax),(xmin,ymax)]
  68. 作者:Tong_T
  69. 来源:CSDN
  70. 原文:https://blog.csdn.net/Tong_T/article/details/81907132
  71. 版权声明:本文为博主原创文章,转载请附上博文链接!
  72. """
  73. x_sorted = pts[np.argsort(pts[:, 0]), :]
  74. left_most = x_sorted[:2, :]
  75. right_most = x_sorted[2:, :]
  76. left_most = left_most[np.argsort(left_most[:, 1]), :]
  77. (tl, bl) = left_most
  78. distance = dist.cdist(tl[np.newaxis], right_most, "euclidean")[0]
  79. (br, tr) = right_most[np.argsort(distance)[::-1], :]
  80. return np.array([tl, tr, br, bl], dtype="float32")
  81. def sqrt(p1, p2):
  82. return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)
  83. def adjust_lines(lines, alph=50, angle=50):
  84. lines_n = len(lines)
  85. new_lines = []
  86. for i in range(lines_n):
  87. x1, y1, x2, y2 = lines[i]
  88. cx1, cy1 = (x1 + x2) / 2, (y1 + y2) / 2
  89. for j in range(lines_n):
  90. if i != j:
  91. x3, y3, x4, y4 = lines[j]
  92. cx2, cy2 = (x3 + x4) / 2, (y3 + y4) / 2
  93. if (x3 < cx1 < x4 or y3 < cy1 < y4) or (
  94. x1 < cx2 < x2 or y1 < cy2 < y2
  95. ): # 判断两个横线在y方向的投影重不重合
  96. continue
  97. else:
  98. r = sqrt((x1, y1), (x3, y3))
  99. k = abs((y3 - y1) / (x3 - x1 + 1e-10))
  100. a = math.atan(k) * 180 / math.pi
  101. if r < alph and a < angle:
  102. new_lines.append((x1, y1, x3, y3))
  103. r = sqrt((x1, y1), (x4, y4))
  104. k = abs((y4 - y1) / (x4 - x1 + 1e-10))
  105. a = math.atan(k) * 180 / math.pi
  106. if r < alph and a < angle:
  107. new_lines.append((x1, y1, x4, y4))
  108. r = sqrt((x2, y2), (x3, y3))
  109. k = abs((y3 - y2) / (x3 - x2 + 1e-10))
  110. a = math.atan(k) * 180 / math.pi
  111. if r < alph and a < angle:
  112. new_lines.append((x2, y2, x3, y3))
  113. r = sqrt((x2, y2), (x4, y4))
  114. k = abs((y4 - y2) / (x4 - x2 + 1e-10))
  115. a = math.atan(k) * 180 / math.pi
  116. if r < alph and a < angle:
  117. new_lines.append((x2, y2, x4, y4))
  118. return new_lines
  119. def final_adjust_lines(rowboxes, colboxes):
  120. nrow = len(rowboxes)
  121. ncol = len(colboxes)
  122. for i in range(nrow):
  123. for j in range(ncol):
  124. rowboxes[i] = line_to_line(rowboxes[i], colboxes[j], alpha=20, angle=30)
  125. colboxes[j] = line_to_line(colboxes[j], rowboxes[i], alpha=20, angle=30)
  126. return rowboxes, colboxes
  127. def draw_lines(im, bboxes, color=(0, 0, 0), lineW=3):
  128. """
  129. boxes: bounding boxes
  130. """
  131. tmp = np.copy(im)
  132. c = color
  133. h, w = im.shape[:2]
  134. for box in bboxes:
  135. x1, y1, x2, y2 = box[:4]
  136. cv2.line(
  137. tmp, (int(x1), int(y1)), (int(x2), int(y2)), c, lineW, lineType=cv2.LINE_AA
  138. )
  139. return tmp
  140. def line_to_line(points1, points2, alpha=10, angle=30):
  141. """
  142. 线段之间的距离
  143. """
  144. x1, y1, x2, y2 = points1
  145. ox1, oy1, ox2, oy2 = points2
  146. xy = np.array([(x1, y1), (x2, y2)], dtype="float32")
  147. A1, B1, C1 = fit_line(xy)
  148. oxy = np.array([(ox1, oy1), (ox2, oy2)], dtype="float32")
  149. A2, B2, C2 = fit_line(oxy)
  150. flag1 = point_line_cor(np.array([x1, y1], dtype="float32"), A2, B2, C2)
  151. flag2 = point_line_cor(np.array([x2, y2], dtype="float32"), A2, B2, C2)
  152. if (flag1 > 0 and flag2 > 0) or (flag1 < 0 and flag2 < 0): # 横线或者竖线在竖线或者横线的同一侧
  153. if (A1 * B2 - A2 * B1) != 0:
  154. x = (B1 * C2 - B2 * C1) / (A1 * B2 - A2 * B1)
  155. y = (A2 * C1 - A1 * C2) / (A1 * B2 - A2 * B1)
  156. # x, y = round(x, 2), round(y, 2)
  157. p = (x, y) # 横线与竖线的交点
  158. r0 = sqrt(p, (x1, y1))
  159. r1 = sqrt(p, (x2, y2))
  160. if min(r0, r1) < alpha: # 若交点与线起点或者终点的距离小于alpha,则延长线到交点
  161. if r0 < r1:
  162. k = abs((y2 - p[1]) / (x2 - p[0] + 1e-10))
  163. a = math.atan(k) * 180 / math.pi
  164. if a < angle or abs(90 - a) < angle:
  165. points1 = np.array([p[0], p[1], x2, y2], dtype="float32")
  166. else:
  167. k = abs((y1 - p[1]) / (x1 - p[0] + 1e-10))
  168. a = math.atan(k) * 180 / math.pi
  169. if a < angle or abs(90 - a) < angle:
  170. points1 = np.array([x1, y1, p[0], p[1]], dtype="float32")
  171. return points1
  172. def min_area_rect_box(
  173. regions, flag=True, W=0, H=0, filtersmall=False, adjust_box=False
  174. ):
  175. """
  176. 多边形外接矩形
  177. """
  178. boxes = []
  179. for region in regions:
  180. if region.bbox_area > H * W * 3 / 4: # 过滤大的单元格
  181. continue
  182. rect = cv2.minAreaRect(region.coords[:, ::-1])
  183. box = cv2.boxPoints(rect)
  184. box = box.reshape((8,)).tolist()
  185. box = image_location_sort_box(box)
  186. x1, y1, x2, y2, x3, y3, x4, y4 = box
  187. w, h = calculate_center_rotate_angle(box)
  188. if w * h < 0.5 * W * H:
  189. if filtersmall and (
  190. w < 15 or h < 15
  191. ): # or w / h > 30 or h / w > 30): # 过滤小的单元格
  192. continue
  193. boxes.append([x1, y1, x2, y2, x3, y3, x4, y4])
  194. return boxes
  195. def point_line_cor(p, A, B, C):
  196. ##判断点与线之间的位置关系
  197. # 一般式直线方程(Ax+By+c)=0
  198. x, y = p
  199. r = A * x + B * y + C
  200. return r
  201. def fit_line(p):
  202. """A = Y2 - Y1
  203. B = X1 - X2
  204. C = X2*Y1 - X1*Y2
  205. AX+BY+C=0
  206. 直线一般方程
  207. """
  208. x1, y1 = p[0]
  209. x2, y2 = p[1]
  210. A = y2 - y1
  211. B = x1 - x2
  212. C = x2 * y1 - x1 * y2
  213. return A, B, C