crop_image_regions.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from ..base import BaseComponent
  15. import numpy as np
  16. from ....utils.io import ImageReader
  17. import copy
  18. import cv2
  19. from .seal_det_warp import AutoRectifier
  20. from shapely.geometry import Polygon
  21. from numpy.linalg import norm
  22. class CropByPolys(BaseComponent):
  23. """Crop Image by Polys"""
  24. entities = "CropByPolys"
  25. def __init__(self, det_box_type="quad"):
  26. super().__init__()
  27. self.det_box_type = det_box_type
  28. def __call__(self, img, dt_polys):
  29. """__call__"""
  30. if self.det_box_type == "quad":
  31. dt_boxes = np.array(dt_polys)
  32. output_list = []
  33. for bno in range(len(dt_boxes)):
  34. tmp_box = copy.deepcopy(dt_boxes[bno])
  35. img_crop = self.get_minarea_rect_crop(img, tmp_box)
  36. output_list.append(
  37. {
  38. "img": img_crop,
  39. "img_size": [img_crop.shape[1], img_crop.shape[0]],
  40. }
  41. )
  42. elif self.det_box_type == "poly":
  43. output_list = []
  44. dt_boxes = dt_polys
  45. for bno in range(len(dt_boxes)):
  46. tmp_box = copy.deepcopy(dt_boxes[bno])
  47. img_crop = self.get_poly_rect_crop(img.copy(), tmp_box)
  48. output_list.append(
  49. {
  50. "img": img_crop,
  51. "img_size": [img_crop.shape[1], img_crop.shape[0]],
  52. }
  53. )
  54. else:
  55. raise NotImplementedError
  56. return output_list
  57. def get_minarea_rect_crop(self, img, points):
  58. """get_minarea_rect_crop"""
  59. bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
  60. points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
  61. index_a, index_b, index_c, index_d = 0, 1, 2, 3
  62. if points[1][1] > points[0][1]:
  63. index_a = 0
  64. index_d = 1
  65. else:
  66. index_a = 1
  67. index_d = 0
  68. if points[3][1] > points[2][1]:
  69. index_b = 2
  70. index_c = 3
  71. else:
  72. index_b = 3
  73. index_c = 2
  74. box = [points[index_a], points[index_b], points[index_c], points[index_d]]
  75. crop_img = self.get_rotate_crop_image(img, np.array(box))
  76. return crop_img
  77. def get_rotate_crop_image(self, img, points):
  78. """
  79. img_height, img_width = img.shape[0:2]
  80. left = int(np.min(points[:, 0]))
  81. right = int(np.max(points[:, 0]))
  82. top = int(np.min(points[:, 1]))
  83. bottom = int(np.max(points[:, 1]))
  84. img_crop = img[top:bottom, left:right, :].copy()
  85. points[:, 0] = points[:, 0] - left
  86. points[:, 1] = points[:, 1] - top
  87. """
  88. assert len(points) == 4, "shape of points must be 4*2"
  89. img_crop_width = int(
  90. max(
  91. np.linalg.norm(points[0] - points[1]),
  92. np.linalg.norm(points[2] - points[3]),
  93. )
  94. )
  95. img_crop_height = int(
  96. max(
  97. np.linalg.norm(points[0] - points[3]),
  98. np.linalg.norm(points[1] - points[2]),
  99. )
  100. )
  101. pts_std = np.float32(
  102. [
  103. [0, 0],
  104. [img_crop_width, 0],
  105. [img_crop_width, img_crop_height],
  106. [0, img_crop_height],
  107. ]
  108. )
  109. M = cv2.getPerspectiveTransform(points, pts_std)
  110. dst_img = cv2.warpPerspective(
  111. img,
  112. M,
  113. (img_crop_width, img_crop_height),
  114. borderMode=cv2.BORDER_REPLICATE,
  115. flags=cv2.INTER_CUBIC,
  116. )
  117. dst_img_height, dst_img_width = dst_img.shape[0:2]
  118. if dst_img_height * 1.0 / dst_img_width >= 1.5:
  119. dst_img = np.rot90(dst_img)
  120. return dst_img
  121. def reorder_poly_edge(self, points):
  122. """Get the respective points composing head edge, tail edge, top
  123. sideline and bottom sideline.
  124. Args:
  125. points (ndarray): The points composing a text polygon.
  126. Returns:
  127. head_edge (ndarray): The two points composing the head edge of text
  128. polygon.
  129. tail_edge (ndarray): The two points composing the tail edge of text
  130. polygon.
  131. top_sideline (ndarray): The points composing top curved sideline of
  132. text polygon.
  133. bot_sideline (ndarray): The points composing bottom curved sideline
  134. of text polygon.
  135. """
  136. assert points.ndim == 2
  137. assert points.shape[0] >= 4
  138. assert points.shape[1] == 2
  139. orientation_thr = 2.0 # 一个经验超参数
  140. head_inds, tail_inds = self.find_head_tail(points, orientation_thr)
  141. head_edge, tail_edge = points[head_inds], points[tail_inds]
  142. pad_points = np.vstack([points, points])
  143. if tail_inds[1] < 1:
  144. tail_inds[1] = len(points)
  145. sideline1 = pad_points[head_inds[1] : tail_inds[1]]
  146. sideline2 = pad_points[tail_inds[1] : (head_inds[1] + len(points))]
  147. return head_edge, tail_edge, sideline1, sideline2
  148. def vector_slope(self, vec):
  149. assert len(vec) == 2
  150. return abs(vec[1] / (vec[0] + 1e-8))
  151. def find_head_tail(self, points, orientation_thr):
  152. """Find the head edge and tail edge of a text polygon.
  153. Args:
  154. points (ndarray): The points composing a text polygon.
  155. orientation_thr (float): The threshold for distinguishing between
  156. head edge and tail edge among the horizontal and vertical edges
  157. of a quadrangle.
  158. Returns:
  159. head_inds (list): The indexes of two points composing head edge.
  160. tail_inds (list): The indexes of two points composing tail edge.
  161. """
  162. assert points.ndim == 2
  163. assert points.shape[0] >= 4
  164. assert points.shape[1] == 2
  165. assert isinstance(orientation_thr, float)
  166. if len(points) > 4:
  167. pad_points = np.vstack([points, points[0]])
  168. edge_vec = pad_points[1:] - pad_points[:-1]
  169. theta_sum = []
  170. adjacent_vec_theta = []
  171. for i, edge_vec1 in enumerate(edge_vec):
  172. adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
  173. adjacent_edge_vec = edge_vec[adjacent_ind]
  174. temp_theta_sum = np.sum(self.vector_angle(edge_vec1, adjacent_edge_vec))
  175. temp_adjacent_theta = self.vector_angle(
  176. adjacent_edge_vec[0], adjacent_edge_vec[1]
  177. )
  178. theta_sum.append(temp_theta_sum)
  179. adjacent_vec_theta.append(temp_adjacent_theta)
  180. theta_sum_score = np.array(theta_sum) / np.pi
  181. adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
  182. poly_center = np.mean(points, axis=0)
  183. edge_dist = np.maximum(
  184. norm(pad_points[1:] - poly_center, axis=-1),
  185. norm(pad_points[:-1] - poly_center, axis=-1),
  186. )
  187. dist_score = edge_dist / np.max(edge_dist)
  188. position_score = np.zeros(len(edge_vec))
  189. score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
  190. score += 0.35 * dist_score
  191. if len(points) % 2 == 0:
  192. position_score[(len(score) // 2 - 1)] += 1
  193. position_score[-1] += 1
  194. score += 0.1 * position_score
  195. pad_score = np.concatenate([score, score])
  196. score_matrix = np.zeros((len(score), len(score) - 3))
  197. x = np.arange(len(score) - 3) / float(len(score) - 4)
  198. gaussian = (
  199. 1.0
  200. / (np.sqrt(2.0 * np.pi) * 0.5)
  201. * np.exp(-np.power((x - 0.5) / 0.5, 2.0) / 2)
  202. )
  203. gaussian = gaussian / np.max(gaussian)
  204. for i in range(len(score)):
  205. score_matrix[i, :] = (
  206. score[i]
  207. + pad_score[(i + 2) : (i + len(score) - 1)] * gaussian * 0.3
  208. )
  209. head_start, tail_increment = np.unravel_index(
  210. score_matrix.argmax(), score_matrix.shape
  211. )
  212. tail_start = (head_start + tail_increment + 2) % len(points)
  213. head_end = (head_start + 1) % len(points)
  214. tail_end = (tail_start + 1) % len(points)
  215. if head_end > tail_end:
  216. head_start, tail_start = tail_start, head_start
  217. head_end, tail_end = tail_end, head_end
  218. head_inds = [head_start, head_end]
  219. tail_inds = [tail_start, tail_end]
  220. else:
  221. if self.vector_slope(points[1] - points[0]) + self.vector_slope(
  222. points[3] - points[2]
  223. ) < self.vector_slope(points[2] - points[1]) + self.vector_slope(
  224. points[0] - points[3]
  225. ):
  226. horizontal_edge_inds = [[0, 1], [2, 3]]
  227. vertical_edge_inds = [[3, 0], [1, 2]]
  228. else:
  229. horizontal_edge_inds = [[3, 0], [1, 2]]
  230. vertical_edge_inds = [[0, 1], [2, 3]]
  231. vertical_len_sum = norm(
  232. points[vertical_edge_inds[0][0]] - points[vertical_edge_inds[0][1]]
  233. ) + norm(
  234. points[vertical_edge_inds[1][0]] - points[vertical_edge_inds[1][1]]
  235. )
  236. horizontal_len_sum = norm(
  237. points[horizontal_edge_inds[0][0]] - points[horizontal_edge_inds[0][1]]
  238. ) + norm(
  239. points[horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1][1]]
  240. )
  241. if vertical_len_sum > horizontal_len_sum * orientation_thr:
  242. head_inds = horizontal_edge_inds[0]
  243. tail_inds = horizontal_edge_inds[1]
  244. else:
  245. head_inds = vertical_edge_inds[0]
  246. tail_inds = vertical_edge_inds[1]
  247. return head_inds, tail_inds
  248. def vector_angle(self, vec1, vec2):
  249. if vec1.ndim > 1:
  250. unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
  251. else:
  252. unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
  253. if vec2.ndim > 1:
  254. unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
  255. else:
  256. unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
  257. return np.arccos(np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
  258. def get_minarea_rect(self, img, points):
  259. bounding_box = cv2.minAreaRect(points)
  260. points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
  261. index_a, index_b, index_c, index_d = 0, 1, 2, 3
  262. if points[1][1] > points[0][1]:
  263. index_a = 0
  264. index_d = 1
  265. else:
  266. index_a = 1
  267. index_d = 0
  268. if points[3][1] > points[2][1]:
  269. index_b = 2
  270. index_c = 3
  271. else:
  272. index_b = 3
  273. index_c = 2
  274. box = [points[index_a], points[index_b], points[index_c], points[index_d]]
  275. crop_img = self.get_rotate_crop_image(img, np.array(box))
  276. return crop_img, box
  277. def sample_points_on_bbox_bp(self, line, n=50):
  278. """Resample n points on a line.
  279. Args:
  280. line (ndarray): The points composing a line.
  281. n (int): The resampled points number.
  282. Returns:
  283. resampled_line (ndarray): The points composing the resampled line.
  284. """
  285. from numpy.linalg import norm
  286. # 断言检查输入参数的有效性
  287. assert line.ndim == 2
  288. assert line.shape[0] >= 2
  289. assert line.shape[1] == 2
  290. assert isinstance(n, int)
  291. assert n > 0
  292. length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
  293. total_length = sum(length_list)
  294. length_cumsum = np.cumsum([0.0] + length_list)
  295. delta_length = total_length / (float(n) + 1e-8)
  296. current_edge_ind = 0
  297. resampled_line = [line[0]]
  298. for i in range(1, n):
  299. current_line_len = i * delta_length
  300. while (
  301. current_edge_ind + 1 < len(length_cumsum)
  302. and current_line_len >= length_cumsum[current_edge_ind + 1]
  303. ):
  304. current_edge_ind += 1
  305. current_edge_end_shift = current_line_len - length_cumsum[current_edge_ind]
  306. if current_edge_ind >= len(length_list):
  307. break
  308. end_shift_ratio = current_edge_end_shift / length_list[current_edge_ind]
  309. current_point = (
  310. line[current_edge_ind]
  311. + (line[current_edge_ind + 1] - line[current_edge_ind])
  312. * end_shift_ratio
  313. )
  314. resampled_line.append(current_point)
  315. resampled_line.append(line[-1])
  316. resampled_line = np.array(resampled_line)
  317. return resampled_line
  318. def sample_points_on_bbox(self, line, n=50):
  319. """Resample n points on a line.
  320. Args:
  321. line (ndarray): The points composing a line.
  322. n (int): The resampled points number.
  323. Returns:
  324. resampled_line (ndarray): The points composing the resampled line.
  325. """
  326. assert line.ndim == 2
  327. assert line.shape[0] >= 2
  328. assert line.shape[1] == 2
  329. assert isinstance(n, int)
  330. assert n > 0
  331. length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
  332. total_length = sum(length_list)
  333. mean_length = total_length / (len(length_list) + 1e-8)
  334. group = [[0]]
  335. for i in range(len(length_list)):
  336. point_id = i + 1
  337. if length_list[i] < 0.9 * mean_length:
  338. for g in group:
  339. if i in g:
  340. g.append(point_id)
  341. break
  342. else:
  343. g = [point_id]
  344. group.append(g)
  345. top_tail_len = norm(line[0] - line[-1])
  346. if top_tail_len < 0.9 * mean_length:
  347. group[0].extend(g)
  348. group.remove(g)
  349. mean_positions = []
  350. for indices in group:
  351. x_sum = 0
  352. y_sum = 0
  353. for index in indices:
  354. x, y = line[index]
  355. x_sum += x
  356. y_sum += y
  357. num_points = len(indices)
  358. mean_x = x_sum / num_points
  359. mean_y = y_sum / num_points
  360. mean_positions.append((mean_x, mean_y))
  361. resampled_line = np.array(mean_positions)
  362. return resampled_line
  363. def get_poly_rect_crop(self, img, points):
  364. """
  365. 修改该函数,实现使用polygon,对不规则、弯曲文本的矫正以及crop
  366. args: img: 图片 ndarrary格式
  367. points: polygon格式的多点坐标 N*2 shape, ndarray格式
  368. return: 矫正后的图片 ndarray格式
  369. """
  370. points = np.array(points).astype(np.int32).reshape(-1, 2)
  371. temp_crop_img, temp_box = self.get_minarea_rect(img, points)
  372. # 计算最小外接矩形与polygon的IoU
  373. def get_union(pD, pG):
  374. return Polygon(pD).union(Polygon(pG)).area
  375. def get_intersection_over_union(pD, pG):
  376. return get_intersection(pD, pG) / (get_union(pD, pG) + 1e-10)
  377. def get_intersection(pD, pG):
  378. return Polygon(pD).intersection(Polygon(pG)).area
  379. cal_IoU = get_intersection_over_union(points, temp_box)
  380. if cal_IoU >= 0.7:
  381. points = self.sample_points_on_bbox_bp(points, 31)
  382. return temp_crop_img
  383. points_sample = self.sample_points_on_bbox(points)
  384. points_sample = points_sample.astype(np.int32)
  385. head_edge, tail_edge, top_line, bot_line = self.reorder_poly_edge(points_sample)
  386. resample_top_line = self.sample_points_on_bbox_bp(top_line, 15)
  387. resample_bot_line = self.sample_points_on_bbox_bp(bot_line, 15)
  388. sideline_mean_shift = np.mean(resample_top_line, axis=0) - np.mean(
  389. resample_bot_line, axis=0
  390. )
  391. if sideline_mean_shift[1] > 0:
  392. resample_bot_line, resample_top_line = resample_top_line, resample_bot_line
  393. rectifier = AutoRectifier()
  394. new_points = np.concatenate([resample_top_line, resample_bot_line])
  395. new_points_list = list(new_points.astype(np.float32).reshape(1, -1).tolist())
  396. if len(img.shape) == 2:
  397. img = np.stack((img,) * 3, axis=-1)
  398. img_crop, image = rectifier.run(img, new_points_list, mode="homography")
  399. return np.array(img_crop[0], dtype=np.uint8)
  400. class CropByBoxes(BaseComponent):
  401. """Crop Image by Box"""
  402. entities = "CropByBoxes"
  403. def __init__(self):
  404. super().__init__()
  405. def __call__(self, img, boxes):
  406. """__call__"""
  407. output_list = []
  408. for bbox_info in boxes:
  409. label_id = bbox_info["cls_id"]
  410. box = bbox_info["coordinate"]
  411. label = bbox_info.get("label", label_id)
  412. xmin, ymin, xmax, ymax = [int(i) for i in box]
  413. img_crop = img[ymin:ymax, xmin:xmax].copy()
  414. output_list.append({"img": img_crop, "box": box, "label": label})
  415. return output_list