# utils.py
import copy
import os
import sys
import time

import cv2
import numpy as np
from numpy import arctan, cos, sin, sqrt
  7. def Homography(image, img_points, world_width, world_height,
  8. interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
  9. """
  10. 将图像透视变换到新的视角,返回变换后的图像。
  11. Args:
  12. image (np.ndarray): 输入的图像,应为numpy数组类型。
  13. img_points (List[Tuple[int, int]]): 图像上的四个点的坐标,顺序为左上角、右上角、右下角、左下角。
  14. world_width (int): 变换后图像在世界坐标系中的宽度。
  15. world_height (int): 变换后图像在世界坐标系中的高度。
  16. interpolation (int, optional): 插值方式,默认为cv2.INTER_CUBIC。
  17. ratio_width (float, optional): 变换后图像在x轴上的缩放比例,默认为1.0。
  18. ratio_height (float, optional): 变换后图像在y轴上的缩放比例,默认为1.0。
  19. Returns:
  20. np.ndarray: 变换后的图像,为numpy数组类型。
  21. """
  22. _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
  23. expand_x = int(0.5 * world_width * (ratio_width - 1))
  24. expand_y = int(0.5 * world_height * (ratio_height - 1))
  25. pt_lefttop = [expand_x, expand_y]
  26. pt_righttop = [expand_x + world_width, expand_y]
  27. pt_leftbottom = [expand_x + world_width, expand_y + world_height]
  28. pt_rightbottom = [expand_x, expand_y + world_height]
  29. pts_std = np.float32([pt_lefttop, pt_righttop,
  30. pt_leftbottom, pt_rightbottom])
  31. img_crop_width = int(world_width * ratio_width)
  32. img_crop_height = int(world_height * ratio_height)
  33. M = cv2.getPerspectiveTransform(_points, pts_std)
  34. dst_img = cv2.warpPerspective(
  35. image,
  36. M, (img_crop_width, img_crop_height),
  37. borderMode=cv2.BORDER_CONSTANT, # BORDER_CONSTANT BORDER_REPLICATE
  38. flags=interpolation)
  39. return dst_img
  40. class CurveTextRectifier:
  41. """
  42. spatial transformer via monocular vision
  43. """
  44. def __init__(self):
  45. self.get_virtual_camera_parameter()
  46. def get_virtual_camera_parameter(self):
  47. vcam_thz = 0
  48. vcam_thx1 = 180
  49. vcam_thy = 180
  50. vcam_thx2 = 0
  51. vcam_x = 0
  52. vcam_y = 0
  53. vcam_z = 100
  54. radian = np.pi / 180
  55. angle_z = radian * vcam_thz
  56. angle_x1 = radian * vcam_thx1
  57. angle_y = radian * vcam_thy
  58. angle_x2 = radian * vcam_thx2
  59. optic_x = vcam_x
  60. optic_y = vcam_y
  61. optic_z = vcam_z
  62. fu = 100
  63. fv = 100
  64. matT = np.zeros((4, 4))
  65. matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(angle_y)
  66. matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
  67. cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2))
  68. matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
  69. cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2))
  70. matT[0, 3] = optic_x
  71. matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(angle_y)
  72. matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
  73. cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2))
  74. matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
  75. cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2))
  76. matT[1, 3] = optic_y
  77. matT[2, 0] = -cos(angle_x1) * sin(angle_y)
  78. matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(angle_x2)
  79. matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(angle_x2)
  80. matT[2, 3] = optic_z
  81. matT[3, 0] = 0
  82. matT[3, 1] = 0
  83. matT[3, 2] = 0
  84. matT[3, 3] = 1
  85. matS = np.zeros((4, 4))
  86. matS[2, 3] = 0.5
  87. matS[3, 2] = 0.5
  88. self.ifu = 1 / fu
  89. self.ifv = 1 / fv
  90. self.matT = matT
  91. self.matS = matS
  92. self.K = np.dot(matT.T, matS)
  93. self.K = np.dot(self.K, matT)
  94. def vertical_text_process(self, points, org_size):
  95. """
  96. change sequence amd process
  97. :param points:
  98. :param org_size:
  99. :return:
  100. """
  101. org_w, org_h = org_size
  102. _points = np.array(points).reshape(-1).tolist()
  103. _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
  104. # convert to horizontal points
  105. adjusted_points = np.zeros(_points.shape, dtype=np.float32)
  106. adjusted_points[:, 0] = _points[:, 1]
  107. adjusted_points[:, 1] = org_h - _points[:, 0] - 1
  108. _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(adjusted_points)
  109. # # convert to vertical points back
  110. image_coord = _points.reshape(1, -1, 2)
  111. world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
  112. world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
  113. world_coord[:, :, 1] = _world_coord[:, :, 0]
  114. world_coord[:, :, 2] = _world_coord[:, :, 2]
  115. new_image_size = (_new_image_size[1], _new_image_size[0])
  116. return image_coord, world_coord, new_image_size
  117. def horizontal_text_process(self, points):
  118. """
  119. get image coordinate and world coordinate
  120. :param points:
  121. :return:
  122. """
  123. poly = np.array(points).reshape(-1)
  124. dx_list = []
  125. dy_list = []
  126. for i in range(1, len(poly) // 2):
  127. xdx = poly[i * 2] - poly[(i - 1) * 2]
  128. xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
  129. d = sqrt(xdx ** 2 + xdy ** 2)
  130. dx_list.append(d)
  131. for i in range(0, len(poly) // 4):
  132. ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
  133. ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
  134. d = sqrt(ydx ** 2 + ydy ** 2)
  135. dy_list.append(d)
  136. dx_list = [(dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2 for i in range(len(dx_list) // 2)]
  137. height = np.around(np.mean(dy_list))
  138. rect_coord = [0, 0]
  139. for i in range(0, len(poly) // 4 - 1):
  140. x = rect_coord[-2]
  141. x += dx_list[i]
  142. y = 0
  143. rect_coord.append(x)
  144. rect_coord.append(y)
  145. rect_coord_half = copy.deepcopy(rect_coord)
  146. for i in range(0, len(poly) // 4):
  147. x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
  148. y = height
  149. rect_coord.append(x)
  150. rect_coord.append(y)
  151. np_rect_coord = np.array(rect_coord).reshape(-1, 2)
  152. x_min = np.min(np_rect_coord[:, 0])
  153. y_min = np.min(np_rect_coord[:, 1])
  154. x_max = np.max(np_rect_coord[:, 0])
  155. y_max = np.max(np_rect_coord[:, 1])
  156. new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
  157. x_mean = (x_max - x_min) / 2
  158. y_mean = (y_max - y_min) / 2
  159. np_rect_coord[:, 0] -= x_mean
  160. np_rect_coord[:, 1] -= y_mean
  161. rect_coord = np_rect_coord.reshape(-1).tolist()
  162. rect_coord = np.array(rect_coord).reshape(-1, 2)
  163. world_coord = np.ones((len(rect_coord), 3)) * 0
  164. world_coord[:, :2] = rect_coord
  165. image_coord = np.array(poly).reshape(1, -1, 2)
  166. world_coord = world_coord.reshape(1, -1, 3)
  167. return image_coord, world_coord, new_image_size
  168. def horizontal_text_estimate(self, points):
  169. """
  170. horizontal or vertical text
  171. :param points:
  172. :return:
  173. """
  174. pts = np.array(points).reshape(-1, 2)
  175. x_min = int(np.min(pts[:, 0]))
  176. y_min = int(np.min(pts[:, 1]))
  177. x_max = int(np.max(pts[:, 0]))
  178. y_max = int(np.max(pts[:, 1]))
  179. x = x_max - x_min
  180. y = y_max - y_min
  181. is_horizontal_text = True
  182. if y / x > 1.5: # vertical text condition
  183. is_horizontal_text = False
  184. return is_horizontal_text
  185. def virtual_camera_to_world(self, size):
  186. ifu, ifv = self.ifu, self.ifv
  187. K, matT = self.K, self.matT
  188. ppu = size[0] / 2 + 1e-6
  189. ppv = size[1] / 2 + 1e-6
  190. P = np.zeros((size[1], size[0], 3))
  191. lu = np.array([i for i in range(size[0])])
  192. lv = np.array([i for i in range(size[1])])
  193. u, v = np.meshgrid(lu, lv)
  194. yp = (v - ppv) * ifv
  195. xp = (u - ppu) * ifu
  196. angle_a = arctan(sqrt(xp * xp + yp * yp))
  197. angle_b = arctan(yp / xp)
  198. D0 = sin(angle_a) * cos(angle_b)
  199. D1 = sin(angle_a) * sin(angle_b)
  200. D2 = cos(angle_a)
  201. D0[xp <= 0] = -D0[xp <= 0]
  202. D1[xp <= 0] = -D1[xp <= 0]
  203. ratio_a = K[0, 0] * D0 * D0 + K[1, 1] * D1 * D1 + K[2, 2] * D2 * D2 + \
  204. (K[0, 1] + K[1, 0]) * D0 * D1 + (K[0, 2] + K[2, 0]) * D0 * D2 + (K[1, 2] + K[2, 1]) * D1 * D2
  205. ratio_b = (K[0, 3] + K[3, 0]) * D0 + (K[1, 3] + K[3, 1]) * D1 + (K[2, 3] + K[3, 2]) * D2
  206. ratio_c = K[3, 3] * np.ones(ratio_b.shape)
  207. delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
  208. t = np.zeros(delta.shape)
  209. t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
  210. t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (2 * ratio_a[ratio_a != 0])
  211. t[delta < 0] = 0
  212. P[:, :, 0] = matT[0, 3] + t * (matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2)
  213. P[:, :, 1] = matT[1, 3] + t * (matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2)
  214. P[:, :, 2] = matT[2, 3] + t * (matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2)
  215. return P
  216. def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
  217. r11 = rotation[0, 0]
  218. r12 = rotation[0, 1]
  219. r13 = rotation[0, 2]
  220. r21 = rotation[1, 0]
  221. r22 = rotation[1, 1]
  222. r23 = rotation[1, 2]
  223. r31 = rotation[2, 0]
  224. r32 = rotation[2, 1]
  225. r33 = rotation[2, 2]
  226. t1 = tvec[0]
  227. t2 = tvec[1]
  228. t3 = tvec[2]
  229. k1 = distCoeffs[0]
  230. k2 = distCoeffs[1]
  231. p1 = distCoeffs[2]
  232. p2 = distCoeffs[3]
  233. k3 = distCoeffs[4]
  234. k4 = distCoeffs[5]
  235. k5 = distCoeffs[6]
  236. k6 = distCoeffs[7]
  237. if len(distCoeffs) > 8:
  238. s1 = distCoeffs[8]
  239. s2 = distCoeffs[9]
  240. s3 = distCoeffs[10]
  241. s4 = distCoeffs[11]
  242. else:
  243. s1 = s2 = s3 = s4 = 0
  244. if len(distCoeffs) > 12:
  245. tx = distCoeffs[12]
  246. ty = distCoeffs[13]
  247. else:
  248. tx = ty = 0
  249. fu = intrinsic[0, 0]
  250. fv = intrinsic[1, 1]
  251. ppu = intrinsic[0, 2]
  252. ppv = intrinsic[1, 2]
  253. cos_tx = cos(tx)
  254. cos_ty = cos(ty)
  255. sin_tx = sin(tx)
  256. sin_ty = sin(ty)
  257. tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
  258. tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
  259. tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
  260. tao21 = -sin_tx * sin_ty
  261. tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
  262. tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
  263. P = np.zeros((image_size[1], image_size[0], 2))
  264. c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
  265. c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
  266. c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
  267. x1 = c1 / c3
  268. y1 = c2 / c3
  269. x12 = x1 * x1
  270. y12 = y1 * y1
  271. x1y1 = 2 * x1 * y1
  272. r2 = x12 + y12
  273. r4 = r2 * r2
  274. r6 = r2 * r4
  275. radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (1 + k4 * r2 + k5 * r4 + k6 * r6)
  276. x2 = x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
  277. y2 = y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
  278. x3 = tao11 * x2 + tao12 * y2 + tao13
  279. y3 = tao21 * x2 + tao22 * y2 + tao23
  280. P[:, :, 0] = fu * x3 + ppu
  281. P[:, :, 1] = fv * y3 + ppv
  282. P[c3 <= 0] = 0
  283. return P
  284. def spatial_transform(self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation):
  285. rotation, _ = cv2.Rodrigues(rvecs)
  286. world_map = self.virtual_camera_to_world(new_image_size)
  287. image_map = self.world_to_image(new_image_size, world_map, mtx, dist, rotation, tvecs)
  288. image_map = image_map.astype(np.float32)
  289. dst = cv2.remap(image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation)
  290. return dst
  291. def calibrate(self, org_size, image_coord, world_coord):
  292. """
  293. calibration
  294. :param org_size:
  295. :param image_coord:
  296. :param world_coord:
  297. :return:
  298. """
  299. # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_THIN_PRISM_MODEL
  300. flag = cv2.CALIB_RATIONAL_MODEL
  301. flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
  302. flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
  303. flag4 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_ZERO_TANGENT_DIST | cv2.CALIB_FIX_ASPECT_RATIO
  304. flag5 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_ZERO_TANGENT_DIST
  305. flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
  306. flag_list = [flag2, flag3, flag4, flag5, flag6]
  307. ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(world_coord.astype(np.float32),
  308. image_coord.astype(np.float32),
  309. org_size,
  310. None,
  311. None,
  312. flags=flag)
  313. if ret > 2:
  314. # strategies
  315. min_ret = ret
  316. for i, flag in enumerate(flag_list):
  317. _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(world_coord.astype(np.float32),
  318. image_coord.astype(np.float32),
  319. org_size,
  320. None,
  321. None,
  322. flags=flag)
  323. if _ret < min_ret:
  324. min_ret = _ret
  325. ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
  326. return ret, mtx, dist, rvecs, tvecs
  327. def dc_homo(self, img, img_points, obj_points, is_horizontal_text, interpolation=cv2.INTER_LINEAR,
  328. ratio_width=1.0, ratio_height=1.0):
  329. """
  330. divide and conquer: homography
  331. # ratio_width and ratio_height must be 1.0 here
  332. """
  333. _img_points = img_points.reshape(-1, 2)
  334. _obj_points = obj_points.reshape(-1, 3)
  335. homo_img_list = []
  336. width_list = []
  337. height_list = []
  338. # divide and conquer
  339. for i in range(len(_img_points) // 2 - 1):
  340. new_img_points = np.zeros((4, 2)).astype(np.float32)
  341. new_obj_points = np.zeros((4, 2)).astype(np.float32)
  342. new_img_points[0:2, :] = _img_points[i:(i + 2), :2]
  343. new_img_points[2:4, :] = _img_points[::-1, :][i:(i + 2), :2][::-1, :]
  344. new_obj_points[0:2, :] = _obj_points[i:(i + 2), :2]
  345. new_obj_points[2:4, :] = _obj_points[::-1, :][i:(i + 2), :2][::-1, :]
  346. if is_horizontal_text:
  347. world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
  348. world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
  349. else:
  350. world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
  351. world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
  352. homo_img = Homography(img, new_img_points, world_width, world_height,
  353. interpolation=interpolation,
  354. ratio_width=ratio_width, ratio_height=ratio_height)
  355. homo_img_list.append(homo_img)
  356. _h, _w = homo_img.shape[:2]
  357. width_list.append(_w)
  358. height_list.append(_h)
  359. # stitching
  360. rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(np.uint8)
  361. st = 0
  362. for (homo_img, w, h) in zip(homo_img_list, width_list, height_list):
  363. rectified_image[:h, st:st + w, :] = homo_img
  364. st += w
  365. if not is_horizontal_text:
  366. # vertical rotation
  367. rectified_image = np.rot90(rectified_image, 3)
  368. return rectified_image
  369. def Homography(self, image, img_points, world_width, world_height,
  370. interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
  371. """
  372. 将图像透视变换到新的视角,返回变换后的图像。
  373. Args:
  374. image (np.ndarray): 输入的图像,应为numpy数组类型。
  375. img_points (List[Tuple[int, int]]): 图像上的四个点的坐标,顺序为左上角、右上角、右下角、左下角。
  376. world_width (int): 变换后图像在世界坐标系中的宽度。
  377. world_height (int): 变换后图像在世界坐标系中的高度。
  378. interpolation (int, optional): 插值方式,默认为cv2.INTER_CUBIC。
  379. ratio_width (float, optional): 变换后图像在x轴上的缩放比例,默认为1.0。
  380. ratio_height (float, optional): 变换后图像在y轴上的缩放比例,默认为1.0。
  381. Returns:
  382. np.ndarray: 变换后的图像,为numpy数组类型。
  383. """
  384. _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
  385. expand_x = int(0.5 * world_width * (ratio_width - 1))
  386. expand_y = int(0.5 * world_height * (ratio_height - 1))
  387. pt_lefttop = [expand_x, expand_y]
  388. pt_righttop = [expand_x + world_width, expand_y]
  389. pt_leftbottom = [expand_x + world_width, expand_y + world_height]
  390. pt_rightbottom = [expand_x, expand_y + world_height]
  391. pts_std = np.float32([pt_lefttop, pt_righttop,
  392. pt_leftbottom, pt_rightbottom])
  393. img_crop_width = int(world_width * ratio_width)
  394. img_crop_height = int(world_height * ratio_height)
  395. M = cv2.getPerspectiveTransform(_points, pts_std)
  396. dst_img = cv2.warpPerspective(
  397. image,
  398. M, (img_crop_width, img_crop_height),
  399. borderMode=cv2.BORDER_CONSTANT, # BORDER_CONSTANT BORDER_REPLICATE
  400. flags=interpolation)
  401. return dst_img
  402. def __call__(self, image_data, points, interpolation=cv2.INTER_LINEAR, ratio_width=1.0, ratio_height=1.0, mode='calibration'):
  403. """
  404. spatial transform for a poly text
  405. :param image_data:
  406. :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
  407. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  408. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  409. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  410. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  411. :return:
  412. """
  413. org_h, org_w = image_data.shape[:2]
  414. org_size = (org_w, org_h)
  415. self.image = image_data
  416. is_horizontal_text = self.horizontal_text_estimate(points)
  417. if is_horizontal_text:
  418. image_coord, world_coord, new_image_size = self.horizontal_text_process(points)
  419. else:
  420. image_coord, world_coord, new_image_size = self.vertical_text_process(points, org_size)
  421. if mode.lower() == 'calibration':
  422. ret, mtx, dist, rvecs, tvecs = self.calibrate(org_size, image_coord, world_coord)
  423. st_size = (int(new_image_size[0]*ratio_width), int(new_image_size[1]*ratio_height))
  424. dst = self.spatial_transform(image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation)
  425. elif mode.lower() == 'homography':
  426. # ratio_width and ratio_height must be 1.0 here and ret set to 0.01 without loss manually
  427. ret = 0.01
  428. dst = self.dc_homo(image_data, image_coord, world_coord, is_horizontal_text,
  429. interpolation=interpolation, ratio_width=1.0, ratio_height=1.0)
  430. else:
  431. raise ValueError('mode must be ["calibration", "homography"], but got {}'.format(mode))
  432. return dst, ret
  433. class AutoRectifier:
  434. def __init__(self):
  435. self.npoints = 10
  436. self.curveTextRectifier = CurveTextRectifier()
  437. @staticmethod
  438. def get_rotate_crop_image(img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
  439. """
  440. crop or homography
  441. :param img:
  442. :param points:
  443. :param interpolation:
  444. :param ratio_width:
  445. :param ratio_height:
  446. :return:
  447. """
  448. h, w = img.shape[:2]
  449. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  450. if len(_points) != 4:
  451. x_min = int(np.min(_points[:, 0]))
  452. y_min = int(np.min(_points[:, 1]))
  453. x_max = int(np.max(_points[:, 0]))
  454. y_max = int(np.max(_points[:, 1]))
  455. dx = x_max - x_min
  456. dy = y_max - y_min
  457. expand_x = int(0.5 * dx * (ratio_width - 1))
  458. expand_y = int(0.5 * dy * (ratio_height - 1))
  459. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  460. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  461. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  462. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  463. dst_img = img[y_min:y_max, x_min:x_max, :].copy()
  464. else:
  465. img_crop_width = int(
  466. max(
  467. np.linalg.norm(_points[0] - _points[1]),
  468. np.linalg.norm(_points[2] - _points[3])))
  469. img_crop_height = int(
  470. max(
  471. np.linalg.norm(_points[0] - _points[3]),
  472. np.linalg.norm(_points[1] - _points[2])))
  473. dst_img = Homography(img, _points, img_crop_width, img_crop_height, interpolation, ratio_width, ratio_height)
  474. return dst_img
  475. def visualize(self, image_data, points_list):
  476. visualization = image_data.copy()
  477. for box in points_list:
  478. box = np.array(box).reshape(-1, 2).astype(np.int32)
  479. cv2.drawContours(visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2)
  480. for i, p in enumerate(box):
  481. if i != 0:
  482. cv2.circle(visualization, tuple(p), radius=1, color=(255, 0, 0), thickness=2)
  483. else:
  484. cv2.circle(visualization, tuple(p), radius=1, color=(255, 255, 0), thickness=2)
  485. return visualization
  486. def __call__(self, image_data, points, interpolation=cv2.INTER_LINEAR,
  487. ratio_width=1.0, ratio_height=1.0, loss_thresh=5.0, mode='calibration'):
  488. """
  489. rectification in strategies for a poly text
  490. :param image_data:
  491. :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
  492. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  493. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  494. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  495. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  496. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  497. :return:
  498. """
  499. _points = np.array(points).reshape(-1,2)
  500. if len(_points) >= self.npoints and len(_points) % 2 == 0:
  501. try:
  502. curveTextRectifier = CurveTextRectifier()
  503. dst_img, loss = curveTextRectifier(image_data, points, interpolation, ratio_width, ratio_height, mode)
  504. if loss >= 2:
  505. # for robust
  506. # large loss means it cannot be reconstruct correctly, we must find other way to reconstruct
  507. img_list, loss_list = [dst_img], [loss]
  508. _dst_img, _loss = PlanB()(image_data, points, curveTextRectifier,
  509. interpolation, ratio_width, ratio_height,
  510. loss_thresh=loss_thresh,
  511. square=True)
  512. img_list += [_dst_img]
  513. loss_list += [_loss]
  514. _dst_img, _loss = PlanB()(image_data, points, curveTextRectifier,
  515. interpolation, ratio_width, ratio_height,
  516. loss_thresh=loss_thresh, square=False)
  517. img_list += [_dst_img]
  518. loss_list += [_loss]
  519. min_loss = min(loss_list)
  520. dst_img = img_list[loss_list.index(min_loss)]
  521. if min_loss >= loss_thresh:
  522. print('calibration loss: {} is too large for spatial transformer. It is failed. Using get_rotate_crop_image'.format(loss))
  523. dst_img = self.get_rotate_crop_image(image_data, points, interpolation, ratio_width, ratio_height)
  524. print('here')
  525. except Exception as e:
  526. print(e)
  527. dst_img = self.get_rotate_crop_image(image_data, points, interpolation, ratio_width, ratio_height)
  528. else:
  529. dst_img = self.get_rotate_crop_image(image_data, _points, interpolation, ratio_width, ratio_height)
  530. return dst_img
  531. def run(self, image_data, points_list, interpolation=cv2.INTER_LINEAR,
  532. ratio_width=1.0, ratio_height=1.0, loss_thresh=5.0, mode='calibration'):
  533. """
  534. run for texts in an image
  535. :param image_data: numpy.ndarray. The shape is [h, w, 3]
  536. :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
  537. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  538. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  539. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  540. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  541. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  542. :return: res: roi-image list, visualized_image: draw polys in original image
  543. """
  544. if image_data is None:
  545. raise ValueError
  546. if not isinstance(points_list, list):
  547. raise ValueError
  548. for points in points_list:
  549. if not isinstance(points, list):
  550. raise ValueError
  551. if ratio_width < 1.0 or ratio_height < 1.0:
  552. raise ValueError('ratio_width and ratio_height cannot be smaller than 1, but got {}', (ratio_width, ratio_height))
  553. if mode.lower() != 'calibration' and mode.lower() != 'homography':
  554. raise ValueError('mode must be ["calibration", "homography"], but got {}'.format(mode))
  555. if mode.lower() == 'homography' and ratio_width != 1.0 and ratio_height != 1.0:
  556. raise ValueError('ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})'.format(mode, ratio_width, ratio_height))
  557. res = []
  558. for points in points_list:
  559. rectified_img = self(image_data, points, interpolation, ratio_width, ratio_height,
  560. loss_thresh=loss_thresh, mode=mode)
  561. res.append(rectified_img)
  562. # visualize
  563. visualized_image = self.visualize(image_data, points_list)
  564. return res, visualized_image