seal_det_warp.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os, sys
  15. import numpy as np
  16. from numpy import cos, sin, arctan, sqrt
  17. import cv2
  18. import copy
  19. import time
  20. def Homography(
  21. image,
  22. img_points,
  23. world_width,
  24. world_height,
  25. interpolation=cv2.INTER_CUBIC,
  26. ratio_width=1.0,
  27. ratio_height=1.0,
  28. ):
  29. _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
  30. expand_x = int(0.5 * world_width * (ratio_width - 1))
  31. expand_y = int(0.5 * world_height * (ratio_height - 1))
  32. pt_lefttop = [expand_x, expand_y]
  33. pt_righttop = [expand_x + world_width, expand_y]
  34. pt_leftbottom = [expand_x + world_width, expand_y + world_height]
  35. pt_rightbottom = [expand_x, expand_y + world_height]
  36. pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
  37. img_crop_width = int(world_width * ratio_width)
  38. img_crop_height = int(world_height * ratio_height)
  39. M = cv2.getPerspectiveTransform(_points, pts_std)
  40. dst_img = cv2.warpPerspective(
  41. image,
  42. M,
  43. (img_crop_width, img_crop_height),
  44. borderMode=cv2.BORDER_CONSTANT, # BORDER_CONSTANT BORDER_REPLICATE
  45. flags=interpolation,
  46. )
  47. return dst_img
  48. class PlanB:
  49. def __call__(
  50. self,
  51. image,
  52. points,
  53. curveTextRectifier,
  54. interpolation=cv2.INTER_LINEAR,
  55. ratio_width=1.0,
  56. ratio_height=1.0,
  57. loss_thresh=5.0,
  58. square=False,
  59. ):
  60. """
  61. Plan B using sub-image when it failed in original image
  62. :param image:
  63. :param points:
  64. :param curveTextRectifier: CurveTextRectifier
  65. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  66. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  67. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  68. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  69. :param square: crop square image or not. True or False. The default is False
  70. :return:
  71. """
  72. h, w = image.shape[:2]
  73. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  74. x_min = int(np.min(_points[:, 0]))
  75. y_min = int(np.min(_points[:, 1]))
  76. x_max = int(np.max(_points[:, 0]))
  77. y_max = int(np.max(_points[:, 1]))
  78. dx = x_max - x_min
  79. dy = y_max - y_min
  80. max_d = max(dx, dy)
  81. mean_pt = np.mean(_points, 0)
  82. expand_x = (ratio_width - 1.0) * 0.5 * max_d
  83. expand_y = (ratio_height - 1.0) * 0.5 * max_d
  84. if square:
  85. x_min = np.clip(int(mean_pt[0] - max_d - expand_x), 0, w - 1)
  86. y_min = np.clip(int(mean_pt[1] - max_d - expand_y), 0, h - 1)
  87. x_max = np.clip(int(mean_pt[0] + max_d + expand_x), 0, w - 1)
  88. y_max = np.clip(int(mean_pt[1] + max_d + expand_y), 0, h - 1)
  89. else:
  90. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  91. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  92. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  93. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  94. new_image = image[y_min:y_max, x_min:x_max, :].copy()
  95. new_points = _points.copy()
  96. new_points[:, 0] -= x_min
  97. new_points[:, 1] -= y_min
  98. dst_img, loss = curveTextRectifier(
  99. new_image,
  100. new_points,
  101. interpolation,
  102. ratio_width,
  103. ratio_height,
  104. mode="calibration",
  105. )
  106. return dst_img, loss
class CurveTextRectifier:
    """
    spatial transformer via monocular vision

    A fixed virtual camera looks at a curved surface; polygon points on the
    text are calibrated against a flattened "world" rectangle, and the image
    is remapped through the recovered camera model to rectify the text.
    """

    def __init__(self):
        # Build the fixed virtual-camera model once; all transforms reuse it.
        self.get_virtual_camera_parameter()

    def get_virtual_camera_parameter(self):
        """Construct the virtual camera pose and the quadric matrix K used later."""
        # Virtual camera pose: rotation angles in degrees (z, x, y, x order)
        # and the optical-center translation (camera placed at z = 100).
        vcam_thz = 0
        vcam_thx1 = 180
        vcam_thy = 180
        vcam_thx2 = 0
        vcam_x = 0
        vcam_y = 0
        vcam_z = 100
        radian = np.pi / 180
        angle_z = radian * vcam_thz
        angle_x1 = radian * vcam_thx1
        angle_y = radian * vcam_thy
        angle_x2 = radian * vcam_thx2
        optic_x = vcam_x
        optic_y = vcam_y
        optic_z = vcam_z
        # Virtual focal lengths along u and v.
        fu = 100
        fv = 100
        # matT: 4x4 homogeneous transform, hand-expanded product of the four
        # rotations above plus the optical-center translation.
        matT = np.zeros((4, 4))
        matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[0, 3] = optic_x
        matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[1, 3] = optic_y
        matT[2, 0] = -cos(angle_x1) * sin(angle_y)
        matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(
            angle_x2
        )
        matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(
            angle_x2
        )
        matT[2, 3] = optic_z
        matT[3, 0] = 0
        matT[3, 1] = 0
        matT[3, 2] = 0
        matT[3, 3] = 1
        # matS: coefficient matrix of the target surface in homogeneous form;
        # only the z/w cross terms are set.
        matS = np.zeros((4, 4))
        matS[2, 3] = 0.5
        matS[3, 2] = 0.5
        self.ifu = 1 / fu
        self.ifv = 1 / fv
        self.matT = matT
        self.matS = matS
        # K = matT^T * matS * matT: quadratic-form coefficients used by
        # virtual_camera_to_world to intersect viewing rays with the surface.
        self.K = np.dot(matT.T, matS)
        self.K = np.dot(self.K, matT)

    def vertical_text_process(self, points, org_size):
        """
        change sequence and process
        :param points: polygon points of a vertical text region
        :param org_size: (width, height) of the original image
        :return: image_coord, world_coord, new_image_size
        """
        org_w, org_h = org_size
        _points = np.array(points).reshape(-1).tolist()
        # Rotate the flat point list by one point (two scalars) so the first
        # point matches the horizontal-case convention after the axis swap.
        _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
        # convert to horizontal points
        adjusted_points = np.zeros(_points.shape, dtype=np.float32)
        adjusted_points[:, 0] = _points[:, 1]
        adjusted_points[:, 1] = org_h - _points[:, 0] - 1
        _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(
            adjusted_points
        )
        # convert back to vertical orientation: image coords keep the rotated
        # original points, world coords get a 90-degree rotation.
        image_coord = _points.reshape(1, -1, 2)
        world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
        world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
        world_coord[:, :, 1] = _world_coord[:, :, 0]
        world_coord[:, :, 2] = _world_coord[:, :, 2]
        new_image_size = (_new_image_size[1], _new_image_size[0])
        return image_coord, world_coord, new_image_size

    def horizontal_text_process(self, points):
        """
        get image coordinate and world coordinate
        :param points: polygon points, top edge then bottom edge (clockwise)
        :return: image_coord (1,N,2), world_coord (1,N,3), new_image_size (w,h)
        """
        poly = np.array(points).reshape(-1)
        dx_list = []
        dy_list = []
        # Step lengths between consecutive polygon points.
        for i in range(1, len(poly) // 2):
            xdx = poly[i * 2] - poly[(i - 1) * 2]
            xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
            d = sqrt(xdx**2 + xdy**2)
            dx_list.append(d)
        # Distances between vertically paired top/bottom points (height samples).
        for i in range(0, len(poly) // 4):
            ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
            ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
            d = sqrt(ydx**2 + ydy**2)
            dy_list.append(d)
        # Average each top-edge step with its mirrored bottom-edge step.
        dx_list = [
            (dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2
            for i in range(len(dx_list) // 2)
        ]
        height = np.around(np.mean(dy_list))
        # Unroll the curved polygon into a flat rectangle: top edge at y = 0 ...
        rect_coord = [0, 0]
        for i in range(0, len(poly) // 4 - 1):
            x = rect_coord[-2]
            x += dx_list[i]
            y = 0
            rect_coord.append(x)
            rect_coord.append(y)
        rect_coord_half = copy.deepcopy(rect_coord)
        # ... then the bottom edge mirrored at y = height.
        for i in range(0, len(poly) // 4):
            x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
            y = height
            rect_coord.append(x)
            rect_coord.append(y)
        np_rect_coord = np.array(rect_coord).reshape(-1, 2)
        x_min = np.min(np_rect_coord[:, 0])
        y_min = np.min(np_rect_coord[:, 1])
        x_max = np.max(np_rect_coord[:, 0])
        y_max = np.max(np_rect_coord[:, 1])
        new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
        # Center the world rectangle on the origin.
        x_mean = (x_max - x_min) / 2
        y_mean = (y_max - y_min) / 2
        np_rect_coord[:, 0] -= x_mean
        np_rect_coord[:, 1] -= y_mean
        rect_coord = np_rect_coord.reshape(-1).tolist()
        rect_coord = np.array(rect_coord).reshape(-1, 2)
        # World points are planar: z = 0 everywhere.
        world_coord = np.ones((len(rect_coord), 3)) * 0
        world_coord[:, :2] = rect_coord
        image_coord = np.array(poly).reshape(1, -1, 2)
        world_coord = world_coord.reshape(1, -1, 3)
        return image_coord, world_coord, new_image_size

    def horizontal_text_estimate(self, points):
        """
        horizontal or vertical text
        :param points: polygon points
        :return: True when the bounding box is wider than it is tall (ratio <= 1.5)
        """
        pts = np.array(points).reshape(-1, 2)
        x_min = int(np.min(pts[:, 0]))
        y_min = int(np.min(pts[:, 1]))
        x_max = int(np.max(pts[:, 0]))
        y_max = int(np.max(pts[:, 1]))
        x = x_max - x_min
        y = y_max - y_min
        is_horizontal_text = True
        if y / x > 1.5:  # vertical text condition
            is_horizontal_text = False
        return is_horizontal_text

    def virtual_camera_to_world(self, size):
        """
        Back-project every pixel of the virtual image onto the 3D surface.
        :param size: (width, height) of the virtual image
        :return: P, array of shape (height, width, 3) of world coordinates
        """
        ifu, ifv = self.ifu, self.ifv
        K, matT = self.K, self.matT
        # Principal point at the image center; the epsilon keeps xp from being
        # exactly 0 in arctan(yp / xp) below.
        ppu = size[0] / 2 + 1e-6
        ppv = size[1] / 2 + 1e-6
        P = np.zeros((size[1], size[0], 3))
        lu = np.array([i for i in range(size[0])])
        lv = np.array([i for i in range(size[1])])
        u, v = np.meshgrid(lu, lv)
        # Normalized image-plane coordinates.
        yp = (v - ppv) * ifv
        xp = (u - ppu) * ifu
        # Spherical direction (D0, D1, D2) of each viewing ray.
        angle_a = arctan(sqrt(xp * xp + yp * yp))
        angle_b = arctan(yp / xp)
        D0 = sin(angle_a) * cos(angle_b)
        D1 = sin(angle_a) * sin(angle_b)
        D2 = cos(angle_a)
        # arctan(yp / xp) loses the sign of xp; restore it on the left half-plane.
        D0[xp <= 0] = -D0[xp <= 0]
        D1[xp <= 0] = -D1[xp <= 0]
        # Intersect ray o + t*D with the quadric: a*t^2 + b*t + c = 0, with
        # coefficients read off the symmetric form K.
        ratio_a = (
            K[0, 0] * D0 * D0
            + K[1, 1] * D1 * D1
            + K[2, 2] * D2 * D2
            + (K[0, 1] + K[1, 0]) * D0 * D1
            + (K[0, 2] + K[2, 0]) * D0 * D2
            + (K[1, 2] + K[2, 1]) * D1 * D2
        )
        ratio_b = (
            (K[0, 3] + K[3, 0]) * D0
            + (K[1, 3] + K[3, 1]) * D1
            + (K[2, 3] + K[3, 2]) * D2
        )
        ratio_c = K[3, 3] * np.ones(ratio_b.shape)
        delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
        t = np.zeros(delta.shape)
        # Linear case (a == 0) solved directly; otherwise take the "+" root.
        t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
        t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (
            2 * ratio_a[ratio_a != 0]
        )
        # No real intersection: collapse to the camera origin.
        t[delta < 0] = 0
        P[:, :, 0] = matT[0, 3] + t * (
            matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2
        )
        P[:, :, 1] = matT[1, 3] + t * (
            matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2
        )
        P[:, :, 2] = matT[2, 3] + t * (
            matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2
        )
        return P

    def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
        """
        Project world points into the real camera, applying the full OpenCV
        distortion model (rational + tangential + thin-prism + tilt).
        :param image_size: (width, height) of the output map
        :param world: (h, w, 3) world coordinates from virtual_camera_to_world
        :param intrinsic: 3x3 camera matrix from calibration
        :param distCoeffs: distortion coefficients (8, 12 or 14 values)
        :param rotation: 3x3 rotation matrix
        :param tvec: translation vector
        :return: P, (h, w, 2) pixel coordinates; points behind the camera map to 0
        """
        r11 = rotation[0, 0]
        r12 = rotation[0, 1]
        r13 = rotation[0, 2]
        r21 = rotation[1, 0]
        r22 = rotation[1, 1]
        r23 = rotation[1, 2]
        r31 = rotation[2, 0]
        r32 = rotation[2, 1]
        r33 = rotation[2, 2]
        t1 = tvec[0]
        t2 = tvec[1]
        t3 = tvec[2]
        # Radial (k1..k6), tangential (p1, p2) distortion terms.
        k1 = distCoeffs[0]
        k2 = distCoeffs[1]
        p1 = distCoeffs[2]
        p2 = distCoeffs[3]
        k3 = distCoeffs[4]
        k4 = distCoeffs[5]
        k5 = distCoeffs[6]
        k6 = distCoeffs[7]
        # Thin-prism terms, present only for 12+ coefficients.
        if len(distCoeffs) > 8:
            s1 = distCoeffs[8]
            s2 = distCoeffs[9]
            s3 = distCoeffs[10]
            s4 = distCoeffs[11]
        else:
            s1 = s2 = s3 = s4 = 0
        # Sensor-tilt angles, present only for 14 coefficients.
        if len(distCoeffs) > 12:
            tx = distCoeffs[12]
            ty = distCoeffs[13]
        else:
            tx = ty = 0
        fu = intrinsic[0, 0]
        fv = intrinsic[1, 1]
        ppu = intrinsic[0, 2]
        ppv = intrinsic[1, 2]
        # Tilt-correction matrix entries (identity when tx = ty = 0).
        cos_tx = cos(tx)
        cos_ty = cos(ty)
        sin_tx = sin(tx)
        sin_ty = sin(ty)
        tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
        tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
        tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
        tao21 = -sin_tx * sin_ty
        tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
        tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
        P = np.zeros((image_size[1], image_size[0], 2))
        # Rigid transform into the camera frame; c3 is the depth.
        c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
        c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
        c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
        x1 = c1 / c3
        y1 = c2 / c3
        x12 = x1 * x1
        y12 = y1 * y1
        x1y1 = 2 * x1 * y1
        r2 = x12 + y12
        r4 = r2 * r2
        r6 = r2 * r4
        # Rational radial distortion factor.
        radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
            1 + k4 * r2 + k5 * r4 + k6 * r6
        )
        x2 = (
            x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
        )
        y2 = (
            y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
        )
        # Apply the tilt correction, then the intrinsics.
        x3 = tao11 * x2 + tao12 * y2 + tao13
        y3 = tao21 * x2 + tao22 * y2 + tao23
        P[:, :, 0] = fu * x3 + ppu
        P[:, :, 1] = fv * y3 + ppv
        # Zero out points that project from behind the camera.
        P[c3 <= 0] = 0
        return P

    def spatial_transform(
        self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation
    ):
        """Remap the original image into the rectified virtual view."""
        rotation, _ = cv2.Rodrigues(rvecs)
        world_map = self.virtual_camera_to_world(new_image_size)
        image_map = self.world_to_image(
            new_image_size, world_map, mtx, dist, rotation, tvecs
        )
        image_map = image_map.astype(np.float32)
        dst = cv2.remap(
            image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation
        )
        return dst

    def calibrate(self, org_size, image_coord, world_coord):
        """
        calibration
        :param org_size: (width, height) of the original image
        :param image_coord: (1, N, 2) polygon points in the image
        :param world_coord: (1, N, 3) corresponding flattened world points
        :return: (ret, mtx, dist, rvecs, tvecs) as from cv2.calibrateCamera;
                 ret is the reprojection error ("loss") used by callers
        """
        # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag = cv2.CALIB_RATIONAL_MODEL
        flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
        flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag4 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
            | cv2.CALIB_FIX_ASPECT_RATIO
        )
        flag5 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_TILTED_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
        )
        flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
        flag_list = [flag2, flag3, flag4, flag5, flag6]
        ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
            world_coord.astype(np.float32),
            image_coord.astype(np.float32),
            org_size,
            None,
            None,
            flags=flag,
        )
        if ret > 2:
            # First attempt too lossy: try alternative distortion-model flag
            # combinations and keep the one with the smallest error.
            min_ret = ret
            for i, flag in enumerate(flag_list):
                _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(
                    world_coord.astype(np.float32),
                    image_coord.astype(np.float32),
                    org_size,
                    None,
                    None,
                    flags=flag,
                )
                if _ret < min_ret:
                    min_ret = _ret
                    ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
        return ret, mtx, dist, rvecs, tvecs

    def dc_homo(
        self,
        img,
        img_points,
        obj_points,
        is_horizontal_text,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        """
        divide and conquer: homography
        Rectify each quadrilateral segment of the polygon independently and
        stitch the pieces side by side.
        # ratio_width and ratio_height must be 1.0 here
        """
        _img_points = img_points.reshape(-1, 2)
        _obj_points = obj_points.reshape(-1, 3)
        homo_img_list = []
        width_list = []
        height_list = []
        # divide and conquer: pair point i/i+1 on the top edge with their
        # mirrored counterparts on the bottom edge to form one quad.
        for i in range(len(_img_points) // 2 - 1):
            new_img_points = np.zeros((4, 2)).astype(np.float32)
            new_obj_points = np.zeros((4, 2)).astype(np.float32)
            new_img_points[0:2, :] = _img_points[i : (i + 2), :2]
            new_img_points[2:4, :] = _img_points[::-1, :][i : (i + 2), :2][::-1, :]
            new_obj_points[0:2, :] = _obj_points[i : (i + 2), :2]
            new_obj_points[2:4, :] = _obj_points[::-1, :][i : (i + 2), :2][::-1, :]
            if is_horizontal_text:
                world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
                world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
            else:
                world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
                world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
            homo_img = Homography(
                img,
                new_img_points,
                world_width,
                world_height,
                interpolation=interpolation,
                ratio_width=ratio_width,
                ratio_height=ratio_height,
            )
            homo_img_list.append(homo_img)
            _h, _w = homo_img.shape[:2]
            width_list.append(_w)
            height_list.append(_h)
        # stitching: concatenate the rectified quads left to right.
        rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(
            np.uint8
        )
        st = 0
        for homo_img, w, h in zip(homo_img_list, width_list, height_list):
            rectified_image[:h, st : st + w, :] = homo_img
            st += w
        if not is_horizontal_text:
            # vertical rotation
            rectified_image = np.rot90(rectified_image, 3)
        return rectified_image

    def Homography(
        self,
        image,
        img_points,
        world_width,
        world_height,
        interpolation=cv2.INTER_CUBIC,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        """
        Method-level duplicate of the module-level Homography function.
        NOTE(review): dc_homo calls the module-level function, not this method;
        this appears to be retained for API compatibility - confirm before removing.
        """
        _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
        expand_x = int(0.5 * world_width * (ratio_width - 1))
        expand_y = int(0.5 * world_height * (ratio_height - 1))
        pt_lefttop = [expand_x, expand_y]
        pt_righttop = [expand_x + world_width, expand_y]
        # NOTE(review): the next two names are swapped relative to their values
        # (these are actually bottom-right then bottom-left); the resulting
        # clockwise order TL, TR, BR, BL is what getPerspectiveTransform receives.
        pt_leftbottom = [expand_x + world_width, expand_y + world_height]
        pt_rightbottom = [expand_x, expand_y + world_height]
        pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
        img_crop_width = int(world_width * ratio_width)
        img_crop_height = int(world_height * ratio_height)
        M = cv2.getPerspectiveTransform(_points, pts_std)
        dst_img = cv2.warpPerspective(
            image,
            M,
            (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
            flags=interpolation,
        )
        return dst_img

    def __call__(
        self,
        image_data,
        points,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
        mode="calibration",
    ):
        """
        spatial transform for a poly text
        :param image_data:
        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
        :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
        :return: (rectified image, loss); loss is the calibration reprojection
                 error, or the constant 0.01 in homography mode
        """
        org_h, org_w = image_data.shape[:2]
        org_size = (org_w, org_h)
        self.image = image_data
        # Choose the processing path from the bounding-box aspect ratio.
        is_horizontal_text = self.horizontal_text_estimate(points)
        if is_horizontal_text:
            image_coord, world_coord, new_image_size = self.horizontal_text_process(
                points
            )
        else:
            image_coord, world_coord, new_image_size = self.vertical_text_process(
                points, org_size
            )
        if mode.lower() == "calibration":
            ret, mtx, dist, rvecs, tvecs = self.calibrate(
                org_size, image_coord, world_coord
            )
            st_size = (
                int(new_image_size[0] * ratio_width),
                int(new_image_size[1] * ratio_height),
            )
            dst = self.spatial_transform(
                image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation
            )
        elif mode.lower() == "homography":
            # ratio_width and ratio_height must be 1.0 here and ret set to 0.01 without loss manually
            ret = 0.01
            dst = self.dc_homo(
                image_data,
                image_coord,
                world_coord,
                is_horizontal_text,
                interpolation=interpolation,
                ratio_width=1.0,
                ratio_height=1.0,
            )
        else:
            raise ValueError(
                'mode must be ["calibration", "homography"], but got {}'.format(mode)
            )
        return dst, ret
  599. class AutoRectifier:
  600. def __init__(self):
  601. self.npoints = 10
  602. self.curveTextRectifier = CurveTextRectifier()
  603. @staticmethod
  604. def get_rotate_crop_image(
  605. img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0
  606. ):
  607. """
  608. crop or homography
  609. :param img:
  610. :param points:
  611. :param interpolation:
  612. :param ratio_width:
  613. :param ratio_height:
  614. :return:
  615. """
  616. h, w = img.shape[:2]
  617. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  618. if len(_points) != 4:
  619. x_min = int(np.min(_points[:, 0]))
  620. y_min = int(np.min(_points[:, 1]))
  621. x_max = int(np.max(_points[:, 0]))
  622. y_max = int(np.max(_points[:, 1]))
  623. dx = x_max - x_min
  624. dy = y_max - y_min
  625. expand_x = int(0.5 * dx * (ratio_width - 1))
  626. expand_y = int(0.5 * dy * (ratio_height - 1))
  627. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  628. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  629. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  630. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  631. dst_img = img[y_min:y_max, x_min:x_max, :].copy()
  632. else:
  633. img_crop_width = int(
  634. max(
  635. np.linalg.norm(_points[0] - _points[1]),
  636. np.linalg.norm(_points[2] - _points[3]),
  637. )
  638. )
  639. img_crop_height = int(
  640. max(
  641. np.linalg.norm(_points[0] - _points[3]),
  642. np.linalg.norm(_points[1] - _points[2]),
  643. )
  644. )
  645. dst_img = Homography(
  646. img,
  647. _points,
  648. img_crop_width,
  649. img_crop_height,
  650. interpolation,
  651. ratio_width,
  652. ratio_height,
  653. )
  654. return dst_img
  655. def visualize(self, image_data, points_list):
  656. visualization = image_data.copy()
  657. for box in points_list:
  658. box = np.array(box).reshape(-1, 2).astype(np.int32)
  659. cv2.drawContours(
  660. visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2
  661. )
  662. for i, p in enumerate(box):
  663. if i != 0:
  664. cv2.circle(
  665. visualization,
  666. tuple(p),
  667. radius=1,
  668. color=(255, 0, 0),
  669. thickness=2,
  670. )
  671. else:
  672. cv2.circle(
  673. visualization,
  674. tuple(p),
  675. radius=1,
  676. color=(255, 255, 0),
  677. thickness=2,
  678. )
  679. return visualization
  680. def __call__(
  681. self,
  682. image_data,
  683. points,
  684. interpolation=cv2.INTER_LINEAR,
  685. ratio_width=1.0,
  686. ratio_height=1.0,
  687. loss_thresh=5.0,
  688. mode="calibration",
  689. ):
  690. """
  691. rectification in strategies for a poly text
  692. :param image_data:
  693. :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
  694. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  695. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  696. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  697. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  698. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  699. :return:
  700. """
  701. _points = np.array(points).reshape(-1, 2)
  702. if len(_points) >= self.npoints and len(_points) % 2 == 0:
  703. try:
  704. curveTextRectifier = CurveTextRectifier()
  705. dst_img, loss = curveTextRectifier(
  706. image_data, points, interpolation, ratio_width, ratio_height, mode
  707. )
  708. if loss >= 2:
  709. # for robust
  710. # large loss means it cannot be reconstruct correctly, we must find other way to reconstruct
  711. img_list, loss_list = [dst_img], [loss]
  712. _dst_img, _loss = PlanB()(
  713. image_data,
  714. points,
  715. curveTextRectifier,
  716. interpolation,
  717. ratio_width,
  718. ratio_height,
  719. loss_thresh=loss_thresh,
  720. square=True,
  721. )
  722. img_list += [_dst_img]
  723. loss_list += [_loss]
  724. _dst_img, _loss = PlanB()(
  725. image_data,
  726. points,
  727. curveTextRectifier,
  728. interpolation,
  729. ratio_width,
  730. ratio_height,
  731. loss_thresh=loss_thresh,
  732. square=False,
  733. )
  734. img_list += [_dst_img]
  735. loss_list += [_loss]
  736. min_loss = min(loss_list)
  737. dst_img = img_list[loss_list.index(min_loss)]
  738. if min_loss >= loss_thresh:
  739. print(
  740. "calibration loss: {} is too large for spatial transformer. It is failed. Using get_rotate_crop_image".format(
  741. loss
  742. )
  743. )
  744. dst_img = self.get_rotate_crop_image(
  745. image_data, points, interpolation, ratio_width, ratio_height
  746. )
  747. print("here")
  748. except Exception as e:
  749. print(e)
  750. dst_img = self.get_rotate_crop_image(
  751. image_data, points, interpolation, ratio_width, ratio_height
  752. )
  753. else:
  754. dst_img = self.get_rotate_crop_image(
  755. image_data, _points, interpolation, ratio_width, ratio_height
  756. )
  757. return dst_img
  758. def run(
  759. self,
  760. image_data,
  761. points_list,
  762. interpolation=cv2.INTER_LINEAR,
  763. ratio_width=1.0,
  764. ratio_height=1.0,
  765. loss_thresh=5.0,
  766. mode="calibration",
  767. ):
  768. """
  769. run for texts in an image
  770. :param image_data: numpy.ndarray. The shape is [h, w, 3]
  771. :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
  772. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  773. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  774. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  775. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  776. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  777. :return: res: roi-image list, visualized_image: draw polys in original image
  778. """
  779. if image_data is None:
  780. raise ValueError
  781. if not isinstance(points_list, list):
  782. raise ValueError
  783. for points in points_list:
  784. if not isinstance(points, list):
  785. raise ValueError
  786. if ratio_width < 1.0 or ratio_height < 1.0:
  787. raise ValueError(
  788. "ratio_width and ratio_height cannot be smaller than 1, but got {}",
  789. (ratio_width, ratio_height),
  790. )
  791. if mode.lower() != "calibration" and mode.lower() != "homography":
  792. raise ValueError(
  793. 'mode must be ["calibration", "homography"], but got {}'.format(mode)
  794. )
  795. if mode.lower() == "homography" and ratio_width != 1.0 and ratio_height != 1.0:
  796. raise ValueError(
  797. "ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})".format(
  798. mode, ratio_width, ratio_height
  799. )
  800. )
  801. res = []
  802. for points in points_list:
  803. rectified_img = self(
  804. image_data,
  805. points,
  806. interpolation,
  807. ratio_width,
  808. ratio_height,
  809. loss_thresh=loss_thresh,
  810. mode=mode,
  811. )
  812. res.append(rectified_img)
  813. # visualize
  814. visualized_image = self.visualize(image_data, points_list)
  815. return res, visualized_image