seal_det_warp.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os, sys
  15. import copy
  16. import time
  17. import numpy as np
  18. from numpy import cos, sin, arctan, sqrt
  19. import cv2
  20. from .....utils import logging
# TODO(sunting): add explanatory notes
  22. def Homography(
  23. image,
  24. img_points,
  25. world_width,
  26. world_height,
  27. interpolation=cv2.INTER_CUBIC,
  28. ratio_width=1.0,
  29. ratio_height=1.0,
  30. ):
  31. _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
  32. expand_x = int(0.5 * world_width * (ratio_width - 1))
  33. expand_y = int(0.5 * world_height * (ratio_height - 1))
  34. pt_lefttop = [expand_x, expand_y]
  35. pt_righttop = [expand_x + world_width, expand_y]
  36. pt_leftbottom = [expand_x + world_width, expand_y + world_height]
  37. pt_rightbottom = [expand_x, expand_y + world_height]
  38. pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
  39. img_crop_width = int(world_width * ratio_width)
  40. img_crop_height = int(world_height * ratio_height)
  41. M = cv2.getPerspectiveTransform(_points, pts_std)
  42. dst_img = cv2.warpPerspective(
  43. image,
  44. M,
  45. (img_crop_width, img_crop_height),
  46. borderMode=cv2.BORDER_CONSTANT, # BORDER_CONSTANT BORDER_REPLICATE
  47. flags=interpolation,
  48. )
  49. return dst_img
  50. class PlanB:
  51. def __call__(
  52. self,
  53. image,
  54. points,
  55. curveTextRectifier,
  56. interpolation=cv2.INTER_LINEAR,
  57. ratio_width=1.0,
  58. ratio_height=1.0,
  59. loss_thresh=5.0,
  60. square=False,
  61. ):
  62. """
  63. Plan B using sub-image when it failed in original image
  64. :param image:
  65. :param points:
  66. :param curveTextRectifier: CurveTextRectifier
  67. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  68. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  69. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  70. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  71. :param square: crop square image or not. True or False. The default is False
  72. :return:
  73. """
  74. h, w = image.shape[:2]
  75. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  76. x_min = int(np.min(_points[:, 0]))
  77. y_min = int(np.min(_points[:, 1]))
  78. x_max = int(np.max(_points[:, 0]))
  79. y_max = int(np.max(_points[:, 1]))
  80. dx = x_max - x_min
  81. dy = y_max - y_min
  82. max_d = max(dx, dy)
  83. mean_pt = np.mean(_points, 0)
  84. expand_x = (ratio_width - 1.0) * 0.5 * max_d
  85. expand_y = (ratio_height - 1.0) * 0.5 * max_d
  86. if square:
  87. x_min = np.clip(int(mean_pt[0] - max_d - expand_x), 0, w - 1)
  88. y_min = np.clip(int(mean_pt[1] - max_d - expand_y), 0, h - 1)
  89. x_max = np.clip(int(mean_pt[0] + max_d + expand_x), 0, w - 1)
  90. y_max = np.clip(int(mean_pt[1] + max_d + expand_y), 0, h - 1)
  91. else:
  92. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  93. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  94. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  95. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  96. new_image = image[y_min:y_max, x_min:x_max, :].copy()
  97. new_points = _points.copy()
  98. new_points[:, 0] -= x_min
  99. new_points[:, 1] -= y_min
  100. dst_img, loss = curveTextRectifier(
  101. new_image,
  102. new_points,
  103. interpolation,
  104. ratio_width,
  105. ratio_height,
  106. mode="calibration",
  107. )
  108. return dst_img, loss
class CurveTextRectifier:
    """
    spatial transformer via monocular vision

    A fixed virtual pinhole camera looks at the plane z = 0. The curved text
    polygon (image coordinates) and its flattened rectangle (world
    coordinates) are fed to cv2.calibrateCamera; the recovered intrinsics and
    extrinsics are then used to resample the source image onto the rectified
    plane.
    """

    def __init__(self):
        # precompute the fixed virtual-camera matrices once
        self.get_virtual_camera_parameter()

    def get_virtual_camera_parameter(self):
        """
        Build the constant virtual-camera model.

        Sets:
            self.ifu, self.ifv: inverse focal lengths (1/fu, 1/fv)
            self.matT: 4x4 homogeneous camera pose (rotation built from the
                four angles below + optical-center translation)
            self.matS: quadric matrix with P.T @ matS @ P == z for homogeneous
                P = (x, y, z, 1), i.e. it encodes the target plane z = 0
            self.K: matT.T @ matS @ matT, used by virtual_camera_to_world()
                to intersect viewing rays with that plane
        """
        # virtual camera pose: rotation angles (degrees) and position
        vcam_thz = 0
        vcam_thx1 = 180
        vcam_thy = 180
        vcam_thx2 = 0
        vcam_x = 0
        vcam_y = 0
        vcam_z = 100
        radian = np.pi / 180
        angle_z = radian * vcam_thz
        angle_x1 = radian * vcam_thx1
        angle_y = radian * vcam_thy
        angle_x2 = radian * vcam_thx2
        optic_x = vcam_x
        optic_y = vcam_y
        optic_z = vcam_z
        # virtual focal lengths (pixels)
        fu = 100
        fv = 100
        # rotation block of the pose matrix, composed from the four axis
        # rotations above
        # NOTE(review): appears to be a Z / X / Y / X Euler sequence — confirm
        matT = np.zeros((4, 4))
        matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[0, 3] = optic_x
        matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[1, 3] = optic_y
        matT[2, 0] = -cos(angle_x1) * sin(angle_y)
        matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(
            angle_x2
        )
        matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(
            angle_x2
        )
        matT[2, 3] = optic_z
        matT[3, 0] = 0
        matT[3, 1] = 0
        matT[3, 2] = 0
        matT[3, 3] = 1
        # quadric of the target surface: P.T @ matS @ P = z (the plane z = 0)
        matS = np.zeros((4, 4))
        matS[2, 3] = 0.5
        matS[3, 2] = 0.5
        self.ifu = 1 / fu
        self.ifv = 1 / fv
        self.matT = matT
        self.matS = matS
        self.K = np.dot(matT.T, matS)
        self.K = np.dot(self.K, matT)

    def vertical_text_process(self, points, org_size):
        """
        Change point sequence and process vertical text: rotate the polygon
        90 degrees, reuse the horizontal pipeline, then rotate the resulting
        world coordinates back.
        :param points: flat [x1, y1, x2, y2, ...] polygon points
        :param org_size: (org_w, org_h) of the source image
        :return: (image_coord, world_coord, new_image_size)
        """
        org_w, org_h = org_size
        _points = np.array(points).reshape(-1).tolist()
        # rotate the point sequence by one (x, y) pair before the coordinate
        # rotation below
        _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
        # convert to horizontal points: map (x, y) -> (y, org_h - x - 1),
        # a 90-degree rotation that makes vertical text horizontal
        adjusted_points = np.zeros(_points.shape, dtype=np.float32)
        adjusted_points[:, 0] = _points[:, 1]
        adjusted_points[:, 1] = org_h - _points[:, 0] - 1
        _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(
            adjusted_points
        )
        # convert back to vertical: keep the (shifted) original image points,
        # rotate the world coordinates with (x, y) -> (-y, x)
        image_coord = _points.reshape(1, -1, 2)
        world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
        world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
        world_coord[:, :, 1] = _world_coord[:, :, 0]
        world_coord[:, :, 2] = _world_coord[:, :, 2]
        # swap width/height for the vertical layout
        new_image_size = (_new_image_size[1], _new_image_size[0])
        return image_coord, world_coord, new_image_size

    def horizontal_text_process(self, points):
        """
        Get image coordinates and world coordinates: flatten the polygon into
        a rectangle whose per-segment widths come from the top/bottom point
        chains and whose height is the mean top-to-bottom point distance.
        :param points: flat or (n, 2) polygon points
        :return: (image_coord (1, n, 2), world_coord (1, n, 3),
            new_image_size (w, h))
        """
        poly = np.array(points).reshape(-1)
        dx_list = []
        dy_list = []
        # consecutive-point distances along the polygon
        # NOTE(review): this range also includes the side edge joining the top
        # and bottom chains — presumably harmless after the symmetric
        # averaging below; confirm
        for i in range(1, len(poly) // 2):
            xdx = poly[i * 2] - poly[(i - 1) * 2]
            xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
            d = sqrt(xdx**2 + xdy**2)
            dx_list.append(d)
        # distances between mirrored top/bottom point pairs -> text height
        for i in range(0, len(poly) // 4):
            ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
            ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
            d = sqrt(ydx**2 + ydy**2)
            dy_list.append(d)
        # average each top-chain segment with its mirrored bottom-chain segment
        dx_list = [
            (dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2
            for i in range(len(dx_list) // 2)
        ]
        height = np.around(np.mean(dy_list))
        # top edge of the flattened rectangle: accumulate segment widths at y=0
        rect_coord = [0, 0]
        for i in range(0, len(poly) // 4 - 1):
            x = rect_coord[-2]
            x += dx_list[i]
            y = 0
            rect_coord.append(x)
            rect_coord.append(y)
        # bottom edge: mirror the top-edge x's at y = height
        rect_coord_half = copy.deepcopy(rect_coord)
        for i in range(0, len(poly) // 4):
            x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
            y = height
            rect_coord.append(x)
            rect_coord.append(y)
        np_rect_coord = np.array(rect_coord).reshape(-1, 2)
        x_min = np.min(np_rect_coord[:, 0])
        y_min = np.min(np_rect_coord[:, 1])
        x_max = np.max(np_rect_coord[:, 0])
        y_max = np.max(np_rect_coord[:, 1])
        new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
        # center the world rectangle on the origin (these are half-extents)
        x_mean = (x_max - x_min) / 2
        y_mean = (y_max - y_min) / 2
        np_rect_coord[:, 0] -= x_mean
        np_rect_coord[:, 1] -= y_mean
        rect_coord = np_rect_coord.reshape(-1).tolist()
        rect_coord = np.array(rect_coord).reshape(-1, 2)
        # world points lie on the z = 0 plane
        world_coord = np.ones((len(rect_coord), 3)) * 0
        world_coord[:, :2] = rect_coord
        image_coord = np.array(poly).reshape(1, -1, 2)
        world_coord = world_coord.reshape(1, -1, 3)
        return image_coord, world_coord, new_image_size

    def horizontal_text_estimate(self, points):
        """
        Classify the polygon as horizontal or vertical text.
        :param points: polygon points
        :return: True when the bounding box is not markedly taller than it is
            wide (aspect threshold 1.5)
        """
        pts = np.array(points).reshape(-1, 2)
        x_min = int(np.min(pts[:, 0]))
        y_min = int(np.min(pts[:, 1]))
        x_max = int(np.max(pts[:, 0]))
        y_max = int(np.max(pts[:, 1]))
        x = x_max - x_min
        y = y_max - y_min
        is_horizontal_text = True
        # NOTE(review): divides by zero when the box has zero width — confirm
        # callers never pass degenerate polygons
        if y / x > 1.5:  # vertical text condition
            is_horizontal_text = False
        return is_horizontal_text

    def virtual_camera_to_world(self, size):
        """
        Back-project every pixel of the (size[0] x size[1]) rectified image
        through the virtual camera and intersect its viewing ray with the
        quadric self.K (the plane z = 0), giving per-pixel world coordinates.
        :param size: (width, height) of the rectified image
        :return: P, numpy.ndarray of shape (height, width, 3)
        """
        ifu, ifv = self.ifu, self.ifv
        K, matT = self.K, self.matT
        # principal point at the image center; the +1e-6 keeps (u - ppu) and
        # (v - ppv) from being exactly zero (guards arctan(yp / xp) below)
        ppu = size[0] / 2 + 1e-6
        ppv = size[1] / 2 + 1e-6
        P = np.zeros((size[1], size[0], 3))
        lu = np.array([i for i in range(size[0])])
        lv = np.array([i for i in range(size[1])])
        u, v = np.meshgrid(lu, lv)
        # normalized image-plane coordinates
        yp = (v - ppv) * ifv
        xp = (u - ppu) * ifu
        # direction angles of each viewing ray
        angle_a = arctan(sqrt(xp * xp + yp * yp))
        angle_b = arctan(yp / xp)
        D0 = sin(angle_a) * cos(angle_b)
        D1 = sin(angle_a) * sin(angle_b)
        D2 = cos(angle_a)
        # restore the sign lost by arctan on the left half-plane
        D0[xp <= 0] = -D0[xp <= 0]
        D1[xp <= 0] = -D1[xp <= 0]
        # ray/quadric intersection: solve ratio_a*t^2 + ratio_b*t + ratio_c = 0
        ratio_a = (
            K[0, 0] * D0 * D0
            + K[1, 1] * D1 * D1
            + K[2, 2] * D2 * D2
            + (K[0, 1] + K[1, 0]) * D0 * D1
            + (K[0, 2] + K[2, 0]) * D0 * D2
            + (K[1, 2] + K[2, 1]) * D1 * D2
        )
        ratio_b = (
            (K[0, 3] + K[3, 0]) * D0
            + (K[1, 3] + K[3, 1]) * D1
            + (K[2, 3] + K[3, 2]) * D2
        )
        ratio_c = K[3, 3] * np.ones(ratio_b.shape)
        delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
        t = np.zeros(delta.shape)
        # linear case (ratio_a == 0) vs. quadratic case (take the '+' root)
        t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
        t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (
            2 * ratio_a[ratio_a != 0]
        )
        # no real intersection: collapse to the camera origin
        t[delta < 0] = 0
        # world point = camera position + t * rotated ray direction
        P[:, :, 0] = matT[0, 3] + t * (
            matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2
        )
        P[:, :, 1] = matT[1, 3] + t * (
            matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2
        )
        P[:, :, 2] = matT[2, 3] + t * (
            matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2
        )
        return P

    def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
        """
        Project per-pixel world coordinates into the calibrated real camera.
        :param image_size: (width, height) of the target map
        :param world: (h, w, 3) world coordinates from virtual_camera_to_world
        :param intrinsic: 3x3 camera matrix (fu, fv, ppu, ppv read from it)
        :param distCoeffs: distortion coefficients, indexed per the OpenCV
            layout (k1, k2, p1, p2, k3, k4, k5, k6[, s1, s2, s3, s4[, tx, ty]])
        :param rotation: 3x3 rotation matrix (from cv2.Rodrigues)
        :param tvec: translation vector
        :return: P, (h, w, 2) pixel coordinates; points behind the camera are
            mapped to (0, 0)
        """
        r11 = rotation[0, 0]
        r12 = rotation[0, 1]
        r13 = rotation[0, 2]
        r21 = rotation[1, 0]
        r22 = rotation[1, 1]
        r23 = rotation[1, 2]
        r31 = rotation[2, 0]
        r32 = rotation[2, 1]
        r33 = rotation[2, 2]
        t1 = tvec[0]
        t2 = tvec[1]
        t3 = tvec[2]
        k1 = distCoeffs[0]
        k2 = distCoeffs[1]
        p1 = distCoeffs[2]
        p2 = distCoeffs[3]
        k3 = distCoeffs[4]
        k4 = distCoeffs[5]
        k5 = distCoeffs[6]
        k6 = distCoeffs[7]
        # thin-prism terms, present only with CALIB_THIN_PRISM_MODEL
        if len(distCoeffs) > 8:
            s1 = distCoeffs[8]
            s2 = distCoeffs[9]
            s3 = distCoeffs[10]
            s4 = distCoeffs[11]
        else:
            s1 = s2 = s3 = s4 = 0
        # sensor tilt angles, present only with CALIB_TILTED_MODEL
        if len(distCoeffs) > 12:
            tx = distCoeffs[12]
            ty = distCoeffs[13]
        else:
            tx = ty = 0
        fu = intrinsic[0, 0]
        fv = intrinsic[1, 1]
        ppu = intrinsic[0, 2]
        ppv = intrinsic[1, 2]
        cos_tx = cos(tx)
        cos_ty = cos(ty)
        sin_tx = sin(tx)
        sin_ty = sin(ty)
        # tilt-correction matrix terms
        # NOTE(review): derivation of these tao* expressions is not obvious —
        # confirm against OpenCV's tilted sensor model if tilt is ever enabled
        tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
        tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
        tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
        tao21 = -sin_tx * sin_ty
        tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
        tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
        P = np.zeros((image_size[1], image_size[0], 2))
        # camera-frame coordinates: c = R @ world + t
        c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
        c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
        c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
        # pinhole projection
        x1 = c1 / c3
        y1 = c2 / c3
        x12 = x1 * x1
        y12 = y1 * y1
        x1y1 = 2 * x1 * y1
        r2 = x12 + y12
        r4 = r2 * r2
        r6 = r2 * r4
        # rational radial distortion
        radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
            1 + k4 * r2 + k5 * r4 + k6 * r6
        )
        # tangential + thin-prism terms
        x2 = (
            x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
        )
        y2 = (
            y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
        )
        # tilt correction
        x3 = tao11 * x2 + tao12 * y2 + tao13
        y3 = tao21 * x2 + tao22 * y2 + tao23
        # to pixel coordinates
        P[:, :, 0] = fu * x3 + ppu
        P[:, :, 1] = fv * y3 + ppv
        # points behind the camera (c3 <= 0) are mapped to (0, 0)
        P[c3 <= 0] = 0
        return P

    def spatial_transform(
        self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation
    ):
        """
        Resample the source image onto the rectified plane: back-project each
        target pixel into world space, re-project through the calibrated real
        camera, and cv2.remap the source at the resulting coordinates.
        """
        rotation, _ = cv2.Rodrigues(rvecs)
        world_map = self.virtual_camera_to_world(new_image_size)
        image_map = self.world_to_image(
            new_image_size, world_map, mtx, dist, rotation, tvecs
        )
        image_map = image_map.astype(np.float32)
        dst = cv2.remap(
            image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation
        )
        return dst

    def calibrate(self, org_size, image_coord, world_coord):
        """
        calibration
        :param org_size: (w, h) of the source image
        :param image_coord: (1, n, 2) image points
        :param world_coord: (1, n, 3) world points (on the z = 0 plane)
        :return: (ret, mtx, dist, rvecs, tvecs) from cv2.calibrateCamera,
            where ret is the RMS reprojection error used as the "loss"
        """
        # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag = cv2.CALIB_RATIONAL_MODEL
        flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
        flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag4 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
            | cv2.CALIB_FIX_ASPECT_RATIO
        )
        flag5 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_TILTED_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
        )
        flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
        flag_list = [flag2, flag3, flag4, flag5, flag6]
        ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
            world_coord.astype(np.float32),
            image_coord.astype(np.float32),
            org_size,
            None,
            None,
            flags=flag,
        )
        if ret > 2:
            # strategies: error too large — retry with alternative distortion
            # models and keep the best (lowest-error) calibration
            min_ret = ret
            for i, flag in enumerate(flag_list):
                _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(
                    world_coord.astype(np.float32),
                    image_coord.astype(np.float32),
                    org_size,
                    None,
                    None,
                    flags=flag,
                )
                if _ret < min_ret:
                    min_ret = _ret
                    ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
        return ret, mtx, dist, rvecs, tvecs

    def dc_homo(
        self,
        img,
        img_points,
        obj_points,
        is_horizontal_text,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        """
        divide and conquer: homography
        # ratio_width and ratio_height must be 1.0 here
        Rectify the text piecewise: one 4-point homography per adjacent pair
        of polygon points, then stitch the pieces side by side.
        """
        _img_points = img_points.reshape(-1, 2)
        _obj_points = obj_points.reshape(-1, 3)
        homo_img_list = []
        width_list = []
        height_list = []
        # divide and conquer
        for i in range(len(_img_points) // 2 - 1):
            # build a quad from two adjacent top-chain points and their
            # mirrored bottom-chain points
            new_img_points = np.zeros((4, 2)).astype(np.float32)
            new_obj_points = np.zeros((4, 2)).astype(np.float32)
            new_img_points[0:2, :] = _img_points[i : (i + 2), :2]
            new_img_points[2:4, :] = _img_points[::-1, :][i : (i + 2), :2][::-1, :]
            new_obj_points[0:2, :] = _obj_points[i : (i + 2), :2]
            new_obj_points[2:4, :] = _obj_points[::-1, :][i : (i + 2), :2][::-1, :]
            if is_horizontal_text:
                world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
                world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
            else:
                world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
                world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
            # calls the module-level Homography(), not the method below
            homo_img = Homography(
                img,
                new_img_points,
                world_width,
                world_height,
                interpolation=interpolation,
                ratio_width=ratio_width,
                ratio_height=ratio_height,
            )
            homo_img_list.append(homo_img)
            _h, _w = homo_img.shape[:2]
            width_list.append(_w)
            height_list.append(_h)
        # stitching: place the pieces left-to-right on a common canvas
        rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(
            np.uint8
        )
        st = 0
        for homo_img, w, h in zip(homo_img_list, width_list, height_list):
            rectified_image[:h, st : st + w, :] = homo_img
            st += w
        if not is_horizontal_text:
            # vertical rotation
            rectified_image = np.rot90(rectified_image, 3)
        return rectified_image

    def Homography(
        self,
        image,
        img_points,
        world_width,
        world_height,
        interpolation=cv2.INTER_CUBIC,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        """
        Warp a 4-point quad onto an axis-aligned rectangle.
        NOTE(review): duplicates the module-level Homography() and appears
        unused within this file (dc_homo resolves the bare name to the module
        function) — confirm before removing.
        """
        _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
        expand_x = int(0.5 * world_width * (ratio_width - 1))
        expand_y = int(0.5 * world_height * (ratio_height - 1))
        pt_lefttop = [expand_x, expand_y]
        pt_righttop = [expand_x + world_width, expand_y]
        # NOTE(review): the next two names are swapped relative to the corners
        # they hold (this value is the bottom-right corner); the order fed to
        # pts_std is still clockwise TL, TR, BR, BL
        pt_leftbottom = [expand_x + world_width, expand_y + world_height]
        pt_rightbottom = [expand_x, expand_y + world_height]
        pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
        img_crop_width = int(world_width * ratio_width)
        img_crop_height = int(world_height * ratio_height)
        M = cv2.getPerspectiveTransform(_points, pts_std)
        dst_img = cv2.warpPerspective(
            image,
            M,
            (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
            flags=interpolation,
        )
        return dst_img

    def __call__(
        self,
        image_data,
        points,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
        mode="calibration",
    ):
        """
        spatial transform for a poly text
        :param image_data: source image, numpy.ndarray [h, w, 3]
        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
        :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
        :return: (rectified image, loss) — loss is the calibration RMS error,
            or a fixed 0.01 in homography mode
        """
        org_h, org_w = image_data.shape[:2]
        org_size = (org_w, org_h)
        self.image = image_data
        # choose pipeline orientation from the polygon's aspect ratio
        is_horizontal_text = self.horizontal_text_estimate(points)
        if is_horizontal_text:
            image_coord, world_coord, new_image_size = self.horizontal_text_process(
                points
            )
        else:
            image_coord, world_coord, new_image_size = self.vertical_text_process(
                points, org_size
            )
        if mode.lower() == "calibration":
            ret, mtx, dist, rvecs, tvecs = self.calibrate(
                org_size, image_coord, world_coord
            )
            st_size = (
                int(new_image_size[0] * ratio_width),
                int(new_image_size[1] * ratio_height),
            )
            # single calibration view: use the first (only) extrinsics set
            dst = self.spatial_transform(
                image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation
            )
        elif mode.lower() == "homography":
            # ratio_width and ratio_height must be 1.0 here and ret set to 0.01 without loss manually
            ret = 0.01
            dst = self.dc_homo(
                image_data,
                image_coord,
                world_coord,
                is_horizontal_text,
                interpolation=interpolation,
                ratio_width=1.0,
                ratio_height=1.0,
            )
        else:
            raise ValueError(
                'mode must be ["calibration", "homography"], but got {}'.format(mode)
            )
        return dst, ret
  601. class AutoRectifier:
  602. def __init__(self):
  603. self.npoints = 10
  604. self.curveTextRectifier = CurveTextRectifier()
  605. @staticmethod
  606. def get_rotate_crop_image(
  607. img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0
  608. ):
  609. """
  610. crop or homography
  611. :param img:
  612. :param points:
  613. :param interpolation:
  614. :param ratio_width:
  615. :param ratio_height:
  616. :return:
  617. """
  618. h, w = img.shape[:2]
  619. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  620. if len(_points) != 4:
  621. x_min = int(np.min(_points[:, 0]))
  622. y_min = int(np.min(_points[:, 1]))
  623. x_max = int(np.max(_points[:, 0]))
  624. y_max = int(np.max(_points[:, 1]))
  625. dx = x_max - x_min
  626. dy = y_max - y_min
  627. expand_x = int(0.5 * dx * (ratio_width - 1))
  628. expand_y = int(0.5 * dy * (ratio_height - 1))
  629. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  630. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  631. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  632. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  633. dst_img = img[y_min:y_max, x_min:x_max, :].copy()
  634. else:
  635. img_crop_width = int(
  636. max(
  637. np.linalg.norm(_points[0] - _points[1]),
  638. np.linalg.norm(_points[2] - _points[3]),
  639. )
  640. )
  641. img_crop_height = int(
  642. max(
  643. np.linalg.norm(_points[0] - _points[3]),
  644. np.linalg.norm(_points[1] - _points[2]),
  645. )
  646. )
  647. dst_img = Homography(
  648. img,
  649. _points,
  650. img_crop_width,
  651. img_crop_height,
  652. interpolation,
  653. ratio_width,
  654. ratio_height,
  655. )
  656. return dst_img
  657. def visualize(self, image_data, points_list):
  658. visualization = image_data.copy()
  659. for box in points_list:
  660. box = np.array(box).reshape(-1, 2).astype(np.int32)
  661. cv2.drawContours(
  662. visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2
  663. )
  664. for i, p in enumerate(box):
  665. if i != 0:
  666. cv2.circle(
  667. visualization,
  668. tuple(p),
  669. radius=1,
  670. color=(255, 0, 0),
  671. thickness=2,
  672. )
  673. else:
  674. cv2.circle(
  675. visualization,
  676. tuple(p),
  677. radius=1,
  678. color=(255, 255, 0),
  679. thickness=2,
  680. )
  681. return visualization
  682. def __call__(
  683. self,
  684. image_data,
  685. points,
  686. interpolation=cv2.INTER_LINEAR,
  687. ratio_width=1.0,
  688. ratio_height=1.0,
  689. loss_thresh=5.0,
  690. mode="calibration",
  691. ):
  692. """
  693. rectification in strategies for a poly text
  694. :param image_data:
  695. :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
  696. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  697. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  698. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  699. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  700. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  701. :return:
  702. """
  703. _points = np.array(points).reshape(-1, 2)
  704. if len(_points) >= self.npoints and len(_points) % 2 == 0:
  705. try:
  706. curveTextRectifier = CurveTextRectifier()
  707. dst_img, loss = curveTextRectifier(
  708. image_data, points, interpolation, ratio_width, ratio_height, mode
  709. )
  710. if loss >= 2:
  711. # for robust
  712. # large loss means it cannot be reconstruct correctly, we must find other way to reconstruct
  713. img_list, loss_list = [dst_img], [loss]
  714. _dst_img, _loss = PlanB()(
  715. image_data,
  716. points,
  717. curveTextRectifier,
  718. interpolation,
  719. ratio_width,
  720. ratio_height,
  721. loss_thresh=loss_thresh,
  722. square=True,
  723. )
  724. img_list += [_dst_img]
  725. loss_list += [_loss]
  726. _dst_img, _loss = PlanB()(
  727. image_data,
  728. points,
  729. curveTextRectifier,
  730. interpolation,
  731. ratio_width,
  732. ratio_height,
  733. loss_thresh=loss_thresh,
  734. square=False,
  735. )
  736. img_list += [_dst_img]
  737. loss_list += [_loss]
  738. min_loss = min(loss_list)
  739. dst_img = img_list[loss_list.index(min_loss)]
  740. if min_loss >= loss_thresh:
  741. logging.warning(
  742. "calibration loss: {} is too large for spatial transformer. It is failed. Using get_rotate_crop_image".format(
  743. loss
  744. )
  745. )
  746. dst_img = self.get_rotate_crop_image(
  747. image_data, points, interpolation, ratio_width, ratio_height
  748. )
  749. except Exception as e:
  750. logging.warning(f"Exception caught: {e}")
  751. dst_img = self.get_rotate_crop_image(
  752. image_data, points, interpolation, ratio_width, ratio_height
  753. )
  754. else:
  755. dst_img = self.get_rotate_crop_image(
  756. image_data, _points, interpolation, ratio_width, ratio_height
  757. )
  758. return dst_img
  759. def run(
  760. self,
  761. image_data,
  762. points_list,
  763. interpolation=cv2.INTER_LINEAR,
  764. ratio_width=1.0,
  765. ratio_height=1.0,
  766. loss_thresh=5.0,
  767. mode="calibration",
  768. ):
  769. """
  770. run for texts in an image
  771. :param image_data: numpy.ndarray. The shape is [h, w, 3]
  772. :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
  773. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  774. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  775. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  776. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  777. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  778. :return: res: roi-image list, visualized_image: draw polys in original image
  779. """
  780. if image_data is None:
  781. raise ValueError
  782. if not isinstance(points_list, list):
  783. raise ValueError
  784. for points in points_list:
  785. if not isinstance(points, list):
  786. raise ValueError
  787. if ratio_width < 1.0 or ratio_height < 1.0:
  788. raise ValueError(
  789. "ratio_width and ratio_height cannot be smaller than 1, but got {}",
  790. (ratio_width, ratio_height),
  791. )
  792. if mode.lower() != "calibration" and mode.lower() != "homography":
  793. raise ValueError(
  794. 'mode must be ["calibration", "homography"], but got {}'.format(mode)
  795. )
  796. if mode.lower() == "homography" and ratio_width != 1.0 and ratio_height != 1.0:
  797. raise ValueError(
  798. "ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})".format(
  799. mode, ratio_width, ratio_height
  800. )
  801. )
  802. res = []
  803. for points in points_list:
  804. rectified_img = self(
  805. image_data,
  806. points,
  807. interpolation,
  808. ratio_width,
  809. ratio_height,
  810. loss_thresh=loss_thresh,
  811. mode=mode,
  812. )
  813. res.append(rectified_img)
  814. # visualize
  815. visualized_image = self.visualize(image_data, points_list)
  816. return res, visualized_image