seal_det_warp.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os, sys
  15. import numpy as np
  16. from numpy import cos, sin, arctan, sqrt
  17. import cv2
  18. import copy
  19. import time
  20. from .....utils import logging
  21. def Homography(
  22. image,
  23. img_points,
  24. world_width,
  25. world_height,
  26. interpolation=cv2.INTER_CUBIC,
  27. ratio_width=1.0,
  28. ratio_height=1.0,
  29. ):
  30. _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
  31. expand_x = int(0.5 * world_width * (ratio_width - 1))
  32. expand_y = int(0.5 * world_height * (ratio_height - 1))
  33. pt_lefttop = [expand_x, expand_y]
  34. pt_righttop = [expand_x + world_width, expand_y]
  35. pt_leftbottom = [expand_x + world_width, expand_y + world_height]
  36. pt_rightbottom = [expand_x, expand_y + world_height]
  37. pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
  38. img_crop_width = int(world_width * ratio_width)
  39. img_crop_height = int(world_height * ratio_height)
  40. M = cv2.getPerspectiveTransform(_points, pts_std)
  41. dst_img = cv2.warpPerspective(
  42. image,
  43. M,
  44. (img_crop_width, img_crop_height),
  45. borderMode=cv2.BORDER_CONSTANT, # BORDER_CONSTANT BORDER_REPLICATE
  46. flags=interpolation,
  47. )
  48. return dst_img
  49. class PlanB:
  50. def __call__(
  51. self,
  52. image,
  53. points,
  54. curveTextRectifier,
  55. interpolation=cv2.INTER_LINEAR,
  56. ratio_width=1.0,
  57. ratio_height=1.0,
  58. loss_thresh=5.0,
  59. square=False,
  60. ):
  61. """
  62. Plan B using sub-image when it failed in original image
  63. :param image:
  64. :param points:
  65. :param curveTextRectifier: CurveTextRectifier
  66. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  67. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  68. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  69. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  70. :param square: crop square image or not. True or False. The default is False
  71. :return:
  72. """
  73. h, w = image.shape[:2]
  74. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  75. x_min = int(np.min(_points[:, 0]))
  76. y_min = int(np.min(_points[:, 1]))
  77. x_max = int(np.max(_points[:, 0]))
  78. y_max = int(np.max(_points[:, 1]))
  79. dx = x_max - x_min
  80. dy = y_max - y_min
  81. max_d = max(dx, dy)
  82. mean_pt = np.mean(_points, 0)
  83. expand_x = (ratio_width - 1.0) * 0.5 * max_d
  84. expand_y = (ratio_height - 1.0) * 0.5 * max_d
  85. if square:
  86. x_min = np.clip(int(mean_pt[0] - max_d - expand_x), 0, w - 1)
  87. y_min = np.clip(int(mean_pt[1] - max_d - expand_y), 0, h - 1)
  88. x_max = np.clip(int(mean_pt[0] + max_d + expand_x), 0, w - 1)
  89. y_max = np.clip(int(mean_pt[1] + max_d + expand_y), 0, h - 1)
  90. else:
  91. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  92. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  93. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  94. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  95. new_image = image[y_min:y_max, x_min:x_max, :].copy()
  96. new_points = _points.copy()
  97. new_points[:, 0] -= x_min
  98. new_points[:, 1] -= y_min
  99. dst_img, loss = curveTextRectifier(
  100. new_image,
  101. new_points,
  102. interpolation,
  103. ratio_width,
  104. ratio_height,
  105. mode="calibration",
  106. )
  107. return dst_img, loss
class CurveTextRectifier:
    """
    spatial transformer via monocular vision

    Models a fixed virtual camera looking at the text plane; a polygon around
    curved text is mapped to flat "world" coordinates, a camera calibration is
    fitted, and the image is remapped through that camera model.
    """

    def __init__(self):
        self.get_virtual_camera_parameter()

    def get_virtual_camera_parameter(self):
        """Build the fixed virtual-camera pose matrix and the quadric matrix K
        used later for ray/surface intersection in virtual_camera_to_world."""
        # Virtual camera pose: rotations in degrees, translation in arbitrary units.
        vcam_thz = 0
        vcam_thx1 = 180
        vcam_thy = 180
        vcam_thx2 = 0
        vcam_x = 0
        vcam_y = 0
        vcam_z = 100
        radian = np.pi / 180
        angle_z = radian * vcam_thz
        angle_x1 = radian * vcam_thx1
        angle_y = radian * vcam_thy
        angle_x2 = radian * vcam_thx2
        optic_x = vcam_x
        optic_y = vcam_y
        optic_z = vcam_z
        # Virtual focal lengths (pixels).
        fu = 100
        fv = 100
        # matT: 4x4 homogeneous transform composed from the Z-X1-Y-X2 rotations
        # above plus the optical-center translation.
        matT = np.zeros((4, 4))
        matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[0, 3] = optic_x
        matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(
            angle_y
        )
        matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
        )
        matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
        )
        matT[1, 3] = optic_y
        matT[2, 0] = -cos(angle_x1) * sin(angle_y)
        matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(
            angle_x2
        )
        matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(
            angle_x2
        )
        matT[2, 3] = optic_z
        matT[3, 0] = 0
        matT[3, 1] = 0
        matT[3, 2] = 0
        matT[3, 3] = 1
        # matS: sparse symmetric matrix defining the target surface in camera
        # coordinates; K = matT^T * matS * matT is the quadric in world frame.
        matS = np.zeros((4, 4))
        matS[2, 3] = 0.5
        matS[3, 2] = 0.5
        self.ifu = 1 / fu  # inverse focal lengths, used to unproject pixels
        self.ifv = 1 / fv
        self.matT = matT
        self.matS = matS
        self.K = np.dot(matT.T, matS)
        self.K = np.dot(self.K, matT)

    def vertical_text_process(self, points, org_size):
        """
        change sequence and process
        :param points: flattened polygon [x1,y1,x2,y2,...]
        :param org_size: (org_w, org_h) of the original image
        :return: (image_coord, world_coord, new_image_size)
        """
        org_w, org_h = org_size
        # Rotate the point sequence by one (x,y) pair so the first point is
        # the top-left of the first character after the axis swap below.
        _points = np.array(points).reshape(-1).tolist()
        _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
        # convert to horizontal points (rotate coordinates 90 degrees)
        adjusted_points = np.zeros(_points.shape, dtype=np.float32)
        adjusted_points[:, 0] = _points[:, 1]
        adjusted_points[:, 1] = org_h - _points[:, 0] - 1
        _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(
            adjusted_points
        )
        # convert back to vertical points
        image_coord = _points.reshape(1, -1, 2)
        world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
        # Inverse of the 90-degree rotation applied above.
        world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
        world_coord[:, :, 1] = _world_coord[:, :, 0]
        world_coord[:, :, 2] = _world_coord[:, :, 2]
        new_image_size = (_new_image_size[1], _new_image_size[0])
        return image_coord, world_coord, new_image_size

    def horizontal_text_process(self, points):
        """
        get image coordinate and world coordinate
        :param points: flattened polygon, top edge left-to-right then bottom
            edge right-to-left (clockwise order)
        :return: (image_coord (1,N,2), world_coord (1,N,3), new_image_size)
        """
        poly = np.array(points).reshape(-1)
        dx_list = []
        dy_list = []
        # Segment lengths along the polygon's top/bottom boundary.
        for i in range(1, len(poly) // 2):
            xdx = poly[i * 2] - poly[(i - 1) * 2]
            xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
            d = sqrt(xdx**2 + xdy**2)
            dx_list.append(d)
        # Distances between paired top/bottom points (text height samples).
        for i in range(0, len(poly) // 4):
            ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
            ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
            d = sqrt(ydx**2 + ydy**2)
            dy_list.append(d)
        # Average each top-edge segment with its mirrored bottom-edge segment.
        dx_list = [
            (dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2
            for i in range(len(dx_list) // 2)
        ]
        height = np.around(np.mean(dy_list))
        # Unroll the curved polygon into a flat rectangle: accumulate the
        # averaged segment widths along y=0 (top edge) ...
        rect_coord = [0, 0]
        for i in range(0, len(poly) // 4 - 1):
            x = rect_coord[-2]
            x += dx_list[i]
            y = 0
            rect_coord.append(x)
            rect_coord.append(y)
        # ... then mirror the x values at y=height for the bottom edge.
        rect_coord_half = copy.deepcopy(rect_coord)
        for i in range(0, len(poly) // 4):
            x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
            y = height
            rect_coord.append(x)
            rect_coord.append(y)
        np_rect_coord = np.array(rect_coord).reshape(-1, 2)
        x_min = np.min(np_rect_coord[:, 0])
        y_min = np.min(np_rect_coord[:, 1])
        x_max = np.max(np_rect_coord[:, 0])
        y_max = np.max(np_rect_coord[:, 1])
        new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
        # Center the world coordinates around the origin.
        x_mean = (x_max - x_min) / 2
        y_mean = (y_max - y_min) / 2
        np_rect_coord[:, 0] -= x_mean
        np_rect_coord[:, 1] -= y_mean
        rect_coord = np_rect_coord.reshape(-1).tolist()
        rect_coord = np.array(rect_coord).reshape(-1, 2)
        # World points lie on the z=0 plane.
        world_coord = np.ones((len(rect_coord), 3)) * 0
        world_coord[:, :2] = rect_coord
        image_coord = np.array(poly).reshape(1, -1, 2)
        world_coord = world_coord.reshape(1, -1, 3)
        return image_coord, world_coord, new_image_size

    def horizontal_text_estimate(self, points):
        """
        horizontal or vertical text
        :param points: flattened polygon coordinates
        :return: True if the polygon's bounding box is horizontal-ish
        """
        pts = np.array(points).reshape(-1, 2)
        x_min = int(np.min(pts[:, 0]))
        y_min = int(np.min(pts[:, 1]))
        x_max = int(np.max(pts[:, 0]))
        y_max = int(np.max(pts[:, 1]))
        x = x_max - x_min
        y = y_max - y_min
        is_horizontal_text = True
        if y / x > 1.5:  # vertical text condition
            is_horizontal_text = False
        return is_horizontal_text

    def virtual_camera_to_world(self, size):
        """For every pixel of the (w, h) virtual image, cast a ray through the
        virtual camera and intersect it with the quadric K; returns the world
        coordinates as an (h, w, 3) map."""
        ifu, ifv = self.ifu, self.ifv
        K, matT = self.K, self.matT
        # Principal point at the image center (epsilon avoids division by zero
        # in the xp/yp ratios below).
        ppu = size[0] / 2 + 1e-6
        ppv = size[1] / 2 + 1e-6
        P = np.zeros((size[1], size[0], 3))
        lu = np.array([i for i in range(size[0])])
        lv = np.array([i for i in range(size[1])])
        u, v = np.meshgrid(lu, lv)
        # Normalized image-plane coordinates.
        yp = (v - ppv) * ifv
        xp = (u - ppu) * ifu
        # Ray direction (D0, D1, D2) in spherical form.
        angle_a = arctan(sqrt(xp * xp + yp * yp))
        angle_b = arctan(yp / xp)
        D0 = sin(angle_a) * cos(angle_b)
        D1 = sin(angle_a) * sin(angle_b)
        D2 = cos(angle_a)
        # arctan(yp/xp) loses the quadrant; flip the direction where xp <= 0.
        D0[xp <= 0] = -D0[xp <= 0]
        D1[xp <= 0] = -D1[xp <= 0]
        # Quadratic a*t^2 + b*t + c = 0 for the ray parameter t against K.
        ratio_a = (
            K[0, 0] * D0 * D0
            + K[1, 1] * D1 * D1
            + K[2, 2] * D2 * D2
            + (K[0, 1] + K[1, 0]) * D0 * D1
            + (K[0, 2] + K[2, 0]) * D0 * D2
            + (K[1, 2] + K[2, 1]) * D1 * D2
        )
        ratio_b = (
            (K[0, 3] + K[3, 0]) * D0
            + (K[1, 3] + K[3, 1]) * D1
            + (K[2, 3] + K[3, 2]) * D2
        )
        ratio_c = K[3, 3] * np.ones(ratio_b.shape)
        delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
        t = np.zeros(delta.shape)
        # Degenerate (linear) case and the regular quadratic root.
        t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
        t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (
            2 * ratio_a[ratio_a != 0]
        )
        t[delta < 0] = 0  # no real intersection
        # World point = camera origin + t * rotated ray direction.
        P[:, :, 0] = matT[0, 3] + t * (
            matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2
        )
        P[:, :, 1] = matT[1, 3] + t * (
            matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2
        )
        P[:, :, 2] = matT[2, 3] + t * (
            matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2
        )
        return P

    def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
        """Project the (h, w, 3) world map into source-image pixel coordinates
        using the calibrated camera (intrinsics, full OpenCV distortion model
        incl. rational, thin-prism and tilted-sensor terms)."""
        r11 = rotation[0, 0]
        r12 = rotation[0, 1]
        r13 = rotation[0, 2]
        r21 = rotation[1, 0]
        r22 = rotation[1, 1]
        r23 = rotation[1, 2]
        r31 = rotation[2, 0]
        r32 = rotation[2, 1]
        r33 = rotation[2, 2]
        t1 = tvec[0]
        t2 = tvec[1]
        t3 = tvec[2]
        # Radial (k1..k6), tangential (p1, p2) distortion coefficients.
        k1 = distCoeffs[0]
        k2 = distCoeffs[1]
        p1 = distCoeffs[2]
        p2 = distCoeffs[3]
        k3 = distCoeffs[4]
        k4 = distCoeffs[5]
        k5 = distCoeffs[6]
        k6 = distCoeffs[7]
        # Optional thin-prism (s1..s4) and sensor-tilt (tx, ty) terms.
        if len(distCoeffs) > 8:
            s1 = distCoeffs[8]
            s2 = distCoeffs[9]
            s3 = distCoeffs[10]
            s4 = distCoeffs[11]
        else:
            s1 = s2 = s3 = s4 = 0
        if len(distCoeffs) > 12:
            tx = distCoeffs[12]
            ty = distCoeffs[13]
        else:
            tx = ty = 0
        fu = intrinsic[0, 0]
        fv = intrinsic[1, 1]
        ppu = intrinsic[0, 2]
        ppv = intrinsic[1, 2]
        # Tilted-sensor correction matrix entries.
        cos_tx = cos(tx)
        cos_ty = cos(ty)
        sin_tx = sin(tx)
        sin_ty = sin(ty)
        tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
        tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
        tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
        tao21 = -sin_tx * sin_ty
        tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
        tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
        P = np.zeros((image_size[1], image_size[0], 2))
        # Rigid transform into the calibrated camera frame.
        c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
        c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
        c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
        # Perspective division.
        x1 = c1 / c3
        y1 = c2 / c3
        x12 = x1 * x1
        y12 = y1 * y1
        x1y1 = 2 * x1 * y1
        r2 = x12 + y12
        r4 = r2 * r2
        r6 = r2 * r4
        # Rational radial distortion.
        radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
            1 + k4 * r2 + k5 * r4 + k6 * r6
        )
        # Tangential + thin-prism distortion.
        x2 = (
            x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
        )
        y2 = (
            y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
        )
        # Tilted-sensor correction, then pixel coordinates.
        x3 = tao11 * x2 + tao12 * y2 + tao13
        y3 = tao21 * x2 + tao22 * y2 + tao23
        P[:, :, 0] = fu * x3 + ppu
        P[:, :, 1] = fv * y3 + ppv
        # Points behind the camera are mapped to (0, 0).
        P[c3 <= 0] = 0
        return P

    def spatial_transform(
        self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation
    ):
        """Remap the source image through virtual-camera -> world -> calibrated
        camera, producing the rectified image of size new_image_size."""
        rotation, _ = cv2.Rodrigues(rvecs)
        world_map = self.virtual_camera_to_world(new_image_size)
        image_map = self.world_to_image(
            new_image_size, world_map, mtx, dist, rotation, tvecs
        )
        image_map = image_map.astype(np.float32)
        dst = cv2.remap(
            image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation
        )
        return dst

    def calibrate(self, org_size, image_coord, world_coord):
        """
        calibration
        :param org_size: (w, h) of the source image
        :param image_coord: polygon points in image space, shape (1, N, 2)
        :param world_coord: flattened polygon points in world space, (1, N, 3)
        :return: (reprojection error, camera matrix, distortion, rvecs, tvecs)
        """
        # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag = cv2.CALIB_RATIONAL_MODEL
        flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
        flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
        flag4 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
            | cv2.CALIB_FIX_ASPECT_RATIO
        )
        flag5 = (
            cv2.CALIB_RATIONAL_MODEL
            | cv2.CALIB_TILTED_MODEL
            | cv2.CALIB_ZERO_TANGENT_DIST
        )
        flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
        flag_list = [flag2, flag3, flag4, flag5, flag6]
        ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
            world_coord.astype(np.float32),
            image_coord.astype(np.float32),
            org_size,
            None,
            None,
            flags=flag,
        )
        if ret > 2:
            # strategies: retry with alternative distortion-model flags and
            # keep the calibration with the smallest reprojection error
            min_ret = ret
            for i, flag in enumerate(flag_list):
                _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(
                    world_coord.astype(np.float32),
                    image_coord.astype(np.float32),
                    org_size,
                    None,
                    None,
                    flags=flag,
                )
                if _ret < min_ret:
                    min_ret = _ret
                    ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
        return ret, mtx, dist, rvecs, tvecs

    def dc_homo(
        self,
        img,
        img_points,
        obj_points,
        is_horizontal_text,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        """
        divide and conquer: homography
        Rectify the polygon piecewise: one quad (two consecutive top points +
        the two mirrored bottom points) at a time, then stitch horizontally.
        # ratio_width and ratio_height must be 1.0 here
        """
        _img_points = img_points.reshape(-1, 2)
        _obj_points = obj_points.reshape(-1, 3)
        homo_img_list = []
        width_list = []
        height_list = []
        # divide and conquer
        for i in range(len(_img_points) // 2 - 1):
            # Build a 4-point quad from points i, i+1 on the top edge and
            # their mirrored counterparts on the bottom edge.
            new_img_points = np.zeros((4, 2)).astype(np.float32)
            new_obj_points = np.zeros((4, 2)).astype(np.float32)
            new_img_points[0:2, :] = _img_points[i : (i + 2), :2]
            new_img_points[2:4, :] = _img_points[::-1, :][i : (i + 2), :2][::-1, :]
            new_obj_points[0:2, :] = _obj_points[i : (i + 2), :2]
            new_obj_points[2:4, :] = _obj_points[::-1, :][i : (i + 2), :2][::-1, :]
            if is_horizontal_text:
                world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
                world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
            else:
                # Vertical text: world axes are swapped.
                world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
                world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
            homo_img = Homography(
                img,
                new_img_points,
                world_width,
                world_height,
                interpolation=interpolation,
                ratio_width=ratio_width,
                ratio_height=ratio_height,
            )
            homo_img_list.append(homo_img)
            _h, _w = homo_img.shape[:2]
            width_list.append(_w)
            height_list.append(_h)
        # stitching: place the piecewise quads side by side
        rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(
            np.uint8
        )
        st = 0
        for homo_img, w, h in zip(homo_img_list, width_list, height_list):
            rectified_image[:h, st : st + w, :] = homo_img
            st += w
        if not is_horizontal_text:
            # vertical rotation
            rectified_image = np.rot90(rectified_image, 3)
        return rectified_image

    def Homography(
        self,
        image,
        img_points,
        world_width,
        world_height,
        interpolation=cv2.INTER_CUBIC,
        ratio_width=1.0,
        ratio_height=1.0,
    ):
        # NOTE(review): duplicates the module-level Homography function;
        # kept for API compatibility.
        _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
        expand_x = int(0.5 * world_width * (ratio_width - 1))
        expand_y = int(0.5 * world_height * (ratio_height - 1))
        pt_lefttop = [expand_x, expand_y]
        pt_righttop = [expand_x + world_width, expand_y]
        # NOTE(review): the next two names are swapped relative to their
        # coordinates (this is actually bottom-right, then bottom-left), but
        # the resulting clockwise order is correct.
        pt_leftbottom = [expand_x + world_width, expand_y + world_height]
        pt_rightbottom = [expand_x, expand_y + world_height]
        pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
        img_crop_width = int(world_width * ratio_width)
        img_crop_height = int(world_height * ratio_height)
        M = cv2.getPerspectiveTransform(_points, pts_std)
        dst_img = cv2.warpPerspective(
            image,
            M,
            (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
            flags=interpolation,
        )
        return dst_img

    def __call__(
        self,
        image_data,
        points,
        interpolation=cv2.INTER_LINEAR,
        ratio_width=1.0,
        ratio_height=1.0,
        mode="calibration",
    ):
        """
        spatial transform for a poly text
        :param image_data:
        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
        :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
        :return: (rectified image, loss) -- loss is the calibration
            reprojection error, or the constant 0.01 in homography mode
        """
        org_h, org_w = image_data.shape[:2]
        org_size = (org_w, org_h)
        self.image = image_data
        is_horizontal_text = self.horizontal_text_estimate(points)
        if is_horizontal_text:
            image_coord, world_coord, new_image_size = self.horizontal_text_process(
                points
            )
        else:
            image_coord, world_coord, new_image_size = self.vertical_text_process(
                points, org_size
            )
        if mode.lower() == "calibration":
            ret, mtx, dist, rvecs, tvecs = self.calibrate(
                org_size, image_coord, world_coord
            )
            st_size = (
                int(new_image_size[0] * ratio_width),
                int(new_image_size[1] * ratio_height),
            )
            # Only the first view's distortion/pose is used (single view).
            dst = self.spatial_transform(
                image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation
            )
        elif mode.lower() == "homography":
            # ratio_width and ratio_height must be 1.0 here and ret set to 0.01 without loss manually
            ret = 0.01
            dst = self.dc_homo(
                image_data,
                image_coord,
                world_coord,
                is_horizontal_text,
                interpolation=interpolation,
                ratio_width=1.0,
                ratio_height=1.0,
            )
        else:
            raise ValueError(
                'mode must be ["calibration", "homography"], but got {}'.format(mode)
            )
        return dst, ret
  600. class AutoRectifier:
  601. def __init__(self):
  602. self.npoints = 10
  603. self.curveTextRectifier = CurveTextRectifier()
  604. @staticmethod
  605. def get_rotate_crop_image(
  606. img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0
  607. ):
  608. """
  609. crop or homography
  610. :param img:
  611. :param points:
  612. :param interpolation:
  613. :param ratio_width:
  614. :param ratio_height:
  615. :return:
  616. """
  617. h, w = img.shape[:2]
  618. _points = np.array(points).reshape(-1, 2).astype(np.float32)
  619. if len(_points) != 4:
  620. x_min = int(np.min(_points[:, 0]))
  621. y_min = int(np.min(_points[:, 1]))
  622. x_max = int(np.max(_points[:, 0]))
  623. y_max = int(np.max(_points[:, 1]))
  624. dx = x_max - x_min
  625. dy = y_max - y_min
  626. expand_x = int(0.5 * dx * (ratio_width - 1))
  627. expand_y = int(0.5 * dy * (ratio_height - 1))
  628. x_min = np.clip(int(x_min - expand_x), 0, w - 1)
  629. y_min = np.clip(int(y_min - expand_y), 0, h - 1)
  630. x_max = np.clip(int(x_max + expand_x), 0, w - 1)
  631. y_max = np.clip(int(y_max + expand_y), 0, h - 1)
  632. dst_img = img[y_min:y_max, x_min:x_max, :].copy()
  633. else:
  634. img_crop_width = int(
  635. max(
  636. np.linalg.norm(_points[0] - _points[1]),
  637. np.linalg.norm(_points[2] - _points[3]),
  638. )
  639. )
  640. img_crop_height = int(
  641. max(
  642. np.linalg.norm(_points[0] - _points[3]),
  643. np.linalg.norm(_points[1] - _points[2]),
  644. )
  645. )
  646. dst_img = Homography(
  647. img,
  648. _points,
  649. img_crop_width,
  650. img_crop_height,
  651. interpolation,
  652. ratio_width,
  653. ratio_height,
  654. )
  655. return dst_img
  656. def visualize(self, image_data, points_list):
  657. visualization = image_data.copy()
  658. for box in points_list:
  659. box = np.array(box).reshape(-1, 2).astype(np.int32)
  660. cv2.drawContours(
  661. visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2
  662. )
  663. for i, p in enumerate(box):
  664. if i != 0:
  665. cv2.circle(
  666. visualization,
  667. tuple(p),
  668. radius=1,
  669. color=(255, 0, 0),
  670. thickness=2,
  671. )
  672. else:
  673. cv2.circle(
  674. visualization,
  675. tuple(p),
  676. radius=1,
  677. color=(255, 255, 0),
  678. thickness=2,
  679. )
  680. return visualization
  681. def __call__(
  682. self,
  683. image_data,
  684. points,
  685. interpolation=cv2.INTER_LINEAR,
  686. ratio_width=1.0,
  687. ratio_height=1.0,
  688. loss_thresh=5.0,
  689. mode="calibration",
  690. ):
  691. """
  692. rectification in strategies for a poly text
  693. :param image_data:
  694. :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
  695. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  696. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  697. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  698. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  699. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  700. :return:
  701. """
  702. _points = np.array(points).reshape(-1, 2)
  703. if len(_points) >= self.npoints and len(_points) % 2 == 0:
  704. try:
  705. curveTextRectifier = CurveTextRectifier()
  706. dst_img, loss = curveTextRectifier(
  707. image_data, points, interpolation, ratio_width, ratio_height, mode
  708. )
  709. if loss >= 2:
  710. # for robust
  711. # large loss means it cannot be reconstruct correctly, we must find other way to reconstruct
  712. img_list, loss_list = [dst_img], [loss]
  713. _dst_img, _loss = PlanB()(
  714. image_data,
  715. points,
  716. curveTextRectifier,
  717. interpolation,
  718. ratio_width,
  719. ratio_height,
  720. loss_thresh=loss_thresh,
  721. square=True,
  722. )
  723. img_list += [_dst_img]
  724. loss_list += [_loss]
  725. _dst_img, _loss = PlanB()(
  726. image_data,
  727. points,
  728. curveTextRectifier,
  729. interpolation,
  730. ratio_width,
  731. ratio_height,
  732. loss_thresh=loss_thresh,
  733. square=False,
  734. )
  735. img_list += [_dst_img]
  736. loss_list += [_loss]
  737. min_loss = min(loss_list)
  738. dst_img = img_list[loss_list.index(min_loss)]
  739. if min_loss >= loss_thresh:
  740. logging.warning(
  741. "calibration loss: {} is too large for spatial transformer. It is failed. Using get_rotate_crop_image".format(
  742. loss
  743. )
  744. )
  745. dst_img = self.get_rotate_crop_image(
  746. image_data, points, interpolation, ratio_width, ratio_height
  747. )
  748. except Exception as e:
  749. logging.warning(f"Exception caught: {e}")
  750. dst_img = self.get_rotate_crop_image(
  751. image_data, points, interpolation, ratio_width, ratio_height
  752. )
  753. else:
  754. dst_img = self.get_rotate_crop_image(
  755. image_data, _points, interpolation, ratio_width, ratio_height
  756. )
  757. return dst_img
  758. def run(
  759. self,
  760. image_data,
  761. points_list,
  762. interpolation=cv2.INTER_LINEAR,
  763. ratio_width=1.0,
  764. ratio_height=1.0,
  765. loss_thresh=5.0,
  766. mode="calibration",
  767. ):
  768. """
  769. run for texts in an image
  770. :param image_data: numpy.ndarray. The shape is [h, w, 3]
  771. :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
  772. :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
  773. :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
  774. :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
  775. :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
  776. :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
  777. :return: res: roi-image list, visualized_image: draw polys in original image
  778. """
  779. if image_data is None:
  780. raise ValueError
  781. if not isinstance(points_list, list):
  782. raise ValueError
  783. for points in points_list:
  784. if not isinstance(points, list):
  785. raise ValueError
  786. if ratio_width < 1.0 or ratio_height < 1.0:
  787. raise ValueError(
  788. "ratio_width and ratio_height cannot be smaller than 1, but got {}",
  789. (ratio_width, ratio_height),
  790. )
  791. if mode.lower() != "calibration" and mode.lower() != "homography":
  792. raise ValueError(
  793. 'mode must be ["calibration", "homography"], but got {}'.format(mode)
  794. )
  795. if mode.lower() == "homography" and ratio_width != 1.0 and ratio_height != 1.0:
  796. raise ValueError(
  797. "ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})".format(
  798. mode, ratio_width, ratio_height
  799. )
  800. )
  801. res = []
  802. for points in points_list:
  803. rectified_img = self(
  804. image_data,
  805. points,
  806. interpolation,
  807. ratio_width,
  808. ratio_height,
  809. loss_thresh=loss_thresh,
  810. mode=mode,
  811. )
  812. res.append(rectified_img)
  813. # visualize
  814. visualized_image = self.visualize(image_data, points_list)
  815. return res, visualized_image