text_det.py

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import cv2
import copy
import math
import pyclipper
import numpy as np
from numpy.linalg import norm
from PIL import Image
from shapely.geometry import Polygon

from ...utils.io import ImageReader
from ....utils import logging
from ..base import BaseComponent
from .seal_det_warp import AutoRectifier

__all__ = ["DetResizeForTest", "NormalizeImage", "DBPostProcess", "CropByPolys"]


class DetResizeForTest(BaseComponent):
    """DetResizeForTest"""

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img", "img_shape"]
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img", "img_shape": "img_shape"}

    def __init__(self, **kwargs):
        super().__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if "image_shape" in kwargs:
            self.image_shape = kwargs["image_shape"]
            self.resize_type = 1
            if "keep_ratio" in kwargs:
                self.keep_ratio = kwargs["keep_ratio"]
        elif "limit_side_len" in kwargs:
            self.limit_side_len = kwargs["limit_side_len"]
            self.limit_type = kwargs.get("limit_type", "min")
        elif "resize_long" in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get("resize_long", 960)
        else:
            self.limit_side_len = 736
            self.limit_type = "min"

    def apply(self, img):
        """apply"""
        src_h, src_w, _ = img.shape
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            # img, shape = self.resize_image_type0(img)
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            # img, shape = self.resize_image_type1(img)
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        return {"img": img, "img_shape": np.array([src_h, src_w, ratio_h, ratio_w])}

    def image_padding(self, im, value=0):
        """pad the image to at least 32 x 32"""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """resize the image to a fixed shape, optionally keeping the aspect ratio"""
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # return img, np.array([ori_h, ori_w])
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize the image to a size that is a multiple of 32, as required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        # limit the max side
        if self.limit_type == "max":
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == "min":
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == "resize_long":
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception("unsupported limit_type: {}".format(self.limit_type))

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except:
            logging.info(
                "resize failed: img.shape={}, resize_w={}, resize_h={}".format(
                    img.shape, resize_w, resize_h
                )
            )
            sys.exit(0)

        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """resize the image so that the longer side is scaled to ``resize_long`` and rounded up to a multiple of 128"""
        h, w, _ = img.shape
        resize_w = w
        resize_h = h

        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]
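
# Example (illustrative sketch, not executed at import): with the default
# ``limit_type="min"`` and ``limit_side_len=736``, a 500 x 1000 input is scaled
# by 736 / 500 = 1.472 and rounded to multiples of 32:
#
#   resizer = DetResizeForTest(limit_side_len=736, limit_type="min")
#   out = resizer.apply(np.zeros((500, 1000, 3), dtype=np.uint8))
#   # out["img"].shape -> (736, 1472, 3)
#   # out["img_shape"] -> [500, 1000, 1.472, 1.472]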


class NormalizeImage(BaseComponent):
    """normalize the image, e.g. subtract the mean and divide by the std"""

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img"]
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
        super().__init__()
        if isinstance(scale, str):
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == "chw" else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype("float32")
        self.std = np.array(std).reshape(shape).astype("float32")

    def apply(self, img):
        """apply"""
        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
        img = (img.astype("float32") * self.scale - self.mean) / self.std
        return {"img": img}
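
# Example (illustrative sketch, not executed at import): with the default
# scale of 1/255 and the ImageNet mean/std above (order="hwc"), a pure-white
# pixel [255, 255, 255] maps to (1.0 - mean) / std, roughly [2.25, 2.43, 2.64]:
#
#   normalizer = NormalizeImage(order="hwc")
#   out = normalizer.apply(np.full((1, 1, 3), 255, dtype=np.uint8))
#   # out["img"][0, 0] ~ [2.249, 2.429, 2.640]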


class DBPostProcess(BaseComponent):
    """
    The post process for Differentiable Binarization (DB).
    """

    INPUT_KEYS = ["pred", "img_shape"]
    OUTPUT_KEYS = ["dt_polys", "dt_scores"]
    DEAULT_INPUTS = {"pred": "pred", "img_shape": "img_shape"}
    DEAULT_OUTPUTS = {"dt_polys": "dt_polys", "dt_scores": "dt_scores"}

    def __init__(
        self,
        thresh=0.3,
        box_thresh=0.7,
        max_candidates=1000,
        unclip_ratio=2.0,
        use_dilation=False,
        score_mode="fast",
        box_type="quad",
        **kwargs
    ):
        super().__init__()
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            "slow",
            "fast",
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array([[1, 1], [1, 1]])

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """_bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1}"""
        bitmap = _bitmap
        height, width = bitmap.shape
        boxes = []
        scores = []

        contours, _ = cv2.findContours(
            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )

        for contour in contours[: self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            if points.shape[0] > 2:
                box = self.unclip(points, self.unclip_ratio)
                if len(box) > 1:
                    continue
            else:
                continue
            box = box.reshape(-1, 2)

            if len(box) > 0:
                _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
                if sside < self.min_size + 2:
                    continue
            else:
                continue

            box = np.array(box)
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height
            )
            boxes.append(box)
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """_bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1}"""
        bitmap = _bitmap
        height, width = bitmap.shape

        outs = cv2.findContours(
            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height
            )
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box, unclip_ratio):
        """unclip"""
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        try:
            expanded = np.array(offset.Execute(distance))
        except ValueError:
            expanded = np.array(offset.Execute(distance)[0])
        return expanded
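
    # Note on the unclip step (illustrative, not executed): DB shrinks text
    # regions during training, so detected contours are expanded back by an
    # offset distance d = area * unclip_ratio / perimeter. For an axis-aligned
    # 100 x 20 box with unclip_ratio=2.0, d = 2000 * 2.0 / 240 ~ 16.7 px, so
    # the expanded quad is roughly 133 x 53 before get_mini_boxes re-fits it.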

    def get_mini_boxes(self, contour):
        """get mini boxes"""
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [points[index_1], points[index_2], points[index_3], points[index_4]]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """box_score_fast: use bbox mean score as the mean score"""
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        """box_score_slow: use polygon mean score as the mean score"""
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin
        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def apply(self, pred, img_shape):
        """apply"""
        pred = pred[0][0, :, :]
        segmentation = pred > self.thresh
        src_h, src_w, ratio_h, ratio_w = img_shape

        if self.dilation_kernel is not None:
            mask = cv2.dilate(
                np.array(segmentation).astype(np.uint8),
                self.dilation_kernel,
            )
        else:
            mask = segmentation

        if self.box_type == "poly":
            boxes, scores = self.polygons_from_bitmap(pred, mask, src_w, src_h)
        elif self.box_type == "quad":
            boxes, scores = self.boxes_from_bitmap(pred, mask, src_w, src_h)
        else:
            raise ValueError("box_type can only be one of ['quad', 'poly']")

        return {"dt_polys": boxes, "dt_scores": scores}
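
# Usage sketch (not executed at import; assumes the probability map produced
# upstream in this pipeline, i.e. ``prob_map`` is a sequence whose first
# element has shape (1, H, W), and ``img_shape`` is the array returned by
# DetResizeForTest as [src_h, src_w, ratio_h, ratio_w]):
#
#   post = DBPostProcess(thresh=0.3, box_thresh=0.6, unclip_ratio=1.5, box_type="quad")
#   result = post.apply(prob_map, img_shape)
#   # result["dt_polys"]: int16 array of quads, shape (N, 4, 2), in source-image coordinates
#   # result["dt_scores"]: per-box mean probability scores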


class CropByPolys(BaseComponent):
    """Crop Image by Polys"""

    INPUT_KEYS = ["input_path", "dt_polys"]
    OUTPUT_KEYS = ["img"]
    DEAULT_INPUTS = {"input_path": "input_path", "dt_polys": "dt_polys"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, det_box_type="quad"):
        super().__init__()
        self.det_box_type = det_box_type
        self._reader = ImageReader(backend="opencv")

    def apply(self, input_path, dt_polys):
        """apply"""
        img = self._reader.read(input_path)

        if self.det_box_type == "quad":
            dt_boxes = np.array(dt_polys)
            output_list = []
            for bno in range(len(dt_boxes)):
                tmp_box = copy.deepcopy(dt_boxes[bno])
                img_crop = self.get_minarea_rect_crop(img, tmp_box)
                output_list.append(
                    {
                        "img": img_crop,
                        "img_size": [img_crop.shape[1], img_crop.shape[0]],
                    }
                )
        elif self.det_box_type == "poly":
            output_list = []
            dt_boxes = dt_polys
            for bno in range(len(dt_boxes)):
                tmp_box = copy.deepcopy(dt_boxes[bno])
                img_crop = self.get_poly_rect_crop(img.copy(), tmp_box)
                output_list.append(
                    {
                        "img": img_crop,
                        "img_size": [img_crop.shape[1], img_crop.shape[0]],
                    }
                )
        else:
            raise NotImplementedError

        return output_list

    def get_minarea_rect_crop(self, img, points):
        """get_minarea_rect_crop"""
        bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_a, index_b, index_c, index_d = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_a = 0
            index_d = 1
        else:
            index_a = 1
            index_d = 0
        if points[3][1] > points[2][1]:
            index_b = 2
            index_c = 3
        else:
            index_b = 3
            index_c = 2

        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
        crop_img = self.get_rotate_crop_image(img, np.array(box))
        return crop_img

    def get_rotate_crop_image(self, img, points):
        """
        img_height, img_width = img.shape[0:2]
        left = int(np.min(points[:, 0]))
        right = int(np.max(points[:, 0]))
        top = int(np.min(points[:, 1]))
        bottom = int(np.max(points[:, 1]))
        img_crop = img[top:bottom, left:right, :].copy()
        points[:, 0] = points[:, 0] - left
        points[:, 1] = points[:, 1] - top
        """
        assert len(points) == 4, "shape of points must be 4*2"
        img_crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3]),
            )
        )
        img_crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2]),
            )
        )
        pts_std = np.float32(
            [
                [0, 0],
                [img_crop_width, 0],
                [img_crop_width, img_crop_height],
                [0, img_crop_height],
            ]
        )
        M = cv2.getPerspectiveTransform(points, pts_std)
        dst_img = cv2.warpPerspective(
            img,
            M,
            (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE,
            flags=cv2.INTER_CUBIC,
        )
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img
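
    # Example (illustrative, not executed): a quad given clockwise from the
    # top-left, e.g. [[10, 10], [110, 20], [108, 50], [8, 40]], is warped to an
    # upright crop of about 100 x 30 pixels; if the result is much taller than
    # it is wide (height / width >= 1.5, typical for vertical text), it is
    # rotated 90 degrees so downstream recognition sees horizontal text.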

    def reorder_poly_edge(self, points):
        """Get the respective points composing head edge, tail edge, top
        sideline and bottom sideline.

        Args:
            points (ndarray): The points composing a text polygon.

        Returns:
            head_edge (ndarray): The two points composing the head edge of text
                polygon.
            tail_edge (ndarray): The two points composing the tail edge of text
                polygon.
            top_sideline (ndarray): The points composing top curved sideline of
                text polygon.
            bot_sideline (ndarray): The points composing bottom curved sideline
                of text polygon.
        """
        assert points.ndim == 2
        assert points.shape[0] >= 4
        assert points.shape[1] == 2

        orientation_thr = 2.0  # an empirical hyperparameter
        head_inds, tail_inds = self.find_head_tail(points, orientation_thr)
        head_edge, tail_edge = points[head_inds], points[tail_inds]

        pad_points = np.vstack([points, points])
        if tail_inds[1] < 1:
            tail_inds[1] = len(points)
        sideline1 = pad_points[head_inds[1] : tail_inds[1]]
        sideline2 = pad_points[tail_inds[1] : (head_inds[1] + len(points))]
        return head_edge, tail_edge, sideline1, sideline2

    def vector_slope(self, vec):
        assert len(vec) == 2
        return abs(vec[1] / (vec[0] + 1e-8))

    def find_head_tail(self, points, orientation_thr):
        """Find the head edge and tail edge of a text polygon.

        Args:
            points (ndarray): The points composing a text polygon.
            orientation_thr (float): The threshold for distinguishing between
                head edge and tail edge among the horizontal and vertical edges
                of a quadrangle.

        Returns:
            head_inds (list): The indexes of two points composing head edge.
            tail_inds (list): The indexes of two points composing tail edge.
        """
        assert points.ndim == 2
        assert points.shape[0] >= 4
        assert points.shape[1] == 2
        assert isinstance(orientation_thr, float)

        if len(points) > 4:
            pad_points = np.vstack([points, points[0]])
            edge_vec = pad_points[1:] - pad_points[:-1]

            theta_sum = []
            adjacent_vec_theta = []
            for i, edge_vec1 in enumerate(edge_vec):
                adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
                adjacent_edge_vec = edge_vec[adjacent_ind]
                temp_theta_sum = np.sum(self.vector_angle(edge_vec1, adjacent_edge_vec))
                temp_adjacent_theta = self.vector_angle(
                    adjacent_edge_vec[0], adjacent_edge_vec[1]
                )
                theta_sum.append(temp_theta_sum)
                adjacent_vec_theta.append(temp_adjacent_theta)
            theta_sum_score = np.array(theta_sum) / np.pi
            adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
            poly_center = np.mean(points, axis=0)
            edge_dist = np.maximum(
                norm(pad_points[1:] - poly_center, axis=-1),
                norm(pad_points[:-1] - poly_center, axis=-1),
            )
            dist_score = edge_dist / np.max(edge_dist)
            position_score = np.zeros(len(edge_vec))
            score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
            score += 0.35 * dist_score

            if len(points) % 2 == 0:
                position_score[(len(score) // 2 - 1)] += 1
                position_score[-1] += 1
            score += 0.1 * position_score

            pad_score = np.concatenate([score, score])
            score_matrix = np.zeros((len(score), len(score) - 3))
            x = np.arange(len(score) - 3) / float(len(score) - 4)
            gaussian = (
                1.0
                / (np.sqrt(2.0 * np.pi) * 0.5)
                * np.exp(-np.power((x - 0.5) / 0.5, 2.0) / 2)
            )
            gaussian = gaussian / np.max(gaussian)
            for i in range(len(score)):
                score_matrix[i, :] = (
                    score[i]
                    + pad_score[(i + 2) : (i + len(score) - 1)] * gaussian * 0.3
                )

            head_start, tail_increment = np.unravel_index(
                score_matrix.argmax(), score_matrix.shape
            )
            tail_start = (head_start + tail_increment + 2) % len(points)
            head_end = (head_start + 1) % len(points)
            tail_end = (tail_start + 1) % len(points)

            if head_end > tail_end:
                head_start, tail_start = tail_start, head_start
                head_end, tail_end = tail_end, head_end
            head_inds = [head_start, head_end]
            tail_inds = [tail_start, tail_end]
        else:
            if self.vector_slope(points[1] - points[0]) + self.vector_slope(
                points[3] - points[2]
            ) < self.vector_slope(points[2] - points[1]) + self.vector_slope(
                points[0] - points[3]
            ):
                horizontal_edge_inds = [[0, 1], [2, 3]]
                vertical_edge_inds = [[3, 0], [1, 2]]
            else:
                horizontal_edge_inds = [[3, 0], [1, 2]]
                vertical_edge_inds = [[0, 1], [2, 3]]

            vertical_len_sum = norm(
                points[vertical_edge_inds[0][0]] - points[vertical_edge_inds[0][1]]
            ) + norm(
                points[vertical_edge_inds[1][0]] - points[vertical_edge_inds[1][1]]
            )
            horizontal_len_sum = norm(
                points[horizontal_edge_inds[0][0]] - points[horizontal_edge_inds[0][1]]
            ) + norm(
                points[horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1][1]]
            )

            if vertical_len_sum > horizontal_len_sum * orientation_thr:
                head_inds = horizontal_edge_inds[0]
                tail_inds = horizontal_edge_inds[1]
            else:
                head_inds = vertical_edge_inds[0]
                tail_inds = vertical_edge_inds[1]
        return head_inds, tail_inds

    def vector_angle(self, vec1, vec2):
        if vec1.ndim > 1:
            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
        else:
            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
        if vec2.ndim > 1:
            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
        else:
            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
        return np.arccos(np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))

    def get_minarea_rect(self, img, points):
        bounding_box = cv2.minAreaRect(points)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_a, index_b, index_c, index_d = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_a = 0
            index_d = 1
        else:
            index_a = 1
            index_d = 0
        if points[3][1] > points[2][1]:
            index_b = 2
            index_c = 3
        else:
            index_b = 3
            index_c = 2

        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
        crop_img = self.get_rotate_crop_image(img, np.array(box))
        return crop_img, box

    def sample_points_on_bbox_bp(self, line, n=50):
        """Resample n points on a line.

        Args:
            line (ndarray): The points composing a line.
            n (int): The resampled points number.

        Returns:
            resampled_line (ndarray): The points composing the resampled line.
        """
        # Validate the input arguments.
        assert line.ndim == 2
        assert line.shape[0] >= 2
        assert line.shape[1] == 2
        assert isinstance(n, int)
        assert n > 0

        length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
        total_length = sum(length_list)
        length_cumsum = np.cumsum([0.0] + length_list)
        delta_length = total_length / (float(n) + 1e-8)
        current_edge_ind = 0
        resampled_line = [line[0]]

        for i in range(1, n):
            current_line_len = i * delta_length
            while (
                current_edge_ind + 1 < len(length_cumsum)
                and current_line_len >= length_cumsum[current_edge_ind + 1]
            ):
                current_edge_ind += 1
            current_edge_end_shift = current_line_len - length_cumsum[current_edge_ind]
            if current_edge_ind >= len(length_list):
                break
            end_shift_ratio = current_edge_end_shift / length_list[current_edge_ind]
            current_point = (
                line[current_edge_ind]
                + (line[current_edge_ind + 1] - line[current_edge_ind])
                * end_shift_ratio
            )
            resampled_line.append(current_point)
        resampled_line.append(line[-1])
        resampled_line = np.array(resampled_line)
        return resampled_line
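
    # Example (illustrative, not executed): resampling a 2-point segment from
    # (0, 0) to (10, 0) with n=5 yields points spaced ~2 px apart along the
    # segment, plus the original endpoint:
    #
    #   pts = self.sample_points_on_bbox_bp(np.array([[0.0, 0.0], [10.0, 0.0]]), 5)
    #   # pts ~ [[0, 0], [2, 0], [4, 0], [6, 0], [8, 0], [10, 0]]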

    def sample_points_on_bbox(self, line, n=50):
        """Group nearby points on a line and return the mean position of each
        group (the ``n`` argument is validated but not otherwise used).

        Args:
            line (ndarray): The points composing a line.
            n (int): The resampled points number.

        Returns:
            resampled_line (ndarray): The points composing the resampled line.
        """
        assert line.ndim == 2
        assert line.shape[0] >= 2
        assert line.shape[1] == 2
        assert isinstance(n, int)
        assert n > 0

        length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
        total_length = sum(length_list)
        mean_length = total_length / (len(length_list) + 1e-8)
        group = [[0]]
        for i in range(len(length_list)):
            point_id = i + 1
            if length_list[i] < 0.9 * mean_length:
                for g in group:
                    if i in g:
                        g.append(point_id)
                        break
            else:
                g = [point_id]
                group.append(g)
        top_tail_len = norm(line[0] - line[-1])
        if top_tail_len < 0.9 * mean_length:
            group[0].extend(g)
            group.remove(g)

        mean_positions = []
        for indices in group:
            x_sum = 0
            y_sum = 0
            for index in indices:
                x, y = line[index]
                x_sum += x
                y_sum += y
            num_points = len(indices)
            mean_x = x_sum / num_points
            mean_y = y_sum / num_points
            mean_positions.append((mean_x, mean_y))
        resampled_line = np.array(mean_positions)
        return resampled_line

    def get_poly_rect_crop(self, img, points):
        """
        Rectify and crop irregular or curved text regions using the polygon.

        args:
            img: the image as an ndarray
            points: polygon coordinates with shape N*2, as an ndarray
        return:
            the rectified crop as an ndarray
        """
        points = np.array(points).astype(np.int32).reshape(-1, 2)
        temp_crop_img, temp_box = self.get_minarea_rect(img, points)

        # Compute the IoU between the minimum-area rectangle and the polygon.
        def get_union(pD, pG):
            return Polygon(pD).union(Polygon(pG)).area

        def get_intersection_over_union(pD, pG):
            return get_intersection(pD, pG) / (get_union(pD, pG) + 1e-10)

        def get_intersection(pD, pG):
            return Polygon(pD).intersection(Polygon(pG)).area

        cal_IoU = get_intersection_over_union(points, temp_box)
        if cal_IoU >= 0.7:
            points = self.sample_points_on_bbox_bp(points, 31)
            return temp_crop_img

        points_sample = self.sample_points_on_bbox(points)
        points_sample = points_sample.astype(np.int32)
        head_edge, tail_edge, top_line, bot_line = self.reorder_poly_edge(points_sample)

        resample_top_line = self.sample_points_on_bbox_bp(top_line, 15)
        resample_bot_line = self.sample_points_on_bbox_bp(bot_line, 15)

        sideline_mean_shift = np.mean(resample_top_line, axis=0) - np.mean(
            resample_bot_line, axis=0
        )
        if sideline_mean_shift[1] > 0:
            resample_bot_line, resample_top_line = resample_top_line, resample_bot_line
        rectifier = AutoRectifier()
        new_points = np.concatenate([resample_top_line, resample_bot_line])
        new_points_list = list(new_points.astype(np.float32).reshape(1, -1).tolist())

        if len(img.shape) == 2:
            img = np.stack((img,) * 3, axis=-1)
        img_crop, image = rectifier.run(img, new_points_list, mode="homography")
        return np.array(img_crop[0], dtype=np.uint8)
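
# Usage sketch (not executed at import; the image path is hypothetical and
# ``result["dt_polys"]`` refers to the DBPostProcess sketch above):
#
#   cropper = CropByPolys(det_box_type="quad")
#   crops = cropper.apply("sample.png", result["dt_polys"])
#   # crops: list of {"img": <ndarray crop>, "img_size": [width, height]}
#   # With det_box_type="poly", curved regions are rectified via AutoRectifier.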


class SortBoxes(BaseComponent):
    """Sort detected text boxes into reading order."""

    YIELD_BATCH = False

    INPUT_KEYS = ["dt_polys"]
    OUTPUT_KEYS = ["dt_polys"]
    DEAULT_INPUTS = {"dt_polys": "dt_polys"}
    DEAULT_OUTPUTS = {"dt_polys": "dt_polys"}

    def apply(self, dt_polys):
        """
        Sort text boxes in order from top to bottom, left to right.

        args:
            dt_polys(array): detected text boxes with shape [N, 4, 2]
        return:
            sorted boxes(array) with shape [N, 4, 2]
        """
        dt_boxes = np.array(dt_polys)
        num_boxes = dt_boxes.shape[0]
        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
        _boxes = list(sorted_boxes)

        for i in range(num_boxes - 1):
            for j in range(i, -1, -1):
                if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and (
                    _boxes[j + 1][0][0] < _boxes[j][0][0]
                ):
                    tmp = _boxes[j]
                    _boxes[j] = _boxes[j + 1]
                    _boxes[j + 1] = tmp
                else:
                    break
        return {"dt_polys": _boxes}
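
# Example (illustrative, not executed): two boxes whose top-left y coordinates
# differ by fewer than 10 pixels are treated as being on the same text line and
# re-ordered by x, so a box starting at (200, 34) is placed after one starting
# at (15, 30), even if it appeared first in the raw detection output.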