processors.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import List, Tuple, Union
  15. import os
  16. import sys
  17. import cv2
  18. import copy
  19. import math
  20. import pyclipper
  21. import numpy as np
  22. from numpy.linalg import norm
  23. from PIL import Image
  24. from shapely.geometry import Polygon
  25. from ...utils.io import ImageReader
  26. from ....utils import logging
  27. from ...utils.benchmark import benchmark
  class DetResizeForTest:
      """Resize images ahead of text-detection inference.

      The strategy is selected from the constructor keyword arguments:

      * ``image_shape`` (optionally ``keep_ratio``): resize to a fixed
        ``(height, width)``; stored as ``resize_type`` 1.
      * ``limit_side_len`` (optionally ``limit_type``, default ``"min"``):
        bound one side and snap both sides to a multiple of 32; this is
        ``resize_type`` 0.
      * ``resize_long``: scale the longer side to that length and round both
        sides up to a multiple of 128; stored as ``resize_type`` 2.
      * none of the above: ``resize_type`` 0 with ``limit_side_len=736`` and
        ``limit_type="min"``.
      """

      def __init__(self, **kwargs):
          super().__init__()
          self.resize_type = 0
          self.keep_ratio = False
          if "image_shape" in kwargs:
              self.image_shape = kwargs["image_shape"]
              self.resize_type = 1
              if "keep_ratio" in kwargs:
                  self.keep_ratio = kwargs["keep_ratio"]
          elif "limit_side_len" in kwargs:
              self.limit_side_len = kwargs["limit_side_len"]
              self.limit_type = kwargs.get("limit_type", "min")
          elif "resize_long" in kwargs:
              self.resize_type = 2
              self.resize_long = kwargs.get("resize_long", 960)
          else:
              self.limit_side_len = 736
              self.limit_type = "min"

      @benchmark.timeit
      def __call__(
          self,
          imgs,
          limit_side_len: Union[int, None] = None,
          limit_type: Union[str, None] = None,
      ):
          """Resize every image in ``imgs``.

          Args:
              imgs: iterable of images, each indexed as ``[h, w, c]``.
              limit_side_len: per-call override of the configured side limit
                  (only used by ``resize_type`` 0).
              limit_type: per-call override of the configured limit type
                  (only used by ``resize_type`` 0).

          Returns:
              ``(resize_imgs, img_shapes)``: the resized images and, for each
              one, ``np.array([src_h, src_w, ratio_h, ratio_w])``.
          """
          resize_imgs, img_shapes = [], []
          for ori_img in imgs:
              img, shape = self.resize(ori_img, limit_side_len, limit_type)
              resize_imgs.append(img)
              img_shapes.append(shape)
          return resize_imgs, img_shapes

      def resize(
          self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
      ):
          """Resize one image with the configured strategy.

          Returns:
              ``(img, np.array([src_h, src_w, ratio_h, ratio_w]))`` where
              ``src_h``/``src_w`` are the height and width of the input as
              received (before any padding).
          """
          src_h, src_w, _ = img.shape
          # Very small inputs are zero-padded up to at least 32x32 first.
          if sum([src_h, src_w]) < 64:
              img = self.image_padding(img)
          if self.resize_type == 0:
              img, [ratio_h, ratio_w] = self.resize_image_type0(
                  img, limit_side_len, limit_type
              )
          elif self.resize_type == 2:
              img, [ratio_h, ratio_w] = self.resize_image_type2(img)
          else:
              img, [ratio_h, ratio_w] = self.resize_image_type1(img)
          return img, np.array([src_h, src_w, ratio_h, ratio_w])

      def image_padding(self, im, value=0):
          """Pad ``im`` at the bottom/right so each side is at least 32 pixels.

          The padded area is filled with ``value``; the original pixels stay in
          the top-left corner.
          """
          h, w, c = im.shape
          im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
          im_pad[:h, :w, :] = im
          return im_pad

      def resize_image_type1(self, img):
          """Resize to ``self.image_shape`` (height, width).

          With ``keep_ratio`` set to ``True`` the target width is derived from
          the aspect ratio and rounded up to a multiple of 32.

          Returns:
              ``(img, [ratio_h, ratio_w])``.
          """
          resize_h, resize_w = self.image_shape
          ori_h, ori_w = img.shape[:2]  # (h, w, c)
          if self.keep_ratio is True:
              resize_w = ori_w * resize_h / ori_h
              resize_w = math.ceil(resize_w / 32) * 32
          ratio_h = float(resize_h) / ori_h
          ratio_w = float(resize_w) / ori_w
          img = cv2.resize(img, (int(resize_w), int(resize_h)))
          return img, [ratio_h, ratio_w]

      def resize_image_type0(
          self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
      ):
          """Resize so that both sides are multiples of 32 (minimum 32).

          Args:
              img: array indexed as ``[h, w, c]``.
              limit_side_len: side limit; a falsy value falls back to
                  ``self.limit_side_len``.
              limit_type: ``"max"`` (shrink when the longer side exceeds the
                  limit), ``"min"`` (enlarge when the shorter side is below the
                  limit) or ``"resize_long"`` (always scale the longer side to
                  the limit); a falsy value falls back to ``self.limit_type``.

          Returns:
              ``(img, [ratio_h, ratio_w])``.

          Raises:
              ValueError: if ``limit_type`` is not one of the supported values.
              Exception: whatever ``cv2.resize`` raises is logged and re-raised.
          """
          limit_side_len = limit_side_len or self.limit_side_len
          limit_type = limit_type or self.limit_type
          h, w, _ = img.shape

          if limit_type == "max":
              longer = max(h, w)
              ratio = float(limit_side_len) / longer if longer > limit_side_len else 1.0
          elif limit_type == "min":
              shorter = min(h, w)
              ratio = (
                  float(limit_side_len) / shorter if shorter < limit_side_len else 1.0
              )
          elif limit_type == "resize_long":
              ratio = float(limit_side_len) / max(h, w)
          else:
              raise ValueError(
                  "not support limit type: {!r}, expected 'max', 'min' or "
                  "'resize_long'".format(limit_type)
              )

          resize_h = int(h * ratio)
          resize_w = int(w * ratio)
          # Snap to the nearest multiple of 32, never going below 32.
          resize_h = max(int(round(resize_h / 32) * 32), 32)
          resize_w = max(int(round(resize_w / 32) * 32), 32)

          try:
              img = cv2.resize(img, (resize_w, resize_h))
          except Exception:
              # Surface the failure to the caller instead of terminating the
              # whole process with a success exit status.
              logging.info(
                  "cv2.resize failed: image shape {}, target (w, h) = ({}, {})".format(
                      img.shape, resize_w, resize_h
                  )
              )
              raise

          ratio_h = resize_h / float(h)
          ratio_w = resize_w / float(w)
          return img, [ratio_h, ratio_w]

      def resize_image_type2(self, img):
          """Scale the longer side to ``self.resize_long``.

          Both sides are then rounded up to a multiple of 128.

          Returns:
              ``(img, [ratio_h, ratio_w])``.
          """
          h, w, _ = img.shape
          ratio = float(self.resize_long) / (h if h > w else w)
          resize_h = int(h * ratio)
          resize_w = int(w * ratio)

          max_stride = 128
          resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
          resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
          img = cv2.resize(img, (int(resize_w), int(resize_h)))
          ratio_h = resize_h / float(h)
          ratio_w = resize_w / float(w)
          return img, [ratio_h, ratio_w]
  class NormalizeImage:
      """Scale pixel values, then subtract a mean and divide by a std.

      Args:
          scale: multiplicative factor applied first; defaults to ``1/255``.
              A string is evaluated as a Python expression.
          mean: per-channel mean; defaults to ``[0.485, 0.456, 0.406]``.
          std: per-channel std; defaults to ``[0.229, 0.224, 0.225]``.
          order: ``"chw"`` shapes the statistics as ``(3, 1, 1)``; any other
              value shapes them as ``(1, 1, 3)``.
      """

      def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
          super().__init__()
          # NOTE(review): a string scale goes through eval(); only pass values
          # from trusted configuration.
          if isinstance(scale, str):
              scale = eval(scale)
          if scale is None:
              scale = 1.0 / 255.0
          self.scale = np.float32(scale)

          if mean is None:
              mean = [0.485, 0.456, 0.406]
          if std is None:
              std = [0.229, 0.224, 0.225]

          # The statistics are reshaped so they broadcast over the image axes.
          stat_shape = (3, 1, 1) if order == "chw" else (1, 1, 3)
          self.mean = np.array(mean).reshape(stat_shape).astype("float32")
          self.std = np.array(std).reshape(stat_shape).astype("float32")

      @benchmark.timeit
      def __call__(self, imgs):
          """Return a list with the normalized float32 version of each image."""
          normalized = []
          for image in imgs:
              scaled = image.astype("float32") * self.scale
              normalized.append((scaled - self.mean) / self.std)
          return normalized
  class DBPostProcess:
      """
      The post process for Differentiable Binarization (DB).

      Turns a prediction map into text boxes (``box_type="quad"``) or polygons
      (``box_type="poly"``) expressed in source-image coordinates.

      Args:
          thresh: binarization threshold applied to the prediction map.
          box_thresh: candidates whose score is below this value are dropped.
          max_candidates: at most this many contours are examined per map.
          unclip_ratio: expansion factor used by ``unclip``.
          use_dilation: dilate the binary map with a 2x2 kernel first.
          score_mode: ``"fast"`` or ``"slow"``; only consulted for quads.
          box_type: ``"quad"`` or ``"poly"``.
      """

      def __init__(
          self,
          thresh=0.3,
          box_thresh=0.7,
          max_candidates=1000,
          unclip_ratio=2.0,
          use_dilation=False,
          score_mode="fast",
          box_type="quad",
          **kwargs
      ):
          super().__init__()
          self.thresh = thresh
          self.box_thresh = box_thresh
          self.max_candidates = max_candidates
          self.unclip_ratio = unclip_ratio
          self.min_size = 3
          self.score_mode = score_mode
          self.box_type = box_type
          assert score_mode in [
              "slow",
              "fast",
          ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
          self.use_dilation = use_dilation

      @staticmethod
      def _find_contours(bitmap):
          """Return the contours of a binary map for either findContours layout."""
          outs = cv2.findContours(
              (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
          )
          # The call returns either (image, contours, hierarchy) or
          # (contours, hierarchy); the contours are second from the end in both.
          return outs[-2]

      def polygons_from_bitmap(
          self,
          pred,
          _bitmap,
          dest_width,
          dest_height,
          box_thresh,
          unclip_ratio,
      ):
          """Extract text polygons from a binary map.

          Args:
              pred: score map used to rate each candidate.
              _bitmap: 2-D binary map (unpacked as ``height, width``).
              dest_width: width the x coordinates are rescaled to.
              dest_height: height the y coordinates are rescaled to.
              box_thresh: minimum score a candidate must reach.
              unclip_ratio: expansion factor passed to ``unclip``.

          Returns:
              ``(boxes, scores)``: a list of ``(N, 2)`` point arrays and the
              matching list of scores.
          """
          height, width = _bitmap.shape
          boxes = []
          scores = []

          contours = self._find_contours(_bitmap)
          for contour in contours[: self.max_candidates]:
              epsilon = 0.002 * cv2.arcLength(contour, True)
              approx = cv2.approxPolyDP(contour, epsilon, True)
              points = approx.reshape((-1, 2))
              if points.shape[0] < 4:
                  continue

              score = self.box_score_fast(pred, points.reshape(-1, 2))
              if box_thresh > score:
                  continue

              box = self.unclip(points, unclip_ratio)
              # More than one entry along the first axis is rejected.
              if len(box) > 1:
                  continue
              box = box.reshape(-1, 2)
              if len(box) == 0:
                  continue

              _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
              if sside < self.min_size + 2:
                  continue

              box = np.array(box)
              box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
              box[:, 1] = np.clip(
                  np.round(box[:, 1] / height * dest_height), 0, dest_height
              )
              boxes.append(box)
              scores.append(score)
          return boxes, scores

      def boxes_from_bitmap(
          self,
          pred,
          _bitmap,
          dest_width,
          dest_height,
          box_thresh,
          unclip_ratio,
      ):
          """Extract quadrilateral text boxes from a binary map.

          Args:
              pred: score map used to rate each candidate.
              _bitmap: 2-D binary map (unpacked as ``height, width``).
              dest_width: width the x coordinates are rescaled to.
              dest_height: height the y coordinates are rescaled to.
              box_thresh: minimum score a candidate must reach.
              unclip_ratio: expansion factor passed to ``unclip``.

          Returns:
              ``(boxes, scores)``: an ``int16`` array built from the 4-point
              boxes and the matching list of scores.
          """
          height, width = _bitmap.shape
          contours = self._find_contours(_bitmap)
          num_contours = min(len(contours), self.max_candidates)

          boxes = []
          scores = []
          for index in range(num_contours):
              contour = contours[index]
              points, sside = self.get_mini_boxes(contour)
              if sside < self.min_size:
                  continue
              points = np.array(points)

              if self.score_mode == "fast":
                  score = self.box_score_fast(pred, points.reshape(-1, 2))
              else:
                  score = self.box_score_slow(pred, contour)
              if box_thresh > score:
                  continue

              expanded = self.unclip(points, unclip_ratio)
              # An empty offset result has no rectangle to fit; skip it.
              if expanded.size == 0:
                  continue
              box, sside = self.get_mini_boxes(expanded.reshape(-1, 1, 2))
              if sside < self.min_size + 2:
                  continue

              box = np.array(box)
              box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
              box[:, 1] = np.clip(
                  np.round(box[:, 1] / height * dest_height), 0, dest_height
              )
              boxes.append(box.astype(np.int16))
              scores.append(score)
          return np.array(boxes, dtype=np.int16), scores

      def unclip(self, box, unclip_ratio):
          """Expand ``box`` outwards by ``area * unclip_ratio / perimeter``.

          Returns:
              The offset result as an array; when the result cannot be packed
              into one regular array (``ValueError``), only its first path is
              returned.
          """
          poly = Polygon(box)
          distance = poly.area * unclip_ratio / poly.length
          offset = pyclipper.PyclipperOffset()
          offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
          # Run the offset once and reuse the result in the fallback path.
          paths = offset.Execute(distance)
          try:
              expanded = np.array(paths)
          except ValueError:
              expanded = np.array(paths[0])
          return expanded

      def get_mini_boxes(self, contour):
          """Fit the minimum-area rectangle around ``contour``.

          Returns:
              ``(box, short_side)``: the four corner points, reordered from
              the x-sorted corners, and the smaller of the rectangle's two
              side lengths.
          """
          bounding_box = cv2.minAreaRect(contour)
          # Sort the corners by x; the first two are the left pair.
          points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

          if points[1][1] > points[0][1]:
              index_1, index_4 = 0, 1
          else:
              index_1, index_4 = 1, 0
          if points[3][1] > points[2][1]:
              index_2, index_3 = 2, 3
          else:
              index_2, index_3 = 3, 2

          box = [points[index_1], points[index_2], points[index_3], points[index_4]]
          return box, min(bounding_box[1])

      def box_score_fast(self, bitmap, _box):
          """box_score_fast: use bbox mean score as the mean score

          The mean of ``bitmap`` is taken over the filled polygon ``_box``
          inside its axis-aligned bounding rectangle (clipped to the map).
          """
          h, w = bitmap.shape[:2]
          box = _box.copy()
          xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
          xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
          ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
          ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)

          mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
          # Shift the polygon into the coordinate frame of the cropped mask.
          box[:, 0] = box[:, 0] - xmin
          box[:, 1] = box[:, 1] - ymin
          cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
          return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

      def box_score_slow(self, bitmap, contour):
          """box_score_slow: use polygon mean score as the mean score

          Same as ``box_score_fast`` but the mask is filled from the contour
          itself.
          """
          h, w = bitmap.shape[:2]
          contour = contour.copy()
          contour = np.reshape(contour, (-1, 2))

          xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
          xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
          ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
          ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

          mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
          # Shift the contour into the coordinate frame of the cropped mask.
          contour[:, 0] = contour[:, 0] - xmin
          contour[:, 1] = contour[:, 1] - ymin
          cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
          return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

      @benchmark.timeit
      def __call__(
          self,
          preds,
          img_shapes,
          thresh: Union[float, None] = None,
          box_thresh: Union[float, None] = None,
          unclip_ratio: Union[float, None] = None,
      ):
          """Post-process a batch of predictions.

          Args:
              preds: sequence whose first element holds one prediction per
                  image.
              img_shapes: per-image ``(src_h, src_w, ratio_h, ratio_w)``.
              thresh: per-call override of ``self.thresh``; ``None`` keeps the
                  configured value.
              box_thresh: per-call override of ``self.box_thresh``; ``None``
                  keeps the configured value.
              unclip_ratio: per-call override of ``self.unclip_ratio``;
                  ``None`` keeps the configured value.

          Returns:
              ``(boxes, scores)``: one entry per image.
          """
          # Compare against None so an explicit 0 / 0.0 override is honoured.
          thresh = self.thresh if thresh is None else thresh
          box_thresh = self.box_thresh if box_thresh is None else box_thresh
          unclip_ratio = self.unclip_ratio if unclip_ratio is None else unclip_ratio

          boxes, scores = [], []
          for pred, img_shape in zip(preds[0], img_shapes):
              box, score = self.process(
                  pred,
                  img_shape,
                  thresh,
                  box_thresh,
                  unclip_ratio,
              )
              boxes.append(box)
              scores.append(score)
          return boxes, scores

      def process(
          self,
          pred,
          img_shape,
          thresh,
          box_thresh,
          unclip_ratio,
      ):
          """Post-process the prediction of a single image.

          Args:
              pred: prediction indexed as ``pred[0, :, :]``.
              img_shape: ``(src_h, src_w, ratio_h, ratio_w)``; only the source
                  height and width are used here.
              thresh: binarization threshold.
              box_thresh: minimum candidate score.
              unclip_ratio: expansion factor passed to ``unclip``.

          Returns:
              ``(boxes, scores)`` for this image.

          Raises:
              ValueError: if ``self.box_type`` is neither ``"quad"`` nor
                  ``"poly"``.
          """
          pred = pred[0, :, :]
          segmentation = pred > thresh
          src_h, src_w, ratio_h, ratio_w = img_shape

          if self.use_dilation:
              dilation_kernel = np.array([[1, 1], [1, 1]])
              mask = cv2.dilate(
                  np.array(segmentation).astype(np.uint8),
                  dilation_kernel,
              )
          else:
              mask = segmentation

          if self.box_type == "poly":
              boxes, scores = self.polygons_from_bitmap(
                  pred, mask, src_w, src_h, box_thresh, unclip_ratio
              )
          elif self.box_type == "quad":
              boxes, scores = self.boxes_from_bitmap(
                  pred, mask, src_w, src_h, box_thresh, unclip_ratio
              )
          else:
              raise ValueError("box_type can only be one of ['quad', 'poly']")
          return boxes, scores