processors.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import List, Tuple, Union
  15. import os
  16. import sys
  17. import cv2
  18. import copy
  19. import math
  20. import pyclipper
  21. import numpy as np
  22. from numpy.linalg import norm
  23. from PIL import Image
  24. from shapely.geometry import Polygon
  25. from ...utils.io import ImageReader
  26. from ....utils import logging
  27. from ...utils.benchmark import benchmark
  @benchmark.timeit
  class DetResizeForTest:
      """Resize images to a network-compatible size for text detection.

      One of three strategies is selected from the constructor keyword
      arguments (checked in this order):

      * ``image_shape`` present: resize to a fixed ``(h, w)``
        (``resize_type`` 1). When ``keep_ratio`` is also given and is ``True``
        the width is derived from the aspect ratio and rounded up to a
        multiple of 32.
      * ``limit_side_len`` present: limit one side according to ``limit_type``
        (default ``"min"``) and round to a multiple of 32 (``resize_type`` 0).
      * ``resize_long`` present: scale so the longer side equals
        ``resize_long`` and round up to a multiple of 128 (``resize_type`` 2).
      * none of the above: ``resize_type`` 0 with ``limit_side_len=736`` and
        ``limit_type="min"``.
      """

      def __init__(self, **kwargs):
          super().__init__()
          self.resize_type = 0
          self.keep_ratio = False
          if "image_shape" in kwargs:
              self.image_shape = kwargs["image_shape"]
              self.resize_type = 1
              if "keep_ratio" in kwargs:
                  self.keep_ratio = kwargs["keep_ratio"]
          elif "limit_side_len" in kwargs:
              self.limit_side_len = kwargs["limit_side_len"]
              self.limit_type = kwargs.get("limit_type", "min")
          elif "resize_long" in kwargs:
              self.resize_type = 2
              self.resize_long = kwargs.get("resize_long", 960)
          else:
              self.limit_side_len = 736
              self.limit_type = "min"

      def __call__(
          self,
          imgs,
          limit_side_len: Union[int, None] = None,
          limit_type: Union[str, None] = None,
      ):
          """Resize every image in ``imgs``.

          Args:
              imgs: iterable of images, each an array of shape ``(h, w, c)``.
              limit_side_len: optional per-call override of the configured side
                  limit; only used by the ``resize_type`` 0 strategy.
              limit_type: optional per-call override of the configured limit
                  type; only used by the ``resize_type`` 0 strategy.

          Returns:
              ``(resize_imgs, img_shapes)``: the resized images and, for each
              image, ``np.array([src_h, src_w, ratio_h, ratio_w])``.
          """
          resize_imgs, img_shapes = [], []
          for ori_img in imgs:
              img, shape = self.resize(ori_img, limit_side_len, limit_type)
              resize_imgs.append(img)
              img_shapes.append(shape)
          return resize_imgs, img_shapes

      def resize(
          self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
      ):
          """Resize one image using the strategy chosen at construction time.

          Returns:
              ``(img, shape)`` where ``shape`` is
              ``np.array([src_h, src_w, ratio_h, ratio_w])``. ``src_h`` and
              ``src_w`` are the dimensions of the input before any padding.
          """
          src_h, src_w, _ = img.shape
          # Very small inputs are zero-padded up to at least 32x32 first.
          if sum([src_h, src_w]) < 64:
              img = self.image_padding(img)

          if self.resize_type == 0:
              img, [ratio_h, ratio_w] = self.resize_image_type0(
                  img, limit_side_len, limit_type
              )
          elif self.resize_type == 2:
              img, [ratio_h, ratio_w] = self.resize_image_type2(img)
          else:
              img, [ratio_h, ratio_w] = self.resize_image_type1(img)
          return img, np.array([src_h, src_w, ratio_h, ratio_w])

      def image_padding(self, im, value=0):
          """Pad ``im`` on the bottom/right so each side is at least 32 pixels.

          The padded area is filled with ``value``; the original pixels are
          copied into the top-left corner.
          """
          h, w, c = im.shape
          im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
          im_pad[:h, :w, :] = im
          return im_pad

      def resize_image_type1(self, img):
          """Resize to the fixed ``self.image_shape`` (``(h, w)``).

          When ``self.keep_ratio`` is ``True`` the target width is recomputed
          from the aspect ratio and rounded up to a multiple of 32.

          Returns:
              ``(img, [ratio_h, ratio_w])``.
          """
          resize_h, resize_w = self.image_shape
          ori_h, ori_w = img.shape[:2]  # (h, w, c)
          if self.keep_ratio is True:
              resize_w = ori_w * resize_h / ori_h
              N = math.ceil(resize_w / 32)
              resize_w = N * 32
          ratio_h = float(resize_h) / ori_h
          ratio_w = float(resize_w) / ori_w
          img = cv2.resize(img, (int(resize_w), int(resize_h)))
          return img, [ratio_h, ratio_w]

      def resize_image_type0(
          self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
      ):
          """Resize so both sides are multiples of 32, honouring a side limit.

          Args:
              img: array with shape ``[h, w, c]``.
              limit_side_len: side limit; a falsy value falls back to
                  ``self.limit_side_len``.
              limit_type: ``"max"`` (shrink if the longer side exceeds the
                  limit), ``"min"`` (enlarge if the shorter side is below the
                  limit) or ``"resize_long"`` (always scale the longer side to
                  the limit); a falsy value falls back to ``self.limit_type``.

          Returns:
              ``(img, [ratio_h, ratio_w])``.

          Raises:
              Exception: if ``limit_type`` is not one of the supported values.
                  Any error raised by ``cv2.resize`` is logged and re-raised.
          """
          limit_side_len = limit_side_len or self.limit_side_len
          limit_type = limit_type or self.limit_type
          h, w, c = img.shape

          if limit_type == "max":
              # limit the max side
              if max(h, w) > limit_side_len:
                  if h > w:
                      ratio = float(limit_side_len) / h
                  else:
                      ratio = float(limit_side_len) / w
              else:
                  ratio = 1.0
          elif limit_type == "min":
              if min(h, w) < limit_side_len:
                  if h < w:
                      ratio = float(limit_side_len) / h
                  else:
                      ratio = float(limit_side_len) / w
              else:
                  ratio = 1.0
          elif limit_type == "resize_long":
              ratio = float(limit_side_len) / max(h, w)
          else:
              raise Exception("not support limit type, image ")

          resize_h = int(h * ratio)
          resize_w = int(w * ratio)

          # Round to the nearest multiple of 32, never going below 32. Because
          # of this clamp both targets are always positive, so no separate
          # "<= 0" guard is needed.
          resize_h = max(int(round(resize_h / 32) * 32), 32)
          resize_w = max(int(round(resize_w / 32) * 32), 32)

          try:
              img = cv2.resize(img, (int(resize_w), int(resize_h)))
          except Exception:
              # Log the offending sizes, then let the error propagate instead
              # of terminating the process with a success exit status.
              logging.info(
                  "Failed to resize image of shape {} to (w={}, h={})".format(
                      img.shape, resize_w, resize_h
                  )
              )
              raise
          ratio_h = resize_h / float(h)
          ratio_w = resize_w / float(w)
          return img, [ratio_h, ratio_w]

      def resize_image_type2(self, img):
          """Scale the longer side to ``self.resize_long``.

          Both target sides are then rounded up to a multiple of 128.

          Returns:
              ``(img, [ratio_h, ratio_w])``.
          """
          h, w, _ = img.shape

          resize_w = w
          resize_h = h

          if resize_h > resize_w:
              ratio = float(self.resize_long) / resize_h
          else:
              ratio = float(self.resize_long) / resize_w

          resize_h = int(resize_h * ratio)
          resize_w = int(resize_w * ratio)

          max_stride = 128
          resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
          resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
          img = cv2.resize(img, (int(resize_w), int(resize_h)))
          ratio_h = resize_h / float(h)
          ratio_w = resize_w / float(w)

          return img, [ratio_h, ratio_w]
  @benchmark.timeit
  class NormalizeImage:
      """Scale pixel values, then subtract a mean and divide by a std."""

      def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
          """Store the normalization constants as float32 values.

          Args:
              scale: multiplier applied to the pixels before the mean is
                  subtracted; a string is evaluated as a Python expression;
                  ``None`` means ``1.0 / 255.0``.
              mean: per-channel mean; ``None`` means ``[0.485, 0.456, 0.406]``.
              std: per-channel std; ``None`` means ``[0.229, 0.224, 0.225]``.
              order: ``"chw"`` reshapes mean/std to ``(3, 1, 1)``; any other
                  value reshapes them to ``(1, 1, 3)``.
          """
          super().__init__()
          if isinstance(scale, str):
              # NOTE(review): eval() executes arbitrary code; this is only safe
              # if `scale` strings come from trusted configuration.
              scale = eval(scale)
          if scale is None:
              scale = 1.0 / 255.0
          self.scale = np.float32(scale)

          if mean is None:
              mean = [0.485, 0.456, 0.406]
          if std is None:
              std = [0.229, 0.224, 0.225]

          stat_shape = (1, 1, 3) if order != "chw" else (3, 1, 1)
          self.mean = np.array(mean).reshape(stat_shape).astype("float32")
          self.std = np.array(std).reshape(stat_shape).astype("float32")

      def __call__(self, imgs):
          """Return a list with ``(img * scale - mean) / std`` for each image."""
          normalized = []
          for image in imgs:
              scaled = image.astype("float32") * self.scale
              normalized.append((scaled - self.mean) / self.std)
          return normalized
  @benchmark.timeit
  class DBPostProcess:
      """Post-processing for Differentiable Binarization (DB).

      Thresholds a probability map, finds contours in the resulting binary
      mask, scores and expands each candidate region, and rescales the
      resulting boxes/polygons to the source image size.
      """

      def __init__(
          self,
          thresh=0.3,
          box_thresh=0.7,
          max_candidates=1000,
          unclip_ratio=2.0,
          use_dilation=False,
          score_mode="fast",
          box_type="quad",
          **kwargs
      ):
          """Store the default post-processing parameters.

          Args:
              thresh: binarization threshold applied to the probability map.
              box_thresh: candidates scoring below this value are dropped.
              max_candidates: maximum number of contours examined per image.
              unclip_ratio: expansion ratio used by :meth:`unclip`.
              use_dilation: dilate the binary mask with a 2x2 kernel before
                  extracting contours.
              score_mode: ``"fast"`` or ``"slow"``; selects the scoring method
                  used by :meth:`boxes_from_bitmap`.
              box_type: ``"quad"`` or ``"poly"``; selects the output geometry.
          """
          super().__init__()
          self.thresh = thresh
          self.box_thresh = box_thresh
          self.max_candidates = max_candidates
          self.unclip_ratio = unclip_ratio
          self.min_size = 3
          self.score_mode = score_mode
          self.box_type = box_type
          assert score_mode in [
              "slow",
              "fast",
          ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
          self.use_dilation = use_dilation

      def polygons_from_bitmap(
          self,
          pred,
          _bitmap,
          dest_width,
          dest_height,
          box_thresh,
          unclip_ratio,
      ):
          """Extract polygons from a binary mask.

          Args:
              pred: 2-D score map used to score each candidate.
              _bitmap: 2-D binary mask with shape ``(H, W)``.
              dest_width: width the x coordinates are rescaled to.
              dest_height: height the y coordinates are rescaled to.
              box_thresh: minimum score a candidate must reach.
              unclip_ratio: expansion ratio passed to :meth:`unclip`.

          Returns:
              ``(boxes, scores)``: a list of ``(N, 2)`` polygons and the list
              of their scores.
          """
          bitmap = _bitmap
          height, width = bitmap.shape

          boxes = []
          scores = []

          # findContours returns (contours, hierarchy) or
          # (image, contours, hierarchy) depending on the OpenCV version;
          # the contours are the second-to-last element in both cases.
          contours = cv2.findContours(
              (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
          )[-2]

          for contour in contours[: self.max_candidates]:
              epsilon = 0.002 * cv2.arcLength(contour, True)
              approx = cv2.approxPolyDP(contour, epsilon, True)
              points = approx.reshape((-1, 2))
              if points.shape[0] < 4:
                  continue

              score = self.box_score_fast(pred, points.reshape(-1, 2))
              if box_thresh > score:
                  continue

              box = self.unclip(points, unclip_ratio)
              # Skip candidates whose expansion did not yield exactly one
              # path (first dimension of the returned array is not 1).
              if len(box) > 1:
                  continue
              box = box.reshape(-1, 2)
              if len(box) == 0:
                  continue

              _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
              if sside < self.min_size + 2:
                  continue

              box = np.array(box)
              box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
              box[:, 1] = np.clip(
                  np.round(box[:, 1] / height * dest_height), 0, dest_height
              )
              boxes.append(box)
              scores.append(score)
          return boxes, scores

      def boxes_from_bitmap(
          self,
          pred,
          _bitmap,
          dest_width,
          dest_height,
          box_thresh,
          unclip_ratio,
      ):
          """Extract quadrilateral boxes from a binary mask.

          Args:
              pred: 2-D score map used to score each candidate.
              _bitmap: 2-D binary mask with shape ``(H, W)``.
              dest_width: width the x coordinates are rescaled to.
              dest_height: height the y coordinates are rescaled to.
              box_thresh: minimum score a candidate must reach.
              unclip_ratio: expansion ratio passed to :meth:`unclip`.

          Returns:
              ``(boxes, scores)``: an ``int16`` array of 4-point boxes and the
              list of their scores.
          """
          bitmap = _bitmap
          height, width = bitmap.shape

          # See polygons_from_bitmap: [-2] selects the contours for both the
          # 2-value and the 3-value return forms of findContours.
          contours = cv2.findContours(
              (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
          )[-2]

          boxes = []
          scores = []
          for contour in contours[: self.max_candidates]:
              points, sside = self.get_mini_boxes(contour)
              if sside < self.min_size:
                  continue
              points = np.array(points)
              if self.score_mode == "fast":
                  score = self.box_score_fast(pred, points.reshape(-1, 2))
              else:
                  score = self.box_score_slow(pred, contour)
              if box_thresh > score:
                  continue

              box = self.unclip(points, unclip_ratio).reshape(-1, 1, 2)
              box, sside = self.get_mini_boxes(box)
              if sside < self.min_size + 2:
                  continue

              box = np.array(box)
              box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
              box[:, 1] = np.clip(
                  np.round(box[:, 1] / height * dest_height), 0, dest_height
              )
              # NOTE(review): int16 limits coordinates to 32767.
              boxes.append(box.astype(np.int16))
              scores.append(score)
          return np.array(boxes, dtype=np.int16), scores

      def unclip(self, box, unclip_ratio):
          """Expand ``box`` outward by ``area * unclip_ratio / perimeter``.

          Returns:
              An array built from all offset paths, or from the first path only
              when the paths cannot be stacked into one array (``ValueError``).
          """
          poly = Polygon(box)
          distance = poly.area * unclip_ratio / poly.length
          offset = pyclipper.PyclipperOffset()
          offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
          # Run the offset once and reuse the result in the fallback path.
          paths = offset.Execute(distance)
          try:
              expanded = np.array(paths)
          except ValueError:
              expanded = np.array(paths[0])
          return expanded

      def get_mini_boxes(self, contour):
          """Return the minimum-area rectangle of ``contour``.

          Returns:
              ``(box, short_side)``: the four corner points, reordered from the
              x-sorted corners as (left pair's smaller-y, right pair's
              smaller-y, right pair's larger-y, left pair's larger-y), and the
              length of the rectangle's shorter side.
          """
          bounding_box = cv2.minAreaRect(contour)
          points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

          # points[0:2] are the two left-most corners, points[2:4] the two
          # right-most ones; within each pair, order by y.
          if points[1][1] > points[0][1]:
              index_1 = 0
              index_4 = 1
          else:
              index_1 = 1
              index_4 = 0
          if points[3][1] > points[2][1]:
              index_2 = 2
              index_3 = 3
          else:
              index_2 = 3
              index_3 = 2

          box = [points[index_1], points[index_2], points[index_3], points[index_4]]
          return box, min(bounding_box[1])

      def box_score_fast(self, bitmap, _box):
          """Mean of ``bitmap`` inside the polygon ``_box``.

          Only the axis-aligned bounding rectangle of the polygon (clipped to
          the bitmap) is examined; ``_box`` itself is not modified.
          """
          h, w = bitmap.shape[:2]
          box = _box.copy()
          xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
          xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
          ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
          ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)

          mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
          # Shift the polygon into the coordinate frame of the cropped region.
          box[:, 0] = box[:, 0] - xmin
          box[:, 1] = box[:, 1] - ymin
          cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
          return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

      def box_score_slow(self, bitmap, contour):
          """Mean of ``bitmap`` inside ``contour``, using the contour points.

          The contour is copied first, so the caller's array is not modified.
          """
          h, w = bitmap.shape[:2]
          contour = contour.copy()
          contour = np.reshape(contour, (-1, 2))

          xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
          xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
          ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
          ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

          mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

          # Shift the contour into the coordinate frame of the cropped region.
          contour[:, 0] = contour[:, 0] - xmin
          contour[:, 1] = contour[:, 1] - ymin

          cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
          return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

      def __call__(
          self,
          preds,
          img_shapes,
          thresh: Union[float, None] = None,
          box_thresh: Union[float, None] = None,
          unclip_ratio: Union[float, None] = None,
      ):
          """Post-process a batch of predictions.

          Args:
              preds: sequence whose first element is iterated in lockstep with
                  ``img_shapes`` to obtain the per-image prediction.
              img_shapes: per-image ``(src_h, src_w, ratio_h, ratio_w)``.
              thresh: per-call override of ``self.thresh``; ``None`` keeps the
                  configured value.
              box_thresh: per-call override of ``self.box_thresh``; ``None``
                  keeps the configured value.
              unclip_ratio: per-call override of ``self.unclip_ratio``;
                  ``None`` keeps the configured value.

          Returns:
              ``(boxes, scores)``: one entry per image.
          """
          # Compare against None explicitly so that an explicit 0 / 0.0
          # override is honoured rather than replaced by the default.
          thresh = self.thresh if thresh is None else thresh
          box_thresh = self.box_thresh if box_thresh is None else box_thresh
          unclip_ratio = self.unclip_ratio if unclip_ratio is None else unclip_ratio

          boxes, scores = [], []
          for pred, img_shape in zip(preds[0], img_shapes):
              box, score = self.process(
                  pred,
                  img_shape,
                  thresh,
                  box_thresh,
                  unclip_ratio,
              )
              boxes.append(box)
              scores.append(score)
          return boxes, scores

      def process(
          self,
          pred,
          img_shape,
          thresh,
          box_thresh,
          unclip_ratio,
      ):
          """Post-process a single prediction.

          Args:
              pred: array whose first slice along axis 0 is the score map.
              img_shape: ``(src_h, src_w, ratio_h, ratio_w)``; only ``src_h``
                  and ``src_w`` are used here.
              thresh: binarization threshold.
              box_thresh: minimum score a candidate must reach.
              unclip_ratio: expansion ratio passed to :meth:`unclip`.

          Returns:
              ``(boxes, scores)`` for this image.

          Raises:
              ValueError: if ``self.box_type`` is neither ``"quad"`` nor
                  ``"poly"``.
          """
          pred = pred[0, :, :]
          segmentation = pred > thresh
          dilation_kernel = None if not self.use_dilation else np.array([[1, 1], [1, 1]])
          src_h, src_w, ratio_h, ratio_w = img_shape
          if dilation_kernel is not None:
              mask = cv2.dilate(
                  np.array(segmentation).astype(np.uint8),
                  dilation_kernel,
              )
          else:
              mask = segmentation
          if self.box_type == "poly":
              boxes, scores = self.polygons_from_bitmap(
                  pred, mask, src_w, src_h, box_thresh, unclip_ratio
              )
          elif self.box_type == "quad":
              boxes, scores = self.boxes_from_bitmap(
                  pred, mask, src_w, src_h, box_thresh, unclip_ratio
              )
          else:
              raise ValueError("box_type can only be one of ['quad', 'poly']")
          return boxes, scores