operators.py 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. import cv2
  16. import copy
  17. import random
  18. from PIL import Image
  19. import paddlex
  20. try:
  21. from collections.abc import Sequence
  22. except Exception:
  23. from collections import Sequence
  24. from numbers import Number
  25. from .functions import normalize, horizontal_flip, permute, vertical_flip, center_crop, is_poly, \
  26. horizontal_flip_poly, horizontal_flip_rle, vertical_flip_poly, vertical_flip_rle, crop_poly, \
  27. crop_rle, expand_poly, expand_rle, resize_poly, resize_rle
# Public API of this module (some names are defined further down the file).
__all__ = [
    "Compose", "Decode", "Resize", "RandomResize", "ResizeByShort",
    "RandomResizeByShort", "RandomHorizontalFlip", "RandomVerticalFlip",
    "Normalize", "CenterCrop", "RandomCrop", "RandomScaleAspect",
    "RandomExpand", "Padding", "MixupImage", "RandomDistort", "RandomBlur",
    "ArrangeSegmenter", "ArrangeClassifier", "ArrangeDetector"
]

# Mapping from interpolation-method names (as accepted by the transforms'
# `interp` argument) to the corresponding OpenCV interpolation flags.
interp_dict = {
    'NEAREST': cv2.INTER_NEAREST,
    'LINEAR': cv2.INTER_LINEAR,
    'CUBIC': cv2.INTER_CUBIC,
    'AREA': cv2.INTER_AREA,
    'LANCZOS4': cv2.INTER_LANCZOS4
}
  42. class Transform(object):
  43. """
  44. Parent class of all data augmentation operations
  45. """
  46. def __init__(self):
  47. pass
  48. def apply_im(self, image):
  49. pass
  50. def apply_mask(self, mask):
  51. pass
  52. def apply_bbox(self, bbox):
  53. pass
  54. def apply_segm(self, segms):
  55. pass
  56. def apply(self, sample):
  57. sample['image'] = self.apply_im(sample['image'])
  58. if 'mask' in sample:
  59. sample['mask'] = self.apply_mask(sample['mask'])
  60. if 'gt_bbox' in sample:
  61. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'])
  62. return sample
  63. def __call__(self, sample):
  64. if isinstance(sample, Sequence):
  65. sample = [self.apply(s) for s in sample]
  66. else:
  67. sample = self.apply(sample)
  68. return sample
  69. class Compose(Transform):
  70. """
  71. Apply a series of data augmentation to the input.
  72. All input images are in Height-Width-Channel ([H, W, C]) format.
  73. Args:
  74. transforms (List[paddlex.transforms.Transform]): List of data preprocess or augmentations.
  75. Raises:
  76. TypeError: Invalid type of transforms.
  77. ValueError: Invalid length of transforms.
  78. """
  79. def __init__(self, transforms):
  80. super(Compose, self).__init__()
  81. if not isinstance(transforms, list):
  82. raise TypeError(
  83. 'Type of transforms is invalid. Must be List, but received is {}'
  84. .format(type(transforms)))
  85. if len(transforms) < 1:
  86. raise ValueError(
  87. 'Length of transforms must not be less than 1, but received is {}'
  88. .format(len(transforms)))
  89. self.transforms = transforms
  90. self.decode_image = Decode()
  91. self.arrange_outputs = None
  92. self.apply_im_only = False
  93. def __call__(self, sample):
  94. if self.apply_im_only and 'mask' in sample:
  95. mask_backup = copy.deepcopy(sample['mask'])
  96. del sample['mask']
  97. sample = self.decode_image(sample)
  98. for op in self.transforms:
  99. # skip batch transforms amd mixup
  100. if isinstance(op, (paddlex.transforms.BatchRandomResize,
  101. paddlex.transforms.BatchRandomResizeByShort,
  102. MixupImage)):
  103. continue
  104. sample = op(sample)
  105. if self.arrange_outputs is not None:
  106. if self.apply_im_only:
  107. sample['mask'] = mask_backup
  108. sample = self.arrange_outputs(sample)
  109. return sample
  110. class Decode(Transform):
  111. """
  112. Decode image(s) in input.
  113. Args:
  114. to_rgb (bool, optional): If True, convert input images from BGR format to RGB format. Defaults to True.
  115. """
  116. def __init__(self, to_rgb=True):
  117. super(Decode, self).__init__()
  118. self.to_rgb = to_rgb
  119. def read_img(self, img_path):
  120. return cv2.imread(img_path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_ANYCOLOR |
  121. cv2.IMREAD_COLOR)
  122. def apply_im(self, im_path):
  123. if isinstance(im_path, str):
  124. try:
  125. image = self.read_img(im_path)
  126. except:
  127. raise ValueError('Cannot read the image file {}!'.format(
  128. im_path))
  129. else:
  130. image = im_path
  131. if self.to_rgb:
  132. image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  133. return image
  134. def apply_mask(self, mask):
  135. try:
  136. mask = np.asarray(Image.open(mask))
  137. except:
  138. raise ValueError("Cannot read the mask file {}!".format(mask))
  139. if len(mask.shape) != 2:
  140. raise Exception(
  141. "Mask should be a 1-channel image, but recevied is a {}-channel image.".
  142. format(mask.shape[2]))
  143. return mask
  144. def apply(self, sample):
  145. """
  146. Args:
  147. sample (dict): Input sample, containing 'image' at least.
  148. Returns:
  149. dict: Decoded sample.
  150. """
  151. sample['image'] = self.apply_im(sample['image'])
  152. if 'mask' in sample:
  153. sample['mask'] = self.apply_mask(sample['mask'])
  154. im_height, im_width, _ = sample['image'].shape
  155. se_height, se_width = sample['mask'].shape
  156. if im_height != se_height or im_width != se_width:
  157. raise Exception(
  158. "The height or width of the im is not same as the mask")
  159. sample['im_shape'] = np.array(
  160. sample['image'].shape[:2], dtype=np.float32)
  161. sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  162. return sample
class Resize(Transform):
    """
    Resize input.
    - If target_size is an int, resize the image(s) to (target_size, target_size).
    - If target_size is a list or tuple, resize the image(s) to target_size.
    Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
    Args:
        target_size (int, List[int] or Tuple[int]): Target size. If int, the height and width share the same target_size.
            Otherwise, target_size represents [target height, target width].
        interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
            Interpolation method of resize. Defaults to 'LINEAR'.
        keep_ratio (bool): the resize scale of width/height is same and width/height after resized is not greater
            than target width/height. Defaults to False.
    Raises:
        TypeError: Invalid type of target_size.
        ValueError: Invalid interpolation method.
    """

    def __init__(self, target_size, interp='LINEAR', keep_ratio=False):
        super(Resize, self).__init__()
        if not (interp == "RANDOM" or interp in interp_dict):
            raise ValueError("interp should be one of {}".format(
                interp_dict.keys()))
        if isinstance(target_size, int):
            target_size = (target_size, target_size)
        else:
            if not (isinstance(target_size,
                               (list, tuple)) and len(target_size) == 2):
                raise TypeError(
                    "target_size should be an int or a list of length 2, but received {}".
                    format(target_size))
        # (height, width)
        self.target_size = target_size
        self.interp = interp
        self.keep_ratio = keep_ratio

    def apply_im(self, image, interp, target_size):
        # target_size is (width, height), matching cv2.resize's dsize order.
        image = cv2.resize(image, target_size, interpolation=interp)
        return image

    def apply_mask(self, mask, target_size):
        # Nearest-neighbor keeps mask label values intact.
        mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
        return mask

    def apply_bbox(self, bbox, scale, target_size):
        # Scale [x1, y1, x2, y2] boxes, then clip x to the target width
        # (target_size[0]) and y to the target height (target_size[1]).
        im_scale_x, im_scale_y = scale
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, target_size[0])
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, target_size[1])
        return bbox

    def apply_segm(self, segms, im_size, scale):
        # Resize each segmentation; `segms` may mix polygon lists and RLE dicts.
        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                resized_segms.append(
                    resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
        return resized_segms

    def apply(self, sample):
        if self.interp == "RANDOM":
            interp = random.choice(list(interp_dict.values()))
        else:
            interp = interp_dict[self.interp]
        im_h, im_w = sample['image'].shape[:2]
        im_scale_y = self.target_size[0] / im_h
        im_scale_x = self.target_size[1] / im_w
        # self.target_size is (h, w); cv2 wants (w, h).
        target_size = (self.target_size[1], self.target_size[0])
        if self.keep_ratio:
            # Shrink both sides by the same factor so neither exceeds the
            # requested target, then recompute the effective scales.
            scale = min(im_scale_y, im_scale_x)
            target_w = int(round(im_w * scale))
            target_h = int(round(im_h * scale))
            target_size = (target_w, target_h)
            im_scale_y = target_h / im_h
            im_scale_x = target_w / im_w
        sample['image'] = self.apply_im(sample['image'], interp, target_size)
        if 'mask' in sample:
            sample['mask'] = self.apply_mask(sample['mask'], target_size)
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(
                sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(
                sample['gt_poly'], [im_h, im_w], [im_scale_x, im_scale_y])
        sample['im_shape'] = np.asarray(
            sample['image'].shape[:2], dtype=np.float32)
        if 'scale_factor' in sample:
            # Accumulate scales: scale_factor is [y-scale, x-scale].
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        return sample
  258. class RandomResize(Transform):
  259. """
  260. Resize input to random sizes.
  261. Attention:If interp is 'RANDOM', the interpolation method will be chose randomly.
  262. Args:
  263. target_sizes (List[int], List[list or tuple] or Tuple[lsit or tuple]):
  264. Multiple target sizes, each target size is an int or list/tuple.
  265. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
  266. Interpolation method of resize. Defaults to 'LINEAR'.
  267. Raises:
  268. TypeError: Invalid type of target_size.
  269. ValueError: Invalid interpolation method.
  270. See Also:
  271. Resize input to a specific size.
  272. """
  273. def __init__(self, target_sizes, interp='LINEAR'):
  274. super(RandomResize, self).__init__()
  275. if not (interp == "RANDOM" or interp in interp_dict):
  276. raise ValueError("interp should be one of {}".format(
  277. interp_dict.keys()))
  278. self.interp = interp
  279. assert isinstance(target_sizes, list), \
  280. "target_size must be List"
  281. for i, item in enumerate(target_sizes):
  282. if isinstance(item, int):
  283. target_sizes[i] = (item, item)
  284. self.target_size = target_sizes
  285. def apply(self, sample):
  286. height, width = random.choice(self.target_size)
  287. resizer = Resize((height, width), interp=self.interp)
  288. sample = resizer(sample)
  289. return sample
  290. class ResizeByShort(Transform):
  291. """
  292. Resize input with keeping the aspect ratio.
  293. Attention:If interp is 'RANDOM', the interpolation method will be chose randomly.
  294. Args:
  295. short_size (int): Target size of the shorter side of the image(s).
  296. max_size (int, optional): The upper bound of longer side of the image(s). If max_size is -1, no upper bound is applied. Defaults to -1.
  297. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'.
  298. Raises:
  299. ValueError: Invalid interpolation method.
  300. """
  301. def __init__(self, short_size=256, max_size=-1, interp='LINEAR'):
  302. if not (interp == "RANDOM" or interp in interp_dict):
  303. raise ValueError("interp should be one of {}".format(
  304. interp_dict.keys()))
  305. super(ResizeByShort, self).__init__()
  306. self.short_size = short_size
  307. self.max_size = max_size
  308. self.interp = interp
  309. def apply_im(self, image, interp, target_size):
  310. image = cv2.resize(image, target_size, interpolation=interp)
  311. return image
  312. def apply_mask(self, mask, target_size):
  313. mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
  314. return mask
  315. def apply_bbox(self, bbox, scale, target_size):
  316. im_scale_x, im_scale_y = scale
  317. bbox[:, 0::2] *= im_scale_x
  318. bbox[:, 1::2] *= im_scale_y
  319. bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, target_size[1])
  320. bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, target_size[0])
  321. return bbox
  322. def apply_segm(self, segms, im_size, scale):
  323. im_h, im_w = im_size
  324. im_scale_x, im_scale_y = scale
  325. resized_segms = []
  326. for segm in segms:
  327. if is_poly(segm):
  328. # Polygon format
  329. resized_segms.append([
  330. resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
  331. ])
  332. else:
  333. # RLE format
  334. resized_segms.append(
  335. resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
  336. return resized_segms
  337. def apply(self, sample):
  338. if self.interp == "RANDOM":
  339. interp = random.choice(list(interp_dict.values()))
  340. else:
  341. interp = interp_dict[self.interp]
  342. im_h, im_w = sample['image'].shape[:2]
  343. im_short_size = min(im_h, im_w)
  344. im_long_size = max(im_h, im_w)
  345. scale = float(self.short_size) / float(im_short_size)
  346. if 0 < self.max_size < np.round(scale * im_long_size):
  347. scale = float(self.max_size) / float(im_long_size)
  348. target_w = int(round(im_w * scale))
  349. target_h = int(round(im_h * scale))
  350. target_size = (target_w, target_h)
  351. sample['image'] = self.apply_im(sample['image'], interp, target_size)
  352. im_scale_y = target_h / im_h
  353. im_scale_x = target_w / im_w
  354. if 'mask' in sample:
  355. sample['mask'] = self.apply_mask(sample['mask'], target_size)
  356. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  357. sample['gt_bbox'] = self.apply_bbox(
  358. sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
  359. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  360. sample['gt_poly'] = self.apply_segm(
  361. sample['gt_poly'], [im_h, im_w], [im_scale_x, im_scale_y])
  362. sample['im_shape'] = np.asarray(
  363. sample['image'].shape[:2], dtype=np.float32)
  364. if 'scale_factor' in sample:
  365. scale_factor = sample['scale_factor']
  366. sample['scale_factor'] = np.asarray(
  367. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  368. dtype=np.float32)
  369. return sample
  370. class RandomResizeByShort(Transform):
  371. """
  372. Resize input to random sizes with keeping the aspect ratio.
  373. Attention:If interp is 'RANDOM', the interpolation method will be chose randomly.
  374. Args:
  375. short_sizes (List[int]): Target size of the shorter side of the image(s).
  376. max_size (int, optional): The upper bound of longer side of the image(s). If max_size is -1, no upper bound is applied. Defaults to -1.
  377. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'.
  378. Raises:
  379. TypeError: Invalid type of target_size.
  380. ValueError: Invalid interpolation method.
  381. See Also:
  382. ResizeByShort: Resize image(s) in input with keeping the aspect ratio.
  383. """
  384. def __init__(self, short_sizes, max_size=-1, interp='LINEAR'):
  385. super(RandomResizeByShort, self).__init__()
  386. if not (interp == "RANDOM" or interp in interp_dict):
  387. raise ValueError("interp should be one of {}".format(
  388. interp_dict.keys()))
  389. self.interp = interp
  390. assert isinstance(short_sizes, list), \
  391. "short_sizes must be List"
  392. self.short_sizes = short_sizes
  393. self.max_size = max_size
  394. def apply(self, sample):
  395. short_size = random.choice(self.short_sizes)
  396. resizer = ResizeByShort(
  397. short_size=short_size, max_size=self.max_size, interp=self.interp)
  398. sample = resizer(sample)
  399. return sample
  400. class RandomHorizontalFlip(Transform):
  401. """
  402. Randomly flip the input horizontally.
  403. Args:
  404. prob(float, optional): Probability of flipping the input. Defaults to .5.
  405. """
  406. def __init__(self, prob=0.5):
  407. super(RandomHorizontalFlip, self).__init__()
  408. self.prob = prob
  409. def apply_im(self, image):
  410. image = horizontal_flip(image)
  411. return image
  412. def apply_mask(self, mask):
  413. mask = horizontal_flip(mask)
  414. return mask
  415. def apply_bbox(self, bbox, width):
  416. oldx1 = bbox[:, 0].copy()
  417. oldx2 = bbox[:, 2].copy()
  418. bbox[:, 0] = width - oldx2
  419. bbox[:, 2] = width - oldx1
  420. return bbox
  421. def apply_segm(self, segms, height, width):
  422. flipped_segms = []
  423. for segm in segms:
  424. if is_poly(segm):
  425. # Polygon format
  426. flipped_segms.append(
  427. [horizontal_flip_poly(poly, width) for poly in segm])
  428. else:
  429. # RLE format
  430. flipped_segms.append(horizontal_flip_rle(segm, height, width))
  431. return flipped_segms
  432. def apply(self, sample):
  433. if random.random() < self.prob:
  434. im_h, im_w = sample['image'].shape[:2]
  435. sample['image'] = self.apply_im(sample['image'])
  436. if 'mask' in sample:
  437. sample['mask'] = self.apply_mask(sample['mask'])
  438. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  439. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], im_w)
  440. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  441. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_h,
  442. im_w)
  443. return sample
  444. class RandomVerticalFlip(Transform):
  445. """
  446. Randomly flip the input vertically.
  447. Args:
  448. prob(float, optional): Probability of flipping the input. Defaults to .5.
  449. """
  450. def __init__(self, prob=0.5):
  451. super(RandomVerticalFlip, self).__init__()
  452. self.prob = prob
  453. def apply_im(self, image):
  454. image = vertical_flip(image)
  455. return image
  456. def apply_mask(self, mask):
  457. mask = vertical_flip(mask)
  458. return mask
  459. def apply_bbox(self, bbox, height):
  460. oldy1 = bbox[:, 1].copy()
  461. oldy2 = bbox[:, 3].copy()
  462. bbox[:, 0] = height - oldy2
  463. bbox[:, 2] = height - oldy1
  464. return bbox
  465. def apply_segm(self, segms, height, width):
  466. flipped_segms = []
  467. for segm in segms:
  468. if is_poly(segm):
  469. # Polygon format
  470. flipped_segms.append(
  471. [vertical_flip_poly(poly, height) for poly in segm])
  472. else:
  473. # RLE format
  474. flipped_segms.append(vertical_flip_rle(segm, height, width))
  475. return flipped_segms
  476. def apply(self, sample):
  477. if random.random() < self.prob:
  478. im_h, im_w = sample['image'].shape[:2]
  479. sample['image'] = self.apply_im(sample['image'])
  480. if 'mask' in sample:
  481. sample['mask'] = self.apply_mask(sample['mask'])
  482. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  483. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], im_h)
  484. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  485. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_h,
  486. im_w)
  487. return sample
  488. class Normalize(Transform):
  489. """
  490. Apply min-max normalization to the image(s) in input.
  491. 1. im = (im - min_value) * 1 / (max_value - min_value)
  492. 2. im = im - mean
  493. 3. im = im / std
  494. Args:
  495. mean(List[float] or Tuple[float], optional): Mean of input image(s). Defaults to [0.485, 0.456, 0.406].
  496. std(List[float] or Tuple[float], optional): Standard deviation of input image(s). Defaults to [0.229, 0.224, 0.225].
  497. min_val(List[float] or Tuple[float], optional): Minimum value of input image(s). Defaults to [0, 0, 0, ].
  498. max_val(List[float] or Tuple[float], optional): Max value of input image(s). Defaults to [255., 255., 255.].
  499. is_scale(bool, optional): If True, the image pixel values will be divided by 255.
  500. """
  501. def __init__(self,
  502. mean=[0.485, 0.456, 0.406],
  503. std=[0.229, 0.224, 0.225],
  504. min_val=[0, 0, 0],
  505. max_val=[255., 255., 255.],
  506. is_scale=True):
  507. super(Normalize, self).__init__()
  508. from functools import reduce
  509. if reduce(lambda x, y: x * y, std) == 0:
  510. raise ValueError(
  511. 'Std should not have 0, but received is {}'.format(std))
  512. if is_scale:
  513. if reduce(lambda x, y: x * y,
  514. [a - b for a, b in zip(max_val, min_val)]) == 0:
  515. raise ValueError(
  516. '(max_val - min_val) should not have 0, but received is {}'.
  517. format((np.asarray(max_val) - np.asarray(min_val)).tolist(
  518. )))
  519. self.mean = mean
  520. self.std = std
  521. self.min_val = min_val
  522. self.max_val = max_val
  523. self.is_scale = is_scale
  524. def apply_im(self, image):
  525. image = image.astype(np.float32)
  526. mean = np.asarray(
  527. self.mean, dtype=np.float32)[np.newaxis, np.newaxis, :]
  528. std = np.asarray(self.std, dtype=np.float32)[np.newaxis, np.newaxis, :]
  529. image = normalize(image, mean, std, self.min_val, self.max_val)
  530. return image
  531. def apply(self, sample):
  532. sample['image'] = self.apply_im(sample['image'])
  533. return sample
  534. class CenterCrop(Transform):
  535. """
  536. Crop the input at the center.
  537. 1. Locate the center of the image.
  538. 2. Crop the sample.
  539. Args:
  540. crop_size(int, optional): target size of the cropped image(s). Defaults to 224.
  541. """
  542. def __init__(self, crop_size=224):
  543. super(CenterCrop, self).__init__()
  544. self.crop_size = crop_size
  545. def apply_im(self, image):
  546. image = center_crop(image, self.crop_size)
  547. return image
  548. def apply_mask(self, mask):
  549. mask = center_crop(mask)
  550. return mask
  551. def apply(self, sample):
  552. sample['image'] = self.apply_im(sample['image'])
  553. if 'mask' in sample:
  554. sample['mask'] = self.apply_mask(sample['mask'])
  555. return sample
  556. class RandomCrop(Transform):
  557. """
  558. Randomly crop the input.
  559. 1. Compute the height and width of cropped area according to aspect_ratio and scaling.
  560. 2. Locate the upper left corner of cropped area randomly.
  561. 3. Crop the image(s).
  562. 4. Resize the cropped area to crop_size by crop_size.
  563. Args:
  564. crop_size(int, List[int] or Tuple[int]): Target size of the cropped area. If None, the cropped area will not be
  565. resized. Defaults to None.
  566. aspect_ratio (List[float], optional): Aspect ratio of cropped region in [min, max] format. Defaults to [.5, 2.].
  567. thresholds (List[float], optional): Iou thresholds to decide a valid bbox crop.
  568. Defaults to [.0, .1, .3, .5, .7, .9].
  569. scaling (List[float], optional): Ratio between the cropped region and the original image in [min, max] format.
  570. Defaults to [.3, 1.].
  571. num_attempts (int, optional): The number of tries before giving up. Defaults to 50.
  572. allow_no_crop (bool, optional): Whether returning without doing crop is allowed. Defaults to True.
  573. cover_all_box (bool, optional): Whether to ensure all bboxes are covered in the final crop. Defaults to False.
  574. """
    def __init__(self,
                 crop_size=None,
                 aspect_ratio=[.5, 2.],
                 thresholds=[.0, .1, .3, .5, .7, .9],
                 scaling=[.3, 1.],
                 num_attempts=50,
                 allow_no_crop=True,
                 cover_all_box=False):
        # See the class docstring for the meaning of each parameter.
        super(RandomCrop, self).__init__()
        self.crop_size = crop_size          # resize target after cropping; None keeps the cropped size
        self.aspect_ratio = aspect_ratio    # [min, max] aspect ratio of the crop region
        self.thresholds = thresholds        # candidate IoU thresholds for accepting a crop
        self.scaling = scaling              # [min, max] area ratio of crop vs. original
        self.num_attempts = num_attempts    # tries per threshold before giving up
        self.allow_no_crop = allow_no_crop  # allow returning the sample uncropped
        self.cover_all_box = cover_all_box  # require every gt box to pass the IoU threshold
  591. def _generate_crop_info(self, sample):
  592. im_h, im_w = sample['image'].shape[:2]
  593. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  594. thresholds = self.thresholds
  595. if self.allow_no_crop:
  596. thresholds.append('no_crop')
  597. np.random.shuffle(thresholds)
  598. for thresh in thresholds:
  599. if thresh == 'no_crop':
  600. return None
  601. for i in range(self.num_attempts):
  602. crop_box = self._get_crop_box(im_h, im_w)
  603. if crop_box is None:
  604. continue
  605. iou = self._iou_matrix(
  606. sample['gt_bbox'],
  607. np.array(
  608. [crop_box], dtype=np.float32))
  609. if iou.max() < thresh:
  610. continue
  611. if self.cover_all_box and iou.min() < thresh:
  612. continue
  613. cropped_box, valid_ids = self._crop_box_with_center_constraint(
  614. sample['gt_bbox'],
  615. np.array(
  616. crop_box, dtype=np.float32))
  617. if valid_ids.size > 0:
  618. return crop_box, cropped_box, valid_ids
  619. else:
  620. for i in range(self.num_attempts):
  621. crop_box = self._get_crop_box(im_h, im_w)
  622. if crop_box is None:
  623. continue
  624. return crop_box, None, None
  625. return None
    def _get_crop_box(self, im_h, im_w):
        # Sample a crop scale, then an aspect ratio constrained so the
        # per-axis scales stay within [scale**2, scale**-2].
        scale = np.random.uniform(*self.scaling)
        if self.aspect_ratio is not None:
            min_ar, max_ar = self.aspect_ratio
            aspect_ratio = np.random.uniform(
                max(min_ar, scale**2), min(max_ar, scale**-2))
            h_scale = scale / np.sqrt(aspect_ratio)
            w_scale = scale * np.sqrt(aspect_ratio)
        else:
            h_scale = np.random.uniform(*self.scaling)
            w_scale = np.random.uniform(*self.scaling)
        crop_h = im_h * h_scale
        crop_w = im_w * w_scale
        if self.aspect_ratio is None:
            # Without an explicit aspect-ratio range, reject crops more than
            # twice as tall as wide (or vice versa).
            if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
                return None
        crop_h = int(crop_h)
        crop_w = int(crop_w)
        # NOTE(review): np.random.randint(0, 0) raises ValueError when
        # crop_h == im_h (scale hit 1.0 exactly after truncation) —
        # presumably rare in practice; confirm whether the caller tolerates
        # the exception via its retry loop.
        crop_y = np.random.randint(0, im_h - crop_h)
        crop_x = np.random.randint(0, im_w - crop_w)
        # Returned as [x1, y1, x2, y2].
        return [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
  647. def _iou_matrix(self, a, b):
  648. tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
  649. br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
  650. area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
  651. area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
  652. area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
  653. area_o = (area_a[:, np.newaxis] + area_b - area_i)
  654. return area_i / (area_o + 1e-10)
    def _crop_box_with_center_constraint(self, box, crop):
        # Clip each gt box to the crop window, then shift into crop-local
        # coordinates.
        cropped_box = box.copy()
        cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
        cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
        cropped_box[:, :2] -= crop[:2]
        cropped_box[:, 2:] -= crop[:2]
        # A box survives only if its center lies inside the crop window...
        centers = (box[:, :2] + box[:, 2:]) / 2
        valid = np.logical_and(crop[:2] <= centers,
                               centers < crop[2:]).all(axis=1)
        # ...and the clipped box still has positive width and height.
        valid = np.logical_and(
            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
        # Returns (clipped boxes, indices of the surviving boxes).
        return cropped_box, np.where(valid)[0]
  667. def _crop_segm(self, segms, valid_ids, crop, height, width):
  668. crop_segms = []
  669. for id in valid_ids:
  670. segm = segms[id]
  671. if is_poly(segm):
  672. # Polygon format
  673. crop_segms.append(crop_poly(segm, crop))
  674. else:
  675. # RLE format
  676. crop_segms.append(crop_rle(segm, crop, height, width))
  677. return crop_segms
  678. def apply_im(self, image, crop):
  679. x1, y1, x2, y2 = crop
  680. return image[y1:y2, x1:x2, :]
  681. def apply_mask(self, mask, crop):
  682. x1, y1, x2, y2 = crop
  683. return mask[y1:y2, x1:x2, :]
  684. def apply(self, sample):
  685. crop_info = self._generate_crop_info(sample)
  686. if crop_info is not None:
  687. crop_box, cropped_box, valid_ids = crop_info
  688. im_h, im_w = sample['image'].shape[:2]
  689. sample['image'] = self.apply_im(sample['image'], crop_box)
  690. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  691. crop_polys = self._crop_segm(
  692. sample['gt_poly'],
  693. valid_ids,
  694. np.array(
  695. crop_box, dtype=np.int64),
  696. im_h,
  697. im_w)
  698. if [] in crop_polys:
  699. delete_id = list()
  700. valid_polys = list()
  701. for idx, poly in enumerate(crop_polys):
  702. if not crop_poly:
  703. delete_id.append(idx)
  704. else:
  705. valid_polys.append(poly)
  706. valid_ids = np.delete(valid_ids, delete_id)
  707. if not valid_polys:
  708. return sample
  709. sample['gt_poly'] = valid_polys
  710. else:
  711. sample['gt_poly'] = crop_polys
  712. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  713. sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
  714. sample['gt_class'] = np.take(
  715. sample['gt_class'], valid_ids, axis=0)
  716. if 'gt_score' in sample:
  717. sample['gt_score'] = np.take(
  718. sample['gt_score'], valid_ids, axis=0)
  719. if 'is_crowd' in sample:
  720. sample['is_crowd'] = np.take(
  721. sample['is_crowd'], valid_ids, axis=0)
  722. if 'mask' in sample:
  723. sample['mask'] = self.apply_mask(sample['mask'], crop_box)
  724. if self.crop_size is not None:
  725. sample = Resize(self.crop_size)(sample)
  726. return sample
  727. class RandomScaleAspect(Transform):
  728. """
  729. Crop input image(s) and resize back to original sizes.
  730. Args:
  731. min_scale (float):Minimum ratio between the cropped region and the original image.
  732. If 0, image(s) will not be cropped. Defaults to .5.
  733. aspect_ratio (float): Aspect ratio of cropped region. Defaults to .33.
  734. """
  735. def __init__(self, min_scale=0.5, aspect_ratio=0.33):
  736. super(RandomScaleAspect, self).__init__()
  737. self.min_scale = min_scale
  738. self.aspect_ratio = aspect_ratio
  739. def apply(self, sample):
  740. if self.min_scale != 0 and self.aspect_ratio != 0:
  741. img_height, img_width = sample['image'].shape[:2]
  742. sample = RandomCrop(
  743. crop_size=(img_height, img_width),
  744. aspect_ratio=[self.aspect_ratio, 1. / self.aspect_ratio],
  745. scaling=[self.min_scale, 1.],
  746. num_attempts=10,
  747. allow_no_crop=False)(sample)
  748. return sample
  749. class RandomExpand(Transform):
  750. """
  751. Randomly expand the input by padding to the lower right side of the image(s) in input.
  752. Args:
  753. upper_ratio(float, optional): The maximum ratio to which the original image is expanded. Defaults to 4..
  754. prob(float, optional): The probability of apply expanding. Defaults to .5.
  755. im_padding_value(List[float] or Tuple[float], optional): RGB filling value for the image. Defaults to (127.5, 127.5, 127.5).
  756. label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
  757. """
  758. def __init__(self,
  759. upper_ratio=4.,
  760. prob=.5,
  761. im_padding_value=(127.5, 127.5, 127.5),
  762. label_padding_value=255):
  763. super(RandomExpand, self).__init__()
  764. assert upper_ratio > 1.01, "expand ratio must be larger than 1.01"
  765. self.upper_ratio = upper_ratio
  766. self.prob = prob
  767. assert isinstance(im_padding_value, (Number, Sequence)), \
  768. "fill value must be either float or sequence"
  769. if isinstance(im_padding_value, Number):
  770. im_padding_value = (im_padding_value, ) * 3
  771. if not isinstance(im_padding_value, tuple):
  772. im_padding_value = tuple(im_padding_value)
  773. self.im_padding_value = im_padding_value
  774. self.label_padding_value = label_padding_value
  775. def apply(self, sample):
  776. if random.random() < self.prob:
  777. im_h, im_w = sample['image'].shape[:2]
  778. ratio = np.random.uniform(1., self.upper_ratio)
  779. h = int(im_h * ratio)
  780. w = int(im_w * ratio)
  781. if h > im_h and w > im_w:
  782. y = np.random.randint(0, h - im_h)
  783. x = np.random.randint(0, w - im_w)
  784. target_size = (h, w)
  785. offsets = (x, y)
  786. sample = Padding(
  787. target_size=target_size,
  788. pad_mode=-1,
  789. offsets=offsets,
  790. im_padding_value=self.im_padding_value,
  791. label_padding_value=self.label_padding_value)(sample)
  792. return sample
  793. class Padding(Transform):
  794. def __init__(self,
  795. target_size=None,
  796. pad_mode=0,
  797. offsets=None,
  798. im_padding_value=(127.5, 127.5, 127.5),
  799. label_padding_value=255,
  800. coarsest_stride=32):
  801. """
  802. Pad image to a specified size or multiple of size_divisor.
  803. Args:
  804. target_size(int, Sequence, optional): Image target size, if None, pad to multiple of size_divisor. Defaults to None.
  805. pad_mode({-1, 0, 1, 2}, optional): Pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
  806. if 0, only pad to right and bottom. If 1, pad according to center. If 2, only pad left and top. Defaults to 0.
  807. im_padding_value(Sequence[float]): RGB value of pad area. Defaults to (127.5, 127.5, 127.5).
  808. label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
  809. coarsest_stride(int): Image width and height after padding is a multiple of coarsest_stride.
  810. """
  811. super(Padding, self).__init__()
  812. if isinstance(target_size, (list, tuple)):
  813. if len(target_size) != 2:
  814. raise ValueError(
  815. '`target_size` should include 2 elements, but it is {}'.
  816. format(target_size))
  817. if isinstance(target_size, int):
  818. target_size = [target_size] * 2
  819. assert pad_mode in [
  820. -1, 0, 1, 2
  821. ], 'currently only supports four modes [-1, 0, 1, 2]'
  822. if pad_mode == -1:
  823. assert offsets, 'if pad_mode is -1, offsets should not be None'
  824. self.target_size = target_size
  825. self.coarsest_stride = coarsest_stride
  826. self.pad_mode = pad_mode
  827. self.offsets = offsets
  828. self.im_padding_value = im_padding_value
  829. self.label_padding_value = label_padding_value
  830. def apply_im(self, image, offsets, target_size):
  831. x, y = offsets
  832. im_h, im_w = image.shape[:2]
  833. h, w = target_size
  834. canvas = np.ones((h, w, 3), dtype=np.float32)
  835. canvas *= np.array(self.im_padding_value, dtype=np.float32)
  836. canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
  837. return canvas
  838. def apply_mask(self, mask, offsets, target_size):
  839. x, y = offsets
  840. im_h, im_w = mask.shape[:2]
  841. h, w = target_size
  842. canvas = np.ones((h, w), dtype=np.float32)
  843. canvas *= np.array(self.label_padding_value, dtype=np.float32)
  844. canvas[y:y + im_h, x:x + im_w] = mask.astype(np.float32)
  845. return canvas
  846. def apply_bbox(self, bbox, offsets):
  847. return bbox + np.array(offsets * 2, dtype=np.float32)
  848. def apply_segm(self, segms, offsets, im_size, size):
  849. x, y = offsets
  850. height, width = im_size
  851. h, w = size
  852. expanded_segms = []
  853. for segm in segms:
  854. if is_poly(segm):
  855. # Polygon format
  856. expanded_segms.append(
  857. [expand_poly(poly, x, y) for poly in segm])
  858. else:
  859. # RLE format
  860. expanded_segms.append(
  861. expand_rle(segm, x, y, height, width, h, w))
  862. return expanded_segms
  863. def apply(self, sample):
  864. im_h, im_w = sample['image'].shape[:2]
  865. if self.target_size:
  866. h, w = self.target_size
  867. assert (
  868. im_h <= h and im_w <= w
  869. ), 'target size ({}, {}) cannot be less than image size ({}, {})'\
  870. .format(h, w, im_h, im_w)
  871. else:
  872. h = (np.ceil(im_h // self.coarsest_stride) *
  873. self.coarsest_stride).astype(int)
  874. w = (np.ceil(im_w / self.coarsest_stride) *
  875. self.coarsest_stride).astype(int)
  876. if h == im_h and w == im_w:
  877. return sample
  878. if self.pad_mode == -1:
  879. offsets = self.offsets
  880. elif self.pad_mode == 0:
  881. offsets = [0, 0]
  882. elif self.pad_mode == 1:
  883. offsets = [(h - im_h) // 2, (w - im_w) // 2]
  884. else:
  885. offsets = [h - im_h, w - im_w]
  886. sample['image'] = self.apply_im(sample['image'], offsets, (h, w))
  887. if 'mask' in sample:
  888. sample['mask'] = self.apply_mask(sample['mask'], offsets, (h, w))
  889. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  890. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
  891. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  892. sample['gt_poly'] = self.apply_segm(
  893. sample['gt_poly'], offsets, im_size=[im_h, im_w], size=[h, w])
  894. return sample
  895. class MixupImage(Transform):
  896. def __init__(self, alpha=1.5, beta=1.5, mixup_epoch=-1):
  897. """
  898. Mixup two images and their gt_bbbox/gt_score.
  899. Args:
  900. alpha (float, optional): Alpha parameter of beta distribution. Defaults to 1.5.
  901. beta (float, optional): Beta parameter of beta distribution. Defaults to 1.5.
  902. """
  903. super(MixupImage, self).__init__()
  904. if alpha <= 0.0:
  905. raise ValueError("alpha should be positive in {}".format(self))
  906. if beta <= 0.0:
  907. raise ValueError("beta should be positive in {}".format(self))
  908. self.alpha = alpha
  909. self.beta = beta
  910. self.mixup_epoch = mixup_epoch
  911. def apply_im(self, image1, image2, factor):
  912. h = max(image1.shape[0], image2.shape[0])
  913. w = max(image1.shape[1], image2.shape[1])
  914. img = np.zeros((h, w, image1.shape[2]), 'float32')
  915. img[:image1.shape[0], :image1.shape[1], :] = \
  916. image1.astype('float32') * factor
  917. img[:image2.shape[0], :image2.shape[1], :] += \
  918. image2.astype('float32') * (1.0 - factor)
  919. return img.astype('uint8')
  920. def __call__(self, sample):
  921. if not isinstance(sample, Sequence):
  922. return sample
  923. assert len(sample) == 2, 'mixup need two samples'
  924. factor = np.random.beta(self.alpha, self.beta)
  925. factor = max(0.0, min(1.0, factor))
  926. if factor >= 1.0:
  927. return sample[0]
  928. if factor <= 0.0:
  929. return sample[1]
  930. image = self.apply_im(sample[0]['image'], sample[1]['image'], factor)
  931. result = copy.deepcopy(sample[0])
  932. result['image'] = image
  933. # apply bbox and score
  934. if 'gt_bbox' in sample[0]:
  935. gt_bbox1 = sample[0]['gt_bbox']
  936. gt_bbox2 = sample[1]['gt_bbox']
  937. gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
  938. result['gt_bbox'] = gt_bbox
  939. if 'gt_poly' in sample[0]:
  940. gt_poly1 = sample[0]['gt_poly']
  941. gt_poly2 = sample[1]['gt_poly']
  942. gt_poly = gt_poly1 + gt_poly2
  943. result['gt_poly'] = gt_poly
  944. if 'gt_class' in sample[0]:
  945. gt_class1 = sample[0]['gt_class']
  946. gt_class2 = sample[1]['gt_class']
  947. gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
  948. result['gt_class'] = gt_class
  949. gt_score1 = np.ones_like(sample[0]['gt_class'])
  950. gt_score2 = np.ones_like(sample[1]['gt_class'])
  951. gt_score = np.concatenate(
  952. (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
  953. result['gt_score'] = gt_score
  954. if 'is_crowd' in sample[0]:
  955. is_crowd1 = sample[0]['is_crowd']
  956. is_crowd2 = sample[1]['is_crowd']
  957. is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
  958. result['is_crowd'] = is_crowd
  959. if 'difficult' in sample[0]:
  960. is_difficult1 = sample[0]['difficult']
  961. is_difficult2 = sample[1]['difficult']
  962. is_difficult = np.concatenate(
  963. (is_difficult1, is_difficult2), axis=0)
  964. result['difficult'] = is_difficult
  965. return result
class RandomDistort(Transform):
    """
    Random color distortion.

    Args:
        brightness_range(float, optional): Range of brightness distortion. Defaults to .5.
        brightness_prob(float, optional): Probability of brightness distortion. Defaults to .5.
        contrast_range(float, optional): Range of contrast distortion. Defaults to .5.
        contrast_prob(float, optional): Probability of contrast distortion. Defaults to .5.
        saturation_range(float, optional): Range of saturation distortion. Defaults to .5.
        saturation_prob(float, optional): Probability of saturation distortion. Defaults to .5.
        hue_range(float, optional): Range of hue distortion. Defaults to .5.
        hue_prob(float, optional): Probability of hue distortion. Defaults to .5.
        random_apply (bool, optional): whether to apply in random (yolo) or fixed (SSD)
            order. Defaults to True.
        count (int, optional): the number of doing distortion. Defaults to 4.
        shuffle_channel (bool, optional): whether to swap channels randomly. Defaults to False.

    NOTE(review): each apply_* method below returns the image *unchanged*
    when the random draw is below the corresponding *_prob, so each prob is
    effectively the probability of *skipping* that distortion - confirm
    whether this inversion is intended.
    """

    def __init__(self,
                 brightness_range=0.5,
                 brightness_prob=0.5,
                 contrast_range=0.5,
                 contrast_prob=0.5,
                 saturation_range=0.5,
                 saturation_prob=0.5,
                 hue_range=18,
                 hue_prob=0.5,
                 random_apply=True,
                 count=4,
                 shuffle_channel=False):
        super(RandomDistort, self).__init__()
        # Ranges are stored as [1 - r, 1 + r] bounds around the identity.
        self.brightness_range = [1 - brightness_range, 1 + brightness_range]
        self.brightness_prob = brightness_prob
        self.contrast_range = [1 - contrast_range, 1 + contrast_range]
        self.contrast_prob = contrast_prob
        self.saturation_range = [1 - saturation_range, 1 + saturation_range]
        self.saturation_prob = saturation_prob
        # NOTE(review): with the default hue_range=18 this yields [-17, 19];
        # a symmetric [-hue_range, hue_range] may have been intended - confirm.
        self.hue_range = [1 - hue_range, 1 + hue_range]
        self.hue_prob = hue_prob
        self.random_apply = random_apply
        self.count = count
        self.shuffle_channel = shuffle_channel

    def apply_hue(self, image):
        # Rotate hue via a rotation in YIQ color space.
        low, high = self.hue_range
        if np.random.uniform(0., 1.) < self.hue_prob:
            return image
        image = image.astype(np.float32)
        # it works, but result differ from HSV version
        delta = np.random.uniform(low, high)
        u = np.cos(delta * np.pi)
        w = np.sin(delta * np.pi)
        # IQ-plane rotation, conjugated by the RGB<->YIQ transforms below.
        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
                         [0.211, -0.523, 0.311]])
        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                          [1.0, -1.107, 1.705]])
        t = np.dot(np.dot(ityiq, bt), tyiq).T
        image = np.dot(image, t)
        return image

    def apply_saturation(self, image):
        # Blend the image with its luma (grayscale) by a random factor.
        low, high = self.saturation_range
        if np.random.uniform(0., 1.) < self.saturation_prob:
            return image
        delta = np.random.uniform(low, high)
        image = image.astype(np.float32)
        # it works, but result differ from HSV version
        gray = image * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
        gray = gray.sum(axis=2, keepdims=True)
        gray *= (1.0 - delta)
        image *= delta
        image += gray
        return image

    def apply_contrast(self, image):
        # Scale all pixel values by a random factor in contrast_range.
        low, high = self.contrast_range
        if np.random.uniform(0., 1.) < self.contrast_prob:
            return image
        delta = np.random.uniform(low, high)
        image = image.astype(np.float32)
        image *= delta
        return image

    def apply_brightness(self, image):
        # Add a random offset in brightness_range to all pixels.
        low, high = self.brightness_range
        if np.random.uniform(0., 1.) < self.brightness_prob:
            return image
        delta = np.random.uniform(low, high)
        image = image.astype(np.float32)
        image += delta
        return image

    def apply(self, sample):
        if self.random_apply:
            # YOLO-style: apply up to `count` distortions in random order.
            functions = [
                self.apply_brightness, self.apply_contrast,
                self.apply_saturation, self.apply_hue
            ]
            distortions = np.random.permutation(functions)[:self.count]
            for func in distortions:
                sample['image'] = func(sample['image'])
            return sample
        # SSD-style fixed order: brightness first, then either
        # contrast -> saturation -> hue, or saturation -> hue -> contrast.
        sample['image'] = self.apply_brightness(sample['image'])
        mode = np.random.randint(0, 2)
        if mode:
            sample['image'] = self.apply_contrast(sample['image'])
        sample['image'] = self.apply_saturation(sample['image'])
        sample['image'] = self.apply_hue(sample['image'])
        if not mode:
            sample['image'] = self.apply_contrast(sample['image'])
        if self.shuffle_channel:
            # Randomly permute the three channels half of the time.
            if np.random.randint(0, 2):
                sample['image'] = sample['image'][..., np.random.permutation(
                    3)]
        return sample
  1076. class RandomBlur(Transform):
  1077. """
  1078. Randomly blur input image(s).
  1079. Args:
  1080. prob (float): Probability of blurring.
  1081. """
  1082. def __init__(self, prob=0.1):
  1083. super(RandomBlur, self).__init__()
  1084. self.prob = prob
  1085. def apply_im(self, image, radius):
  1086. image = cv2.GaussianBlur(image, (radius, radius), 0, 0)
  1087. return image
  1088. def apply(self, sample):
  1089. if self.prob <= 0:
  1090. n = 0
  1091. elif self.prob >= 1:
  1092. n = 1
  1093. else:
  1094. n = int(1.0 / self.prob)
  1095. if n > 0:
  1096. if np.random.randint(0, n) == 0:
  1097. radius = np.random.randint(3, 10)
  1098. if radius % 2 != 1:
  1099. radius = radius + 1
  1100. if radius > 9:
  1101. radius = 9
  1102. sample['image'] = self.apply_im(sample['image'], radius)
  1103. return sample
  1104. class _PadBox(Transform):
  1105. def __init__(self, num_max_boxes=50):
  1106. """
  1107. Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
  1108. Args:
  1109. num_max_boxes (int, optional): the max number of bboxes. Defaults to 50.
  1110. """
  1111. self.num_max_boxes = num_max_boxes
  1112. super(_PadBox, self).__init__()
  1113. def apply(self, sample):
  1114. gt_num = min(self.num_max_boxes, len(sample['gt_bbox']))
  1115. num_max = self.num_max_boxes
  1116. pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
  1117. if gt_num > 0:
  1118. pad_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :]
  1119. sample['gt_bbox'] = pad_bbox
  1120. if 'gt_class' in sample:
  1121. pad_class = np.zeros((num_max, ), dtype=np.int32)
  1122. if gt_num > 0:
  1123. pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
  1124. sample['gt_class'] = pad_class
  1125. if 'gt_score' in sample:
  1126. pad_score = np.zeros((num_max, ), dtype=np.float32)
  1127. if gt_num > 0:
  1128. pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
  1129. sample['gt_score'] = pad_score
  1130. # in training, for example in op ExpandImage,
  1131. # the bbox and gt_class is expanded, but the difficult is not,
  1132. # so, judging by it's length
  1133. if 'difficult' in sample:
  1134. pad_diff = np.zeros((num_max, ), dtype=np.int32)
  1135. if gt_num > 0:
  1136. pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
  1137. sample['difficult'] = pad_diff
  1138. if 'is_crowd' in sample:
  1139. pad_crowd = np.zeros((num_max, ), dtype=np.int32)
  1140. if gt_num > 0:
  1141. pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
  1142. sample['is_crowd'] = pad_crowd
  1143. return sample
  1144. class _NormalizeBox(Transform):
  1145. def __init__(self):
  1146. super(_NormalizeBox, self).__init__()
  1147. def apply(self, sample):
  1148. height, width = sample['image'].shape[:2]
  1149. for i in range(sample['gt_bbox'].shape[0]):
  1150. sample['gt_bbox'][i][0] = sample['gt_bbox'][i][0] / width
  1151. sample['gt_bbox'][i][1] = sample['gt_bbox'][i][1] / height
  1152. sample['gt_bbox'][i][2] = sample['gt_bbox'][i][2] / width
  1153. sample['gt_bbox'][i][3] = sample['gt_bbox'][i][3] / height
  1154. return sample
  1155. class _BboxXYXY2XYWH(Transform):
  1156. """
  1157. Convert bbox XYXY format to XYWH format.
  1158. """
  1159. def __init__(self):
  1160. super(_BboxXYXY2XYWH, self).__init__()
  1161. def apply(self, sample):
  1162. bbox = sample['gt_bbox']
  1163. bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
  1164. bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
  1165. sample['gt_bbox'] = bbox
  1166. return sample
  1167. class _Permute(Transform):
  1168. def __init__(self):
  1169. super(_Permute, self).__init__()
  1170. def apply(self, sample):
  1171. sample['image'] = permute(sample['image'], False)
  1172. return sample
class ArrangeSegmenter(Transform):
    """Arrange a segmentation sample into the tuple consumed for the given mode."""

    def __init__(self, mode):
        super(ArrangeSegmenter, self).__init__()
        if mode not in ['train', 'eval', 'test', 'quant']:
            raise ValueError(
                "mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
            )
        self.mode = mode

    def apply(self, sample):
        # `mask` may be absent (e.g. plain inference); it is only read in
        # the train/eval branches below.
        if 'mask' in sample:
            mask = sample['mask']
        image = permute(sample['image'], False)
        if self.mode == 'train':
            mask = mask.astype('int64')
            return image, mask
        if self.mode == 'eval':
            # NOTE(review): here `mask` is passed to Image.open, i.e. in
            # eval mode it is expected to be a file path rather than an
            # array - confirm against the dataset pipeline.
            mask = np.asarray(Image.open(mask))
            mask = mask[np.newaxis, :, :].astype('int64')
            return image, mask
        if self.mode == 'test':
            return image,
        # NOTE(review): 'quant' mode falls through and returns None -
        # possibly unintended; confirm whether it should return (image, ).
  1194. class ArrangeClassifier(Transform):
  1195. def __init__(self, mode):
  1196. super(ArrangeClassifier, self).__init__()
  1197. if mode not in ['train', 'eval', 'test', 'quant']:
  1198. raise ValueError(
  1199. "mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
  1200. )
  1201. self.mode = mode
  1202. def apply(self, sample):
  1203. image = permute(sample['image'], False)
  1204. if self.mode in ['train', 'eval']:
  1205. return image, sample['label']
  1206. else:
  1207. return image
  1208. class ArrangeDetector(Transform):
  1209. def __init__(self, mode):
  1210. super(ArrangeDetector, self).__init__()
  1211. if mode not in ['train', 'eval', 'test', 'quant']:
  1212. raise ValueError(
  1213. "mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
  1214. )
  1215. self.mode = mode
  1216. def apply(self, sample):
  1217. if self.mode == 'eval' and 'gt_poly' in sample:
  1218. del sample['gt_poly']
  1219. return sample