operators.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# function:
#     operators to process sample,
#     e.g.: decode/resize/crop image

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

from numbers import Number, Integral

import uuid
import random
import math
import logging  # used by RandomCrop.crop_segms to silence shapely warnings
import numpy as np
import os
import copy
import cv2

from PIL import Image, ImageEnhance, ImageDraw

from paddlex.ppdet.core.workspace import serializable
from paddlex.ppdet.modeling.layers import AnchorGrid
from paddlex.ppdet.modeling import bbox_utils

from .op_helper import (satisfy_sample_constraint, filter_and_process,
                        generate_sample_bbox, clip_bbox, data_anchor_sampling,
                        satisfy_sample_constraint_coverage, crop_image_sampling,
                        generate_sample_bbox_square, bbox_area_sampling,
                        is_poly, gaussian_radius, draw_gaussian)

from paddlex.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

registered_ops = []


def register_op(cls):
    registered_ops.append(cls.__name__)
    if not hasattr(BaseOperator, cls.__name__):
        setattr(BaseOperator, cls.__name__, cls)
    else:
        raise KeyError("The {} class has been registered.".format(
            cls.__name__))
    return serializable(cls)


class BboxError(ValueError):
    pass


class ImageError(ValueError):
    pass


class BaseOperator(object):
    def __init__(self, name=None):
        if name is None:
            name = self.__class__.__name__
        self._id = name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image': xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        return sample

    def __call__(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image': xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        if isinstance(sample, Sequence):
            for i in range(len(sample)):
                sample[i] = self.apply(sample[i], context)
        else:
            sample = self.apply(sample, context)
        return sample

    def __str__(self):
        return str(self._id)
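

# --- usage sketch (added for illustration; not part of the original file) ---
# Concrete operators subclass BaseOperator and are registered with
# @register_op; __call__ then routes a single sample dict, or each element of
# a list of samples, through apply(). A minimal hypothetical operator:
class _ExampleIdentityOp(BaseOperator):
    """Hypothetical no-op operator; real ops would also use @register_op."""

    def apply(self, sample, context=None):
        # Real operators mutate fields such as sample['image'] or
        # sample['gt_bbox'] here; this sketch returns the sample unchanged.
        return sample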


@register_op
class Decode(BaseOperator):
    def __init__(self):
        """ Transform the image data to numpy format following the rgb format
        """
        super(Decode, self).__init__()

    def apply(self, sample, context=None):
        """ load image if 'im_file' field is not empty but 'image' is"""
        if 'image' not in sample:
            with open(sample['im_file'], 'rb') as f:
                sample['image'] = f.read()
            sample.pop('im_file')

        im = sample['image']
        data = np.frombuffer(im, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        if 'keep_ori_im' in sample and sample['keep_ori_im']:
            sample['ori_image'] = im
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        sample['image'] = im
        if 'h' not in sample:
            sample['h'] = im.shape[0]
        elif sample['h'] != im.shape[0]:
            logger.warning(
                "The actual image height: {} is not equal to the "
                "height: {} in annotation, and update sample['h'] by actual "
                "image height.".format(im.shape[0], sample['h']))
            sample['h'] = im.shape[0]
        if 'w' not in sample:
            sample['w'] = im.shape[1]
        elif sample['w'] != im.shape[1]:
            logger.warning(
                "The actual image width: {} is not equal to the "
                "width: {} in annotation, and update sample['w'] by actual "
                "image width.".format(im.shape[1], sample['w']))
            sample['w'] = im.shape[1]

        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        return sample
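

# Usage sketch (added for illustration; not part of the original file):
# Decode expects the raw file bytes under 'im_file' (or already-read bytes
# under 'image') and fills in 'image' as an RGB ndarray plus 'im_shape' and
# 'scale_factor'. 'some.jpg' is a hypothetical path used only here.
def _example_decode(path='some.jpg'):
    sample = Decode()({'im_file': path})
    return sample['image'].shape  # (h, w, 3), RGB channel order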


@register_op
class Permute(BaseOperator):
    def __init__(self):
        """
        Change the channel to be (C, H, W)
        """
        super(Permute, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        im = im.transpose((2, 0, 1))
        sample['image'] = im
        return sample


@register_op
class Lighting(BaseOperator):
    """
    Lighting the image by eigenvalues and eigenvectors
    Args:
        eigval (list): eigenvalues
        eigvec (list): eigenvectors
        alphastd (float): random weight of lighting, 0.1 by default
    """

    def __init__(self, eigval, eigvec, alphastd=0.1):
        super(Lighting, self).__init__()
        self.alphastd = alphastd
        self.eigval = np.array(eigval).astype('float32')
        self.eigvec = np.array(eigvec).astype('float32')

    def apply(self, sample, context=None):
        alpha = np.random.normal(scale=self.alphastd, size=(3, ))
        sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
        return sample
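

# Worked sketch (added for illustration; not part of the original file):
# Lighting perturbs colors along PCA axes, adding eigvec @ (eigval * alpha)
# to every pixel, with alpha ~ N(0, alphastd). Eigenvalues/eigenvectors are
# normally precomputed on the training set; the values below are placeholders.
def _example_lighting(im):
    op = Lighting(
        eigval=[0.2, 0.1, 0.05],
        eigvec=[[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
    return op({'image': im.astype(np.float32)})['image']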


@register_op
class RandomErasingImage(BaseOperator):
    def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
        """
        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
        Args:
            prob (float): probability to carry out random erasing
            lower (float): lower limit of the erasing area ratio
            higher (float): upper limit of the erasing area ratio
            aspect_ratio (float): aspect ratio of the erasing region
        """
        super(RandomErasingImage, self).__init__()
        self.prob = prob
        self.lower = lower
        self.higher = higher
        self.aspect_ratio = aspect_ratio

    def apply(self, sample, context=None):
        gt_bbox = sample['gt_bbox']
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))

        for idx in range(gt_bbox.shape[0]):
            if self.prob <= np.random.rand():
                continue

            x1, y1, x2, y2 = gt_bbox[idx, :]
            w_bbox = x2 - x1
            h_bbox = y2 - y1
            area = w_bbox * h_bbox

            target_area = random.uniform(self.lower, self.higher) * area
            aspect_ratio = random.uniform(self.aspect_ratio,
                                          1 / self.aspect_ratio)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < w_bbox and h < h_bbox:
                off_y1 = random.randint(0, int(h_bbox - h))
                off_x1 = random.randint(0, int(w_bbox - w))
                im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
                    x1 + off_x1 + w), :] = 0
        sample['image'] = im
        return sample


@register_op
class NormalizeImage(BaseOperator):
    def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
                 is_scale=True):
        """
        Args:
            mean (list): the pixel mean
            std (list): the pixel standard deviation
        """
        super(NormalizeImage, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.is_scale, bool)):
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def apply(self, sample, context=None):
        """Normalize the image.
        Operators:
            1. (optional) Scale the image to [0, 1]
            2. Subtract the mean from each pixel and divide by std
        """
        im = sample['image']
        im = im.astype(np.float32, copy=False)
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            im = im / 255.0
        im -= mean
        im /= std
        sample['image'] = im
        return sample
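

# Worked sketch (added for illustration; not part of the original file):
# with is_scale=True the per-channel transform is
# out = (pixel / 255 - mean) / std. For the default mean=[0.485, 0.456, 0.406]
# and std=[1, 1, 1], a pure-white pixel maps to roughly [0.515, 0.544, 0.594].
def _example_normalize():
    im = np.full((1, 1, 3), 255, dtype=np.float32)
    sample = NormalizeImage()({'image': im})
    return sample['image']  # approx [[[0.515, 0.544, 0.594]]]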


@register_op
class GridMask(BaseOperator):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        """
        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
        Args:
            use_h (bool): whether to mask vertically
            use_w (bool): whether to mask horizontally
            rotate (float): angle for the mask to rotate
            offset (float): mask offset
            ratio (float): mask ratio
            mode (int): gridmask mode
            prob (float): max probability to carry out gridmask
            upper_iter (int): suggested to be equal to global max_iter
        """
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.upper_iter = upper_iter
        from .gridmask_utils import Gridmask
        self.gridmask_op = Gridmask(
            use_h,
            use_w,
            rotate=rotate,
            offset=offset,
            ratio=ratio,
            mode=mode,
            prob=prob,
            upper_iter=upper_iter)

    def apply(self, sample, context=None):
        sample['image'] = self.gridmask_op(sample['image'],
                                           sample['curr_iter'])
        return sample


@register_op
class RandomDistort(BaseOperator):
    """Random color distortion.
    Args:
        hue (list): hue settings. in [lower, upper, probability] format.
        saturation (list): saturation settings. in [lower, upper, probability] format.
        contrast (list): contrast settings. in [lower, upper, probability] format.
        brightness (list): brightness settings. in [lower, upper, probability] format.
        random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
            order.
        count (int): the number of distortions to apply
        random_channel (bool): whether to swap channels randomly
    """

    def __init__(self,
                 hue=[-18, 18, 0.5],
                 saturation=[0.5, 1.5, 0.5],
                 contrast=[0.5, 1.5, 0.5],
                 brightness=[0.5, 1.5, 0.5],
                 random_apply=True,
                 count=4,
                 random_channel=False):
        super(RandomDistort, self).__init__()
        self.hue = hue
        self.saturation = saturation
        self.contrast = contrast
        self.brightness = brightness
        self.random_apply = random_apply
        self.count = count
        self.random_channel = random_channel

    def apply_hue(self, img):
        low, high, prob = self.hue
        if np.random.uniform(0., 1.) < prob:
            return img

        img = img.astype(np.float32)
        # it works, but the result differs from the HSV version
        delta = np.random.uniform(low, high)
        u = np.cos(delta * np.pi)
        w = np.sin(delta * np.pi)
        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
                         [0.211, -0.523, 0.311]])
        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                          [1.0, -1.107, 1.705]])
        t = np.dot(np.dot(ityiq, bt), tyiq).T
        img = np.dot(img, t)
        return img

    def apply_saturation(self, img):
        low, high, prob = self.saturation
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # it works, but the result differs from the HSV version
        gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
        gray = gray.sum(axis=2, keepdims=True)
        gray *= (1.0 - delta)
        img *= delta
        img += gray
        return img

    def apply_contrast(self, img):
        low, high, prob = self.contrast
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img *= delta
        return img

    def apply_brightness(self, img):
        low, high, prob = self.brightness
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img += delta
        return img

    def apply(self, sample, context=None):
        img = sample['image']
        if self.random_apply:
            functions = [
                self.apply_brightness, self.apply_contrast,
                self.apply_saturation, self.apply_hue
            ]
            distortions = np.random.permutation(functions)[:self.count]
            for func in distortions:
                img = func(img)
            sample['image'] = img
            return sample

        img = self.apply_brightness(img)
        mode = np.random.randint(0, 2)
        if mode:
            img = self.apply_contrast(img)
        img = self.apply_saturation(img)
        img = self.apply_hue(img)
        if not mode:
            img = self.apply_contrast(img)
        if self.random_channel:
            if np.random.randint(0, 2):
                img = img[..., np.random.permutation(3)]
        sample['image'] = img
        return sample
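

# Usage sketch (added for illustration; not part of the original file):
# with random_apply=True, up to `count` of the four photometric distortions
# run in a random order; each apply_* method skips itself with its own
# configured probability (the third element of its settings list).
def _example_distort(im):
    op = RandomDistort(random_apply=True, count=2)
    return op({'image': im.astype(np.float32)})['image']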


@register_op
class AutoAugment(BaseOperator):
    def __init__(self, autoaug_type="v1"):
        """
        Args:
            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
        """
        super(AutoAugment, self).__init__()
        self.autoaug_type = autoaug_type

    def apply(self, sample, context=None):
        """
        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
        """
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        if len(gt_bbox) == 0:
            return sample

        height, width, _ = im.shape
        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)

        from .autoaugment_utils import distort_image_with_autoaugment
        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
                                                          self.autoaug_type)

        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)

        sample['image'] = im
        sample['gt_bbox'] = gt_bbox
        return sample


@register_op
class RandomFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_segm(self, segms, height, width):
        def _flip_poly(poly, width):
            flipped_poly = np.array(poly)
            flipped_poly[0::2] = width - np.array(poly[0::2])
            return flipped_poly.tolist()

        def _flip_rle(rle, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[:, ::-1]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        flipped_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                flipped_segms.append(
                    [_flip_poly(poly, width) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                flipped_segms.append(_flip_rle(segm, height, width))
        return flipped_segms

    def apply_keypoint(self, gt_keypoint, width):
        for i in range(gt_keypoint.shape[1]):
            if i % 2 == 0:
                old_x = gt_keypoint[:, i].copy()
                gt_keypoint[:, i] = width - old_x
        return gt_keypoint

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_bbox(self, bbox, width):
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        bbox[:, 0] = width - oldx2
        bbox[:, 2] = width - oldx1
        return bbox

    def apply_rbox(self, bbox, width):
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        oldx3 = bbox[:, 4].copy()
        oldx4 = bbox[:, 6].copy()
        bbox[:, 0] = width - oldx1
        bbox[:, 2] = width - oldx2
        bbox[:, 4] = width - oldx3
        bbox[:, 6] = width - oldx4
        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
        return bbox

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
               (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
               (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
                                                    width)
            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
                sample['gt_keypoint'] = self.apply_keypoint(
                    sample['gt_keypoint'], width)
            if 'semantic' in sample and sample['semantic']:
                sample['semantic'] = sample['semantic'][:, ::-1]
            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
                sample['gt_rbox2poly'] = self.apply_rbox(
                    sample['gt_rbox2poly'], width)
            sample['flipped'] = True
            sample['image'] = im
        return sample
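

# Worked sketch (added for illustration; not part of the original file):
# horizontal flipping maps an xyxy box [x1, y1, x2, y2] to
# [W - x2, y1, W - x1, y2], which is exactly what apply_bbox does.
def _example_flip_bbox():
    boxes = np.array([[10., 20., 50., 80.]])
    flipped = RandomFlip(prob=1.0).apply_bbox(boxes.copy(), width=100)
    return flipped  # [[50., 20., 90., 80.]]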


@register_op
class Resize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. if keep_ratio is True,
        resize the image's long side to the maximum of target_size
        if keep_ratio is False, resize the image to target size(h, w)
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether keep_ratio or not, default true
            interp (int): the interpolation method
        """
        super(Resize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale
        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_bbox(self, bbox, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
        return bbox

    def apply_segm(self, segms, im_size, scale):
        def _resize_poly(poly, im_scale_x, im_scale_y):
            resized_poly = np.array(poly).astype('float32')
            resized_poly[0::2] *= im_scale_x
            resized_poly[1::2] *= im_scale_y
            return resized_poly.tolist()

        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, im_h, im_w)
            mask = mask_util.decode(rle)
            # resize the decoded mask with the same scale factors
            mask = cv2.resize(
                mask,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                resized_segms.append(
                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
        return resized_segms

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])

        # apply rbox
        if 'gt_rbox2poly' in sample:
            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
                logger.warning(
                    "gt_rbox2poly's length should be 8, but actually is {}".
                    format(len(sample['gt_rbox2poly'])))
            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
                                                     [im_scale_x, im_scale_y],
                                                     [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(
                sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])

        # apply semantic
        if 'semantic' in sample and sample['semantic']:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic

        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            masks = [
                cv2.resize(
                    gt_segm,
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)

        return sample
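

# Worked sketch (added for illustration; not part of the original file):
# with keep_ratio=True and target_size [800, 1333], a 600x900 image is scaled
# by min(800 / 600, 1333 / 900) = 4/3, so im_shape becomes [800, 1200] and
# scale_factor accumulates [scale_y, scale_x].
def _example_resize(im):
    sample = {'image': im, 'scale_factor': np.array([1., 1.], 'float32')}
    out = Resize([800, 1333], keep_ratio=True)(sample)
    return out['im_shape'], out['scale_factor']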


@register_op
class MultiscaleTestResize(BaseOperator):
    def __init__(self,
                 origin_target_size=[800, 1333],
                 target_size=[],
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to each size in target_size, capped at max_size.
        Args:
            origin_target_size (list): origin target size of image
            target_size (list): A list of target sizes of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.
        """
        super(MultiscaleTestResize, self).__init__()
        self.interp = interp
        self.use_flip = use_flip

        if not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size

        if not isinstance(origin_target_size, Sequence):
            raise TypeError(
                "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
                format(type(origin_target_size)))
        self.origin_target_size = origin_target_size

    def apply(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.
        """
        samples = []
        resizer = Resize(
            self.origin_target_size, keep_ratio=True, interp=self.interp)
        samples.append(resizer(sample.copy(), context))
        if self.use_flip:
            flipper = RandomFlip(1.1)  # prob > 1 guarantees the flip
            samples.append(flipper(sample.copy(), context=context))

        for size in self.target_size:
            resizer = Resize(size, keep_ratio=True, interp=self.interp)
            samples.append(resizer(sample.copy(), context))

        return samples


@register_op
class RandomResize(BaseOperator):
    def __init__(self,
                 target_size,
                 keep_ratio=True,
                 interp=cv2.INTER_LINEAR,
                 random_size=True,
                 random_interp=False):
        """
        Resize image to target size randomly. random target_size and interpolation method
        Args:
            target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
            keep_ratio (bool): whether keep_ratio or not, default true
            interp (int): the interpolation method
            random_size (bool): whether to randomly select a target size
            random_interp (bool): whether to randomly select an interpolation method
        """
        super(RandomResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        assert isinstance(target_size, (
            Integral, Sequence)), "target_size must be Integer, List or Tuple"
        if random_size and not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        if self.random_size:
            target_size = random.choice(self.target_size)
        else:
            target_size = self.target_size
        if self.random_interp:
            interp = random.choice(self.interps)
        else:
            interp = self.interp
        resizer = Resize(target_size, self.keep_ratio, interp)
        return resizer(sample, context=context)


@register_op
class RandomExpand(BaseOperator):
    """Random expand the canvas.
    Args:
        ratio (float): maximum expansion ratio.
        prob (float): probability to expand.
        fill_value (list): color value used to fill the canvas. in RGB order.
    """

    def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
        super(RandomExpand, self).__init__()
        assert ratio > 1.01, "expand ratio must be larger than 1.01"
        self.ratio = ratio
        self.prob = prob
        assert isinstance(fill_value, (Number, Sequence)), \
            "fill value must be either float or sequence"
        if isinstance(fill_value, Number):
            fill_value = (fill_value, ) * 3
        if not isinstance(fill_value, tuple):
            fill_value = tuple(fill_value)
        self.fill_value = fill_value

    def apply(self, sample, context=None):
        if np.random.uniform(0., 1.) < self.prob:
            return sample

        im = sample['image']
        height, width = im.shape[:2]
        ratio = np.random.uniform(1., self.ratio)
        h = int(height * ratio)
        w = int(width * ratio)
        if not h > height or not w > width:
            return sample
        y = np.random.randint(0, h - height)
        x = np.random.randint(0, w - width)
        offsets, size = [x, y], [h, w]

        pad = Pad(size,
                  pad_mode=-1,
                  offsets=offsets,
                  fill_value=self.fill_value)

        return pad(sample, context=context)


@register_op
class CropWithSampling(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            satisfy_all (bool): whether all boxes must satisfy the
                                sampling constraints.
            e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap]
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithSampling, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_height, im_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        for sampler in self.batch_sampler:
            found = 0
            for i in range(sampler[1]):
                if found >= sampler[0]:
                    break
                sample_bbox = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
                                             self.satisfy_all):
                    sampled_bbox.append(sample_bbox)
                    found = found + 1
        im = np.array(im)
        while sampled_bbox:
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            sample_bbox = sampled_bbox.pop(idx)
            sample_bbox = clip_bbox(sample_bbox)
            crop_bbox, crop_class, crop_score = \
                filter_and_process(sample_bbox, gt_bbox, gt_class,
                                   scores=gt_score)
            if self.avoid_no_bbox:
                if len(crop_bbox) < 1:
                    continue
            xmin = int(sample_bbox[0] * im_width)
            xmax = int(sample_bbox[2] * im_width)
            ymin = int(sample_bbox[1] * im_height)
            ymax = int(sample_bbox[3] * im_height)
            im = im[ymin:ymax, xmin:xmax]
            sample['image'] = im
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        return sample
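

# Usage sketch (added for illustration; not part of the original file):
# each batch_sampler entry is [max sample, max trial, min/max scale,
# min/max aspect ratio, min/max overlap]; the rows below mirror the
# docstring example above.
def _example_crop_sampler():
    sampler = [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
               [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0]]
    return CropWithSampling(batch_sampler=sampler)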


@register_op
class CropWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                                   parameters for cropping.
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
                [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap, min coverage, max coverage]
            target_size (int): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        image_height, image_width = im.shape[:2]
        gt_bbox[:, 0] /= image_width
        gt_bbox[:, 1] /= image_height
        gt_bbox[:, 2] /= image_width
        gt_bbox[:, 3] /= image_height
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()

        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:  # anchor sampling
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    if sample_bbox == 0:
                        break
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample

        else:
            for sampler in self.batch_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                sample_bbox = clip_bbox(sample_bbox)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sampling bbox according to the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                im = im[ymin:ymax, xmin:xmax]
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample


@register_op
class RandomCrop(BaseOperator):
    """Random crop image and bboxes.
    Args:
        aspect_ratio (list): aspect ratio of cropped region.
            in [min, max] format.
        thresholds (list): iou thresholds for deciding a valid bbox crop.
        scaling (list): ratio between a cropped region and the original image.
            in [min, max] format.
        num_attempts (int): number of tries before giving up.
        allow_no_crop (bool): allow return without actually cropping them.
        cover_all_box (bool): ensure all bboxes are covered in the final crop.
        is_mask_crop (bool): whether to crop the segmentation.
    """

    def __init__(self,
                 aspect_ratio=[.5, 2.],
                 thresholds=[.0, .1, .3, .5, .7, .9],
                 scaling=[.3, 1.],
                 num_attempts=50,
                 allow_no_crop=True,
                 cover_all_box=False,
                 is_mask_crop=False):
        super(RandomCrop, self).__init__()
        self.aspect_ratio = aspect_ratio
        self.thresholds = thresholds
        self.scaling = scaling
        self.num_attempts = num_attempts
        self.allow_no_crop = allow_no_crop
        self.cover_all_box = cover_all_box
        self.is_mask_crop = is_mask_crop

    def crop_segms(self, segms, valid_ids, crop, height, width):
        def _crop_poly(segm, crop):
            xmin, ymin, xmax, ymax = crop
            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
            crop_p = np.array(crop_coord).reshape(4, 2)
            crop_p = Polygon(crop_p)

            crop_segm = list()
            for poly in segm:
                poly = np.array(poly).reshape(len(poly) // 2, 2)
                polygon = Polygon(poly)
                if not polygon.is_valid:
                    exterior = polygon.exterior
                    multi_lines = exterior.intersection(exterior)
                    polygons = shapely.ops.polygonize(multi_lines)
                    polygon = MultiPolygon(polygons)
                multi_polygon = list()
                if isinstance(polygon, MultiPolygon):
                    multi_polygon = copy.deepcopy(polygon)
                else:
                    multi_polygon.append(copy.deepcopy(polygon))

                for per_polygon in multi_polygon:
                    inter = per_polygon.intersection(crop_p)
                    if not inter:
                        continue
                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
                        for part in inter:
                            if not isinstance(part, Polygon):
                                continue
                            part = np.squeeze(
                                np.array(part.exterior.coords[:-1]).reshape(
                                    1, -1))
                            part[0::2] -= xmin
                            part[1::2] -= ymin
                            crop_segm.append(part.tolist())
                    elif isinstance(inter, Polygon):
                        crop_poly = np.squeeze(
                            np.array(inter.exterior.coords[:-1]).reshape(
                                1, -1))
                        crop_poly[0::2] -= xmin
                        crop_poly[1::2] -= ymin
                        crop_segm.append(crop_poly.tolist())
                    else:
                        continue
            return crop_segm

        def _crop_rle(rle, crop, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        crop_segms = []
        for id in valid_ids:
            segm = segms[id]
            if is_poly(segm):
                import copy
                import shapely.ops
                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
                logging.getLogger("shapely").setLevel(logging.WARNING)
                # Polygon format
                crop_segms.append(_crop_poly(segm, crop))
            else:
                # RLE format
                import pycocotools.mask as mask_util
                crop_segms.append(_crop_rle(segm, crop, height, width))
        return crop_segms

    def apply(self, sample, context=None):
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        h, w = sample['image'].shape[:2]
        gt_bbox = sample['gt_bbox']

        # NOTE Original method attempts to generate one candidate for each
        # threshold then randomly sample one from the resulting list.
        # Here a short circuit approach is taken, i.e., randomly choose a
        # threshold and attempt to find a valid crop, and simply return the
        # first one found.
        # The probability is not exactly the same, kinda resembling the
        # "Monty Hall" problem. Actually carrying out the attempts will affect
        # observability (just like opening doors in the "Monty Hall" game).
        thresholds = list(self.thresholds)
        if self.allow_no_crop:
            thresholds.append('no_crop')
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            if thresh == 'no_crop':
                return sample

            found = False
            for i in range(self.num_attempts):
                scale = np.random.uniform(*self.scaling)
                if self.aspect_ratio is not None:
                    min_ar, max_ar = self.aspect_ratio
                    aspect_ratio = np.random.uniform(
                        max(min_ar, scale**2), min(max_ar, scale**-2))
                    h_scale = scale / np.sqrt(aspect_ratio)
                    w_scale = scale * np.sqrt(aspect_ratio)
                else:
                    h_scale = np.random.uniform(*self.scaling)
                    w_scale = np.random.uniform(*self.scaling)
                crop_h = h * h_scale
                crop_w = w * w_scale
                if self.aspect_ratio is None:
                    if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
                        continue

                crop_h = int(crop_h)
                crop_w = int(crop_w)
                crop_y = np.random.randint(0, h - crop_h)
                crop_x = np.random.randint(0, w - crop_w)
                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(
                    gt_bbox, np.array(
                        [crop_box], dtype=np.float32))
                if iou.max() < thresh:
                    continue

                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(
                        crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break

            if found:
                if self.is_mask_crop and 'gt_poly' in sample and len(sample[
                        'gt_poly']) > 0:
                    crop_polys = self.crop_segms(
                        sample['gt_poly'],
                        valid_ids,
                        np.array(
                            crop_box, dtype=np.int64),
                        h,
                        w)
                    if [] in crop_polys:
                        delete_id = list()
                        valid_polys = list()
                        for id, crop_poly in enumerate(crop_polys):
                            if crop_poly == []:
                                delete_id.append(id)
                            else:
                                valid_polys.append(crop_poly)
                        valid_ids = np.delete(valid_ids, delete_id)
                        if len(valid_polys) == 0:
                            return sample
                        sample['gt_poly'] = valid_polys
                    else:
                        sample['gt_poly'] = crop_polys

                if 'gt_segm' in sample:
                    sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
                                                        crop_box)
                    sample['gt_segm'] = np.take(
                        sample['gt_segm'], valid_ids, axis=0)

                sample['image'] = self._crop_image(sample['image'], crop_box)
                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
                sample['gt_class'] = np.take(
                    sample['gt_class'], valid_ids, axis=0)
                if 'gt_score' in sample:
                    sample['gt_score'] = np.take(
                        sample['gt_score'], valid_ids, axis=0)

                if 'is_crowd' in sample:
                    sample['is_crowd'] = np.take(
                        sample['is_crowd'], valid_ids, axis=0)
                return sample

        return sample

    def _iou_matrix(self, a, b):
        tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
        br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
        area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
        area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
        area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
        area_o = (area_a[:, np.newaxis] + area_b - area_i)
        return area_i / (area_o + 1e-10)

    def _crop_box_with_center_constraint(self, box, crop):
        cropped_box = box.copy()
        cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
        cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
        cropped_box[:, :2] -= crop[:2]
        cropped_box[:, 2:] -= crop[:2]
        centers = (box[:, :2] + box[:, 2:]) / 2
        valid = np.logical_and(crop[:2] <= centers,
                               centers < crop[2:]).all(axis=1)
        valid = np.logical_and(
            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
        return cropped_box, np.where(valid)[0]

    def _crop_image(self, img, crop):
        x1, y1, x2, y2 = crop
        return img[y1:y2, x1:x2, :]

    def _crop_segm(self, segm, crop):
        x1, y1, x2, y2 = crop
        return segm[:, y1:y2, x1:x2]
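

# Worked sketch (added for illustration; not part of the original file):
# _iou_matrix computes pairwise IoU between the gt boxes and the crop
# candidate; RandomCrop keeps a crop only if the best (and, with
# cover_all_box, the worst) IoU clears the sampled threshold.
def _example_iou():
    op = RandomCrop()
    gt = np.array([[0., 0., 10., 10.]], dtype=np.float32)
    crop = np.array([[5., 5., 15., 15.]], dtype=np.float32)
    return op._iou_matrix(gt, crop)  # approx [[0.1429]] (25 / 175)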
@register_op
class RandomScaledCrop(BaseOperator):
    """Resize image and bbox based on long side (with optional random scaling),
    then crop or pad image to target size.

    Args:
        target_dim (int): target size.
        scale_range (list): random scale range.
        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
    """

    def __init__(self,
                 target_dim=512,
                 scale_range=[.1, 2.],
                 interp=cv2.INTER_LINEAR):
        super(RandomScaledCrop, self).__init__()
        self.target_dim = target_dim
        self.scale_range = scale_range
        self.interp = interp

    def apply(self, sample, context=None):
        img = sample['image']
        h, w = img.shape[:2]
        random_scale = np.random.uniform(*self.scale_range)
        dim = self.target_dim
        random_dim = int(dim * random_scale)
        dim_max = max(h, w)
        scale = random_dim / dim_max
        # cv2.resize expects integer dimensions
        resize_w = int(round(w * scale))
        resize_h = int(round(h * scale))
        offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
        offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
        img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
        canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
            offset_y:offset_y + dim, offset_x:offset_x + dim, :]
        sample['image'] = canvas
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        scale_factor = sample['scale_factor']
        sample['scale_factor'] = np.asarray(
            [scale_factor[0] * scale, scale_factor[1] * scale],
            dtype=np.float32)
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
            shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
            boxes = sample['gt_bbox'] * scale_array - shift_array
            boxes = np.clip(boxes, 0, dim - 1)
            # filter boxes with no area
            area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
            valid = (area > 1.).nonzero()[0]
            sample['gt_bbox'] = boxes[valid]
            sample['gt_class'] = sample['gt_class'][valid]
        return sample

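# Usage sketch (assumes an upstream op has already set sample['image'] to an
# HWC uint8 array and sample['scale_factor'] to [1., 1.]):
#
#     op = RandomScaledCrop(target_dim=512, scale_range=[0.5, 2.0])
#     sample = op(sample)
#     # sample['image'] is now 512x512x3; gt_bbox is rescaled, shifted by the
#     # random offset, clipped to the canvas, and filtered of zero-area boxes.
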
@register_op
class Cutmix(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """
        CutMix: Regularization Strategy to Train Strong Classifiers with
        Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribution
            beta (float): beta parameter of beta distribution
        """
        super(Cutmix, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        """Cut a random patch out of img2 and paste it into img1."""
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        cut_rat = np.sqrt(1. - factor)
        # np.int was removed in recent numpy; use the builtin
        cut_w = int(w * cut_rat)
        cut_h = int(h * cut_rat)
        # uniform
        cx = np.random.randint(w)
        cy = np.random.randint(h)
        bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
        bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
        bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
        bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
        img_1_pad = np.zeros((h, w, img1.shape[2]), 'float32')
        img_1_pad[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32')
        img_2_pad = np.zeros((h, w, img2.shape[2]), 'float32')
        img_2_pad[:img2.shape[0], :img2.shape[1], :] = \
            img2.astype('float32')
        img_1_pad[bby1:bby2, bbx1:bbx2, :] = img_2_pad[bby1:bby2, bbx1:bbx2, :]
        return img_1_pad

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample
        assert len(sample) == 2, 'cutmix need two samples'
        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        img1 = sample[0]['image']
        img2 = sample[1]['image']
        img = self.apply_image(img1, img2, factor)
        gt_bbox1 = sample[0]['gt_bbox']
        gt_bbox2 = sample[1]['gt_bbox']
        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
        gt_class1 = sample[0]['gt_class']
        gt_class2 = sample[1]['gt_class']
        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
        gt_score1 = np.ones_like(sample[0]['gt_class'])
        gt_score2 = np.ones_like(sample[1]['gt_class'])
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        result = copy.deepcopy(sample[0])
        result['image'] = img
        result['gt_bbox'] = gt_bbox
        result['gt_score'] = gt_score
        result['gt_class'] = gt_class
        if 'is_crowd' in sample[0]:
            is_crowd1 = sample[0]['is_crowd']
            is_crowd2 = sample[1]['is_crowd']
            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
            result['is_crowd'] = is_crowd
        if 'difficult' in sample[0]:
            is_difficult1 = sample[0]['difficult']
            is_difficult2 = sample[1]['difficult']
            is_difficult = np.concatenate(
                (is_difficult1, is_difficult2), axis=0)
            result['difficult'] = is_difficult
        return result

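# Usage sketch: Cutmix consumes a pair of samples (a Sequence of two dicts)
# and returns one fused sample; boxes from both images are concatenated and
# the beta-sampled factor becomes a soft label in gt_score.
#
#     cutmix = Cutmix(alpha=1.5, beta=1.5)
#     fused = cutmix([sample_a, sample_b])
#     # fused['image'] carries a rectangular patch of sample_b pasted into
#     # sample_a; gt_score is `factor` for a's boxes, `1 - factor` for b's.
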
@register_op
class Mixup(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """ Mixup image and gt_bbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribution
            beta (float): beta parameter of beta distribution
        """
        super(Mixup, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        img = np.zeros((h, w, img1.shape[2]), 'float32')
        img[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32') * factor
        img[:img2.shape[0], :img2.shape[1], :] += \
            img2.astype('float32') * (1.0 - factor)
        return img.astype('uint8')

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample
        assert len(sample) == 2, 'mixup need two samples'
        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
        result = copy.deepcopy(sample[0])
        result['image'] = im
        # apply bbox and score
        if 'gt_bbox' in sample[0]:
            gt_bbox1 = sample[0]['gt_bbox']
            gt_bbox2 = sample[1]['gt_bbox']
            gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
            result['gt_bbox'] = gt_bbox
        if 'gt_class' in sample[0]:
            gt_class1 = sample[0]['gt_class']
            gt_class2 = sample[1]['gt_class']
            gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
            result['gt_class'] = gt_class
            gt_score1 = np.ones_like(sample[0]['gt_class'])
            gt_score2 = np.ones_like(sample[1]['gt_class'])
            gt_score = np.concatenate(
                (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
            result['gt_score'] = gt_score
        if 'is_crowd' in sample[0]:
            is_crowd1 = sample[0]['is_crowd']
            is_crowd2 = sample[1]['is_crowd']
            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
            result['is_crowd'] = is_crowd
        if 'difficult' in sample[0]:
            is_difficult1 = sample[0]['difficult']
            is_difficult2 = sample[1]['difficult']
            is_difficult = np.concatenate(
                (is_difficult1, is_difficult2), axis=0)
            result['difficult'] = is_difficult
        if 'gt_ide' in sample[0]:
            gt_ide1 = sample[0]['gt_ide']
            gt_ide2 = sample[1]['gt_ide']
            gt_ide = np.concatenate((gt_ide1, gt_ide2), axis=0)
            result['gt_ide'] = gt_ide
        return result

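# Usage sketch: unlike Cutmix's patch paste, Mixup blends the two images
# pixel-wise with weights `factor` and `1 - factor`:
#
#     mixup = Mixup(alpha=1.5, beta=1.5)
#     fused = mixup([sample_a, sample_b])
#     # fused['image'] = a * factor + b * (1 - factor) (uint8), with boxes,
#     # classes, and optional is_crowd/difficult/gt_ide fields concatenated.
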
@register_op
class NormalizeBox(BaseOperator):
    """Transform the bounding box's coordinates to [0, 1]."""

    def __init__(self):
        super(NormalizeBox, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        height, width, _ = im.shape
        for i in range(gt_bbox.shape[0]):
            gt_bbox[i][0] = gt_bbox[i][0] / width
            gt_bbox[i][1] = gt_bbox[i][1] / height
            gt_bbox[i][2] = gt_bbox[i][2] / width
            gt_bbox[i][3] = gt_bbox[i][3] / height
        sample['gt_bbox'] = gt_bbox
        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            for i in range(gt_keypoint.shape[1]):
                if i % 2:
                    gt_keypoint[:, i] = gt_keypoint[:, i] / height
                else:
                    gt_keypoint[:, i] = gt_keypoint[:, i] / width
            sample['gt_keypoint'] = gt_keypoint
        return sample

@register_op
class BboxXYXY2XYWH(BaseOperator):
    """
    Convert bbox XYXY format to XYWH (center) format.
    [x0, y0, x1, y1] -> [center_x, center_y, width, height]
    """

    def __init__(self):
        super(BboxXYXY2XYWH, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
        bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
        sample['gt_bbox'] = bbox
        return sample

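# Worked example: a corner-format row [x0, y0, x1, y1] = [10, 20, 30, 60]
# becomes center format:
#     w, h   = 30 - 10, 60 - 20            -> (20, 40)
#     cx, cy = 10 + 20 / 2., 20 + 40 / 2.  -> (20, 40)
# i.e. gt_bbox row [10, 20, 30, 60] -> [20, 40, 20, 40]. Note the op mutates
# sample['gt_bbox'] in place.
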
@register_op
class PadBox(BaseOperator):
    def __init__(self, num_max_boxes=50):
        """
        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
        Args:
            num_max_boxes (int): the max number of bboxes
        """
        self.num_max_boxes = num_max_boxes
        super(PadBox, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = min(self.num_max_boxes, len(bbox))
        num_max = self.num_max_boxes
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_class' in sample:
            pad_class = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
            sample['gt_class'] = pad_class
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        # in training, for example in op ExpandImage, the bbox and gt_class
        # are expanded but 'difficult' is not, so pad it by its own length
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample

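# Usage sketch: with num_max_boxes=50 and a sample holding 3 boxes, gt_bbox
# comes out as a fixed (50, 4) array (rows 3..49 zero) and gt_class is
# flattened from (3, 1) to a (50,) int32 vector, so samples can be batched.
#
#     pad_box = PadBox(num_max_boxes=50)
#     sample = pad_box(sample)
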
@register_op
class DebugVisibleImage(BaseOperator):
    """
    In debug mode, visualize images according to `gt_bbox`.
    (Currently only supported when the image is not cropped or flipped.)
    """

    def __init__(self, output_dir='output/debug', is_normalized=False):
        super(DebugVisibleImage, self).__init__()
        self.is_normalized = is_normalized
        self.output_dir = output_dir
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        if not isinstance(self.is_normalized, bool):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply(self, sample, context=None):
        image = Image.open(sample['im_file']).convert('RGB')
        out_file_name = sample['im_file'].split('/')[-1]
        width = sample['w']
        height = sample['h']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_bbox.shape[0]):
            if self.is_normalized:
                gt_bbox[i][0] = gt_bbox[i][0] * width
                gt_bbox[i][1] = gt_bbox[i][1] * height
                gt_bbox[i][2] = gt_bbox[i][2] * width
                gt_bbox[i][3] = gt_bbox[i][3] * height
            xmin, ymin, xmax, ymax = gt_bbox[i]
            draw.line(
                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
                 (xmin, ymin)],
                width=2,
                fill='green')
            # draw label (note: textsize was removed in Pillow 10;
            # use draw.textbbox there)
            text = str(gt_class[i][0])
            tw, th = draw.textsize(text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            if self.is_normalized:
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    # round() on a float64 scalar returns a plain int, which
                    # has no .astype; cast with int() instead
                    x1 = int(round(float(keypoint[2 * j])))
                    y1 = int(round(float(keypoint[2 * j + 1])))
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green',
                        outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample

@register_op
class Pad(BaseOperator):
    def __init__(self,
                 size=None,
                 size_divisor=32,
                 pad_mode=0,
                 offsets=None,
                 fill_value=(127.5, 127.5, 127.5)):
        """
        Pad image to a specified size or multiple of size_divisor.
        Args:
            size (int, Sequence): image target size, if None, pad to
                multiple of size_divisor, default None
            size_divisor (int): size divisor, default 32
            pad_mode (int): pad mode, currently only supports four modes
                [-1, 0, 1, 2]. if -1, use specified offsets. if 0, only pad
                to right and bottom. if 1, pad according to center. if 2,
                only pad left and top
            offsets (list): [offset_x, offset_y], specify offset while
                padding, only supported when pad_mode=-1
            fill_value (tuple): rgb value of pad area,
                default (127.5, 127.5, 127.5)
        """
        super(Pad, self).__init__()
        if size is not None and not isinstance(size, (int, Sequence)):
            raise TypeError(
                "Type of size is invalid. Must be int or Sequence, "
                "now is {}".format(type(size)))
        if isinstance(size, int):
            size = [size, size]
        assert pad_mode in [
            -1, 0, 1, 2
        ], 'currently only supports four modes [-1, 0, 1, 2]'
        if pad_mode == -1:
            assert offsets, 'if pad_mode is -1, offsets should not be None'
        self.size = size
        self.size_divisor = size_divisor
        self.pad_mode = pad_mode
        self.fill_value = fill_value
        self.offsets = offsets

    def apply_segm(self, segms, offsets, im_size, size):
        import pycocotools.mask as mask_util

        def _expand_poly(poly, x, y):
            expanded_poly = np.array(poly)
            expanded_poly[0::2] += x
            expanded_poly[1::2] += y
            return expanded_poly.tolist()

        def _expand_rle(rle, x, y, height, width, h, w):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            expanded_mask = np.full((h, w), 0).astype(mask.dtype)
            expanded_mask[y:y + height, x:x + width] = mask
            rle = mask_util.encode(
                np.array(
                    expanded_mask, order='F', dtype=np.uint8))
            return rle

        x, y = offsets
        height, width = im_size
        h, w = size
        expanded_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                expanded_segms.append(
                    [_expand_poly(poly, x, y) for poly in segm])
            else:
                # RLE format
                expanded_segms.append(
                    _expand_rle(segm, x, y, height, width, h, w))
        return expanded_segms

    def apply_bbox(self, bbox, offsets):
        return bbox + np.array(offsets * 2, dtype=np.float32)

    def apply_keypoint(self, keypoints, offsets):
        n = len(keypoints[0]) // 2
        return keypoints + np.array(offsets * n, dtype=np.float32)

    def apply_image(self, image, offsets, im_size, size):
        x, y = offsets
        im_h, im_w = im_size
        h, w = size
        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    def apply(self, sample, context=None):
        im = sample['image']
        im_h, im_w = im.shape[:2]
        if self.size:
            h, w = self.size
            assert (
                im_h <= h and im_w <= w
            ), '(h, w) of target size should be no less than (im_h, im_w)'
        else:
            # round up to the next multiple of size_divisor
            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
        if h == im_h and w == im_w:
            return sample

        if self.pad_mode == -1:
            offset_x, offset_y = self.offsets
        elif self.pad_mode == 0:
            offset_y, offset_x = 0, 0
        elif self.pad_mode == 1:
            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
        else:
            offset_y, offset_x = h - im_h, w - im_w

        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
        sample['image'] = self.apply_image(im, offsets, im_size, size)

        if self.pad_mode == 0:
            return sample
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
                                                im_size, size)
        if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
            sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
                                                        offsets)
        return sample

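# Usage sketch for the size=None path: pad each side up to the next multiple
# of size_divisor. For a 500x375 image with size_divisor=32, h becomes
# ceil(500 / 32) * 32 = 512 and w becomes ceil(375 / 32) * 32 = 384; with
# pad_mode=0 the image sits at the top-left and annotations need no shift.
#
#     pad = Pad(size_divisor=32, pad_mode=0)
#     sample = pad(sample)
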
@register_op
class Poly2Mask(BaseOperator):
    """
    Convert polygon annotations ('gt_poly') to mask annotations ('gt_segm').
    """

    def __init__(self):
        super(Poly2Mask, self).__init__()
        import pycocotools.mask as maskUtils
        self.maskutils = maskUtils

    def _poly2mask(self, mask_ann, img_h, img_w):
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts,
            # we merge all parts into one mask rle code
            rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
            rle = self.maskutils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # compressed RLE
            rle = mask_ann
        mask = self.maskutils.decode(rle)
        return mask

    def apply(self, sample, context=None):
        assert 'gt_poly' in sample
        im_h = sample['h']
        im_w = sample['w']
        masks = [
            self._poly2mask(gt_poly, im_h, im_w)
            for gt_poly in sample['gt_poly']
        ]
        sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
        return sample

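# Usage sketch: expects sample['h'] / sample['w'] to be set (e.g. by the
# dataset decoder) and 'gt_poly' to hold COCO-style polygon lists or RLE
# dicts per instance.
#
#     to_mask = Poly2Mask()
#     sample = to_mask(sample)
#     # sample['gt_segm'] is a (num_gt, h, w) uint8 array of binary masks.
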
@register_op
class Rbox2Poly(BaseOperator):
    """
    Convert rotated bbox (rbox) format to polygon format.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rrects = sample['gt_rbox']
        x_ctr = rrects[:, 0]
        y_ctr = rrects[:, 1]
        width = rrects[:, 2]
        height = rrects[:, 3]
        x1 = x_ctr - width / 2.0
        y1 = y_ctr - height / 2.0
        x2 = x_ctr + width / 2.0
        y2 = y_ctr + height / 2.0
        sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
        polys = bbox_utils.rbox2poly_np(rrects)
        sample['gt_rbox2poly'] = polys
        return sample

@register_op
class AugmentHSV(BaseOperator):
    def __init__(self, fraction=0.50, is_bgr=False):
        """
        Augment the S and V channels of image data in HSV space.
        Args:
            fraction (float): the fraction for augmentation
            is_bgr (bool): whether the image is in BGR mode
        """
        super(AugmentHSV, self).__init__()
        self.fraction = fraction
        self.is_bgr = is_bgr

    def apply(self, sample, context=None):
        img = sample['image']
        if self.is_bgr:
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        else:
            img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
        S = img_hsv[:, :, 1].astype(np.float32)
        V = img_hsv[:, :, 2].astype(np.float32)

        a = (random.random() * 2 - 1) * self.fraction + 1
        S *= a
        if a > 1:
            np.clip(S, a_min=0, a_max=255, out=S)

        a = (random.random() * 2 - 1) * self.fraction + 1
        V *= a
        if a > 1:
            np.clip(V, a_min=0, a_max=255, out=V)

        img_hsv[:, :, 1] = S.astype(np.uint8)
        img_hsv[:, :, 2] = V.astype(np.uint8)
        if self.is_bgr:
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
        else:
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
        sample['image'] = img
        return sample

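# Design note: with fraction=0.5 the gain `a` is drawn from [0.5, 1.5]
# independently for S and V, so saturation and brightness each vary by at
# most +/-50%; clipping is only needed when a > 1, since scaling down cannot
# leave the valid [0, 255] range.
#
#     hsv = AugmentHSV(fraction=0.5, is_bgr=False)  # RGB input assumed
#     sample = hsv(sample)
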
@register_op
class Norm2PixelBbox(BaseOperator):
    """
    Transform the bounding box's coordinates from normalized [0, 1] to pixels.
    """

    def __init__(self):
        super(Norm2PixelBbox, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        height, width = sample['image'].shape[:2]
        bbox[:, 0::2] = bbox[:, 0::2] * width
        bbox[:, 1::2] = bbox[:, 1::2] * height
        sample['gt_bbox'] = bbox
        return sample

@register_op
class BboxCXCYWH2XYXY(BaseOperator):
    """
    Convert bbox CXCYWH format to XYXY format.
    [center_x, center_y, width, height] -> [x0, y0, x1, y1]
    """

    def __init__(self):
        super(BboxCXCYWH2XYXY, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox0 = sample['gt_bbox']
        bbox = bbox0.copy()
        bbox[:, :2] = bbox0[:, :2] - bbox0[:, 2:4] / 2.
        bbox[:, 2:4] = bbox0[:, :2] + bbox0[:, 2:4] / 2.
        sample['gt_bbox'] = bbox
        return sample

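# Worked example: the inverse of BboxXYXY2XYWH above. A center-format row
# [cx, cy, w, h] = [20, 40, 20, 40] maps back to corners:
#     x0, y0 = 20 - 20 / 2., 40 - 40 / 2.  -> (10, 20)
#     x1, y1 = 20 + 20 / 2., 40 + 40 / 2.  -> (30, 60)
# i.e. gt_bbox row [20, 40, 20, 40] -> [10, 20, 30, 60].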