# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# function:
#     operators to process sample,
#     eg: decode/resize/crop image

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

from numbers import Number, Integral

import uuid
import random
import math
import numpy as np
import os
import copy
import logging
import cv2

from PIL import Image, ImageDraw

from paddlex.ppdet.core.workspace import serializable
from paddlex.ppdet.modeling import bbox_utils
from ..reader import Compose

from .op_helper import (satisfy_sample_constraint, filter_and_process,
                        generate_sample_bbox, clip_bbox, data_anchor_sampling,
                        satisfy_sample_constraint_coverage,
                        crop_image_sampling, generate_sample_bbox_square,
                        bbox_area_sampling, is_poly, transform_bbox)

from paddlex.ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

registered_ops = []

def register_op(cls):
    registered_ops.append(cls.__name__)
    if not hasattr(BaseOperator, cls.__name__):
        setattr(BaseOperator, cls.__name__, cls)
    else:
        raise KeyError("The {} class has been registered.".format(
            cls.__name__))
    return serializable(cls)

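# Illustrative sketch (not part of the original module): @register_op records
# the class name in `registered_ops`, attaches the class to BaseOperator, and
# wraps it with `serializable` so it can be built from a YAML config.
# `EchoOp` is a hypothetical operator name.
#
#   @register_op
#   class EchoOp(BaseOperator):
#       def apply(self, sample, context=None):
#           return sample
#
#   assert 'EchoOp' in registered_ops
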
class BboxError(ValueError):
    pass


class ImageError(ValueError):
    pass


class BaseOperator(object):
    def __init__(self, name=None):
        if name is None:
            name = self.__class__.__name__
        self._id = name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        return sample

    def __call__(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        if isinstance(sample, Sequence):
            for i in range(len(sample)):
                sample[i] = self.apply(sample[i], context)
        else:
            sample = self.apply(sample, context)
        return sample

    def __str__(self):
        return str(self._id)

@register_op
class Decode(BaseOperator):
    def __init__(self):
        """ Transform the image data to numpy format following the rgb format
        """
        super(Decode, self).__init__()

    def apply(self, sample, context=None):
        """ Load the image if 'image' is missing and 'im_file' is set. """
        if 'image' not in sample:
            with open(sample['im_file'], 'rb') as f:
                sample['image'] = f.read()
            sample.pop('im_file')

        im = sample['image']
        data = np.frombuffer(im, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        if 'keep_ori_im' in sample and sample['keep_ori_im']:
            sample['ori_image'] = im
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        sample['image'] = im
        if 'h' not in sample:
            sample['h'] = im.shape[0]
        elif sample['h'] != im.shape[0]:
            logger.warning(
                "The actual image height: {} is not equal to the "
                "height: {} in annotation, and update sample['h'] by actual "
                "image height.".format(im.shape[0], sample['h']))
            sample['h'] = im.shape[0]
        if 'w' not in sample:
            sample['w'] = im.shape[1]
        elif sample['w'] != im.shape[1]:
            logger.warning(
                "The actual image width: {} is not equal to the "
                "width: {} in annotation, and update sample['w'] by actual "
                "image width.".format(im.shape[1], sample['w']))
            sample['w'] = im.shape[1]

        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        return sample

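# Usage sketch (illustrative, not part of the original module): Decode expects
# either raw bytes in sample['image'] or a path in sample['im_file'], and
# produces an RGB ndarray plus 'im_shape' / 'scale_factor'. 'demo.jpg' is a
# placeholder path.
#
#   raw = open('demo.jpg', 'rb').read()
#   sample = Decode()({'image': raw})
#   print(sample['image'].shape)          # (H, W, 3), RGB order
#   print(sample['im_shape'], sample['scale_factor'])
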
@register_op
class Permute(BaseOperator):
    def __init__(self):
        """
        Change the channel to be (C, H, W)
        """
        super(Permute, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        im = im.transpose((2, 0, 1))
        sample['image'] = im
        return sample

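# Quick check (sketch): HWC -> CHW, e.g. for a 32x32 RGB image.
#
#   out = Permute()({'image': np.zeros((32, 32, 3), dtype='float32')})
#   assert out['image'].shape == (3, 32, 32)
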
@register_op
class Lighting(BaseOperator):
    """
    Lighting the image by eigenvalues and eigenvectors
    Args:
        eigval (list): eigenvalues
        eigvec (list): eigenvectors
        alphastd (float): random weight of lighting, 0.1 by default
    """

    def __init__(self, eigval, eigvec, alphastd=0.1):
        super(Lighting, self).__init__()
        self.alphastd = alphastd
        self.eigval = np.array(eigval).astype('float32')
        self.eigvec = np.array(eigvec).astype('float32')

    def apply(self, sample, context=None):
        alpha = np.random.normal(scale=self.alphastd, size=(3, ))
        sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
        return sample

@register_op
class RandomErasingImage(BaseOperator):
    def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
        """
        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
        Args:
            prob (float): probability to carry out random erasing
            lower (float): lower limit of the erasing area ratio
            higher (float): upper limit of the erasing area ratio
            aspect_ratio (float): aspect ratio of the erasing region
        """
        super(RandomErasingImage, self).__init__()
        self.prob = prob
        self.lower = lower
        self.higher = higher
        self.aspect_ratio = aspect_ratio

    def apply(self, sample):
        gt_bbox = sample['gt_bbox']
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))

        for idx in range(gt_bbox.shape[0]):
            if self.prob <= np.random.rand():
                continue

            x1, y1, x2, y2 = gt_bbox[idx, :]
            w_bbox = x2 - x1
            h_bbox = y2 - y1
            area = w_bbox * h_bbox

            target_area = random.uniform(self.lower, self.higher) * area
            aspect_ratio = random.uniform(self.aspect_ratio,
                                          1 / self.aspect_ratio)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < w_bbox and h < h_bbox:
                off_y1 = random.randint(0, int(h_bbox - h))
                off_x1 = random.randint(0, int(w_bbox - w))
                im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
                    x1 + off_x1 + w), :] = 0
        sample['image'] = im
        return sample

@register_op
class NormalizeImage(BaseOperator):
    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True):
        """
        Args:
            mean (list): the pixel mean
            std (list): the pixel standard deviation
        """
        super(NormalizeImage, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.is_scale, bool)):
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def apply(self, sample, context=None):
        """Normalize the image.
        Operators:
            1. (optional) scale the pixel values to [0, 1]
            2. subtract the mean from each pixel, then divide by the std
        """
        im = sample['image']
        im = im.astype(np.float32, copy=False)
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]

        if self.is_scale:
            im = im / 255.0

        im -= mean
        im /= std

        sample['image'] = im
        return sample

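# Usage sketch (illustrative): with is_scale=True the pixels are mapped to
# [0, 1] before standardization; the ImageNet std values below are a common
# choice, not a requirement of this operator.
#
#   norm = NormalizeImage(mean=[0.485, 0.456, 0.406],
#                         std=[0.229, 0.224, 0.225], is_scale=True)
#   s = norm({'image': (np.random.rand(64, 64, 3) * 255).astype('float32')})
#   # each channel is now roughly zero-mean with unit spread
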
@register_op
class GridMask(BaseOperator):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        """
        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
        Args:
            use_h (bool): whether to mask vertically
            use_w (bool): whether to mask horizontally
            rotate (float): angle for the mask to rotate
            offset (float): mask offset
            ratio (float): mask ratio
            mode (int): gridmask mode
            prob (float): max probability to carry out gridmask
            upper_iter (int): suggested to be equal to global max_iter
        """
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.upper_iter = upper_iter
        from .gridmask_utils import Gridmask
        self.gridmask_op = Gridmask(
            use_h,
            use_w,
            rotate=rotate,
            offset=offset,
            ratio=ratio,
            mode=mode,
            prob=prob,
            upper_iter=upper_iter)

    def apply(self, sample, context=None):
        sample['image'] = self.gridmask_op(sample['image'],
                                           sample['curr_iter'])
        return sample

@register_op
class RandomDistort(BaseOperator):
    """Random color distortion.
    Args:
        hue (list): hue settings. in [lower, upper, probability] format.
        saturation (list): saturation settings. in [lower, upper, probability] format.
        contrast (list): contrast settings. in [lower, upper, probability] format.
        brightness (list): brightness settings. in [lower, upper, probability] format.
        random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
            order.
        count (int): the number of distortions to apply when random_apply is True
        random_channel (bool): whether to swap channels randomly
    """

    def __init__(self,
                 hue=[-18, 18, 0.5],
                 saturation=[0.5, 1.5, 0.5],
                 contrast=[0.5, 1.5, 0.5],
                 brightness=[0.5, 1.5, 0.5],
                 random_apply=True,
                 count=4,
                 random_channel=False):
        super(RandomDistort, self).__init__()
        self.hue = hue
        self.saturation = saturation
        self.contrast = contrast
        self.brightness = brightness
        self.random_apply = random_apply
        self.count = count
        self.random_channel = random_channel

    def apply_hue(self, img):
        low, high, prob = self.hue
        if np.random.uniform(0., 1.) < prob:
            return img

        img = img.astype(np.float32)
        # works, but the result differs from the HSV version
        delta = np.random.uniform(low, high)
        u = np.cos(delta * np.pi)
        w = np.sin(delta * np.pi)
        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
                         [0.211, -0.523, 0.311]])
        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                          [1.0, -1.107, 1.705]])
        t = np.dot(np.dot(ityiq, bt), tyiq).T
        img = np.dot(img, t)
        return img

    def apply_saturation(self, img):
        low, high, prob = self.saturation
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # works, but the result differs from the HSV version
        gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
        gray = gray.sum(axis=2, keepdims=True)
        gray *= (1.0 - delta)
        img *= delta
        img += gray
        return img

    def apply_contrast(self, img):
        low, high, prob = self.contrast
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img *= delta
        return img

    def apply_brightness(self, img):
        low, high, prob = self.brightness
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img += delta
        return img

    def apply(self, sample, context=None):
        img = sample['image']
        if self.random_apply:
            functions = [
                self.apply_brightness, self.apply_contrast,
                self.apply_saturation, self.apply_hue
            ]
            distortions = np.random.permutation(functions)[:self.count]
            for func in distortions:
                img = func(img)
            sample['image'] = img
            return sample

        img = self.apply_brightness(img)
        mode = np.random.randint(0, 2)
        if mode:
            img = self.apply_contrast(img)
        img = self.apply_saturation(img)
        img = self.apply_hue(img)
        if not mode:
            img = self.apply_contrast(img)

        if self.random_channel:
            if np.random.randint(0, 2):
                img = img[..., np.random.permutation(3)]
        sample['image'] = img
        return sample

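# Usage sketch (illustrative): with random_apply=True, a random subset of up
# to `count` distortions is applied in random order. Note that each apply_*
# helper above returns the image unchanged when the uniform draw falls below
# its probability entry.
#
#   distort = RandomDistort(random_apply=True, count=2)
#   s = distort({'image': (np.random.rand(64, 64, 3) * 255).astype('float32')})
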
@register_op
class AutoAugment(BaseOperator):
    def __init__(self, autoaug_type="v1"):
        """
        Args:
            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
        """
        super(AutoAugment, self).__init__()
        self.autoaug_type = autoaug_type

    def apply(self, sample, context=None):
        """
        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
        """
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        if len(gt_bbox) == 0:
            return sample

        height, width, _ = im.shape
        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)

        from .autoaugment_utils import distort_image_with_autoaugment
        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
                                                          self.autoaug_type)

        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)

        sample['image'] = im
        sample['gt_bbox'] = gt_bbox
        return sample

@register_op
class RandomFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_segm(self, segms, height, width):
        def _flip_poly(poly, width):
            flipped_poly = np.array(poly)
            flipped_poly[0::2] = width - np.array(poly[0::2])
            return flipped_poly.tolist()

        def _flip_rle(rle, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[:, ::-1]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        flipped_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                flipped_segms.append(
                    [_flip_poly(poly, width) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                flipped_segms.append(_flip_rle(segm, height, width))
        return flipped_segms

    def apply_keypoint(self, gt_keypoint, width):
        for i in range(gt_keypoint.shape[1]):
            if i % 2 == 0:
                old_x = gt_keypoint[:, i].copy()
                gt_keypoint[:, i] = width - old_x
        return gt_keypoint

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_bbox(self, bbox, width):
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        bbox[:, 0] = width - oldx2
        bbox[:, 2] = width - oldx1
        return bbox

    def apply_rbox(self, bbox, width):
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        oldx3 = bbox[:, 4].copy()
        oldx4 = bbox[:, 6].copy()
        bbox[:, 0] = width - oldx1
        bbox[:, 2] = width - oldx2
        bbox[:, 4] = width - oldx3
        bbox[:, 6] = width - oldx4
        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
        return bbox

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
               (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
               (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
                                                    width)
            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
                sample['gt_keypoint'] = self.apply_keypoint(
                    sample['gt_keypoint'], width)
            if 'semantic' in sample and sample['semantic'] is not None:
                sample['semantic'] = sample['semantic'][:, ::-1]
            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
                sample['gt_rbox2poly'] = self.apply_rbox(
                    sample['gt_rbox2poly'], width)
            sample['flipped'] = True
            sample['image'] = im
        return sample

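# Usage sketch (illustrative): flipping mirrors x coordinates as x' = W - x,
# so a box [x1, y1, x2, y2] becomes [W - x2, y1, W - x1, y2].
#
#   flip = RandomFlip(prob=1.0)   # uniform(0, 1) < 1.0 always, so always flip
#   s = flip({'image': np.zeros((100, 200, 3), dtype='float32'),
#             'gt_bbox': np.array([[10., 20., 60., 80.]], dtype='float32')})
#   # s['gt_bbox'] -> [[140., 20., 190., 80.]]
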
@register_op
class Resize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. If keep_ratio is True,
        resize the image's long side to the maximum of target_size;
        if keep_ratio is False, resize the image to target size (h, w).
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether to keep the aspect ratio
            interp (int): the interpolation method
        """
        super(Resize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale
        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_bbox(self, bbox, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
        return bbox

    def apply_segm(self, segms, im_size, scale):
        def _resize_poly(poly, im_scale_x, im_scale_y):
            resized_poly = np.array(poly).astype('float32')
            resized_poly[0::2] *= im_scale_x
            resized_poly[1::2] *= im_scale_y
            return resized_poly.tolist()

        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, im_h, im_w)
            mask = mask_util.decode(rle)
            mask = cv2.resize(
                mask,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                resized_segms.append(
                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
        return resized_segms

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])

        # apply rbox
        if 'gt_rbox2poly' in sample:
            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
                logger.warning(
                    "gt_rbox2poly's length should be 8, but actually is {}".
                    format(len(sample['gt_rbox2poly'])))
            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
                                                     [im_scale_x, im_scale_y],
                                                     [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(
                sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])

        # apply semantic
        if 'semantic' in sample and sample['semantic'] is not None:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic

        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            masks = [
                cv2.resize(
                    gt_segm,
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)

        return sample

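# Usage sketch (illustrative): with keep_ratio=True the scale is
# min(min(target)/min(h, w), max(target)/max(h, w)), so a [800, 1333] target
# maps a 600x1000 image to scale min(800/600, 1333/1000) = 1.333.
#
#   resize = Resize(target_size=[800, 1333], keep_ratio=True)
#   s = resize({'image': np.zeros((600, 1000, 3), dtype='uint8'),
#               'scale_factor': np.array([1., 1.], dtype='float32')})
#   # s['image'].shape -> approximately (800, 1333, 3)
#   # s['scale_factor'] -> [1.333, 1.333]
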
@register_op
class MultiscaleTestResize(BaseOperator):
    def __init__(self,
                 origin_target_size=[800, 1333],
                 target_size=[],
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale image to each size in target_size, and capped at max_size.
        Args:
            origin_target_size (list): origin target size of image
            target_size (list): A list of target sizes of image.
            interp (int): the interpolation method.
            use_flip (bool): whether use flip augmentation.
        """
        super(MultiscaleTestResize, self).__init__()
        self.interp = interp
        self.use_flip = use_flip

        if not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size

        if not isinstance(origin_target_size, Sequence):
            raise TypeError(
                "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
                format(type(origin_target_size)))

        self.origin_target_size = origin_target_size

    def apply(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.
        """
        samples = []
        resizer = Resize(
            self.origin_target_size, keep_ratio=True, interp=self.interp)
        samples.append(resizer(sample.copy(), context))
        if self.use_flip:
            flipper = RandomFlip(1.1)  # prob > 1 guarantees a flip
            samples.append(flipper(sample.copy(), context=context))

        for size in self.target_size:
            resizer = Resize(size, keep_ratio=True, interp=self.interp)
            samples.append(resizer(sample.copy(), context))

        return samples

@register_op
class RandomResize(BaseOperator):
    def __init__(self,
                 target_size,
                 keep_ratio=True,
                 interp=cv2.INTER_LINEAR,
                 random_size=True,
                 random_interp=False):
        """
        Resize image to a randomly selected target size and interpolation method.
        Args:
            target_size (int, list, tuple): image target size; if random_size is True, must be list or tuple
            keep_ratio (bool): whether to keep the aspect ratio, default True
            interp (int): the interpolation method
            random_size (bool): whether to randomly select a target size
            random_interp (bool): whether to randomly select an interpolation method
        """
        super(RandomResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        assert isinstance(target_size, (
            Integral, Sequence)), "target_size must be Integer, List or Tuple"
        if random_size and not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        self.random_size = random_size
        self.random_interp = random_interp

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        if self.random_size:
            target_size = random.choice(self.target_size)
        else:
            target_size = self.target_size
        if self.random_interp:
            interp = random.choice(self.interps)
        else:
            interp = self.interp
        resizer = Resize(target_size, self.keep_ratio, interp)
        return resizer(sample, context=context)

@register_op
class RandomExpand(BaseOperator):
    """Random expand the canvas.
    Args:
        ratio (float): maximum expansion ratio.
        prob (float): probability to expand.
        fill_value (list): color value used to fill the canvas. in RGB order.
    """

    def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
        super(RandomExpand, self).__init__()
        assert ratio > 1.01, "expand ratio must be larger than 1.01"
        self.ratio = ratio
        self.prob = prob
        assert isinstance(fill_value, (Number, Sequence)), \
            "fill value must be either float or sequence"
        if isinstance(fill_value, Number):
            fill_value = (fill_value, ) * 3
        if not isinstance(fill_value, tuple):
            fill_value = tuple(fill_value)
        self.fill_value = fill_value

    def apply(self, sample, context=None):
        if np.random.uniform(0., 1.) < self.prob:
            return sample

        im = sample['image']
        height, width = im.shape[:2]
        ratio = np.random.uniform(1., self.ratio)
        h = int(height * ratio)
        w = int(width * ratio)
        if not h > height or not w > width:
            return sample
        y = np.random.randint(0, h - height)
        x = np.random.randint(0, w - width)
        offsets, size = [x, y], [h, w]

        pad = Pad(size,
                  pad_mode=-1,
                  offsets=offsets,
                  fill_value=self.fill_value)

        return pad(sample, context=context)

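# Usage sketch (illustrative). Note the guard above: expansion is *skipped*
# when the uniform draw falls below `prob`, so prob=0. forces an expand here.
# Placement on the larger canvas and the box shifting are delegated to the
# Pad operator defined later in this module.
#
#   expand = RandomExpand(ratio=2., prob=0.)
#   s = expand({'image': np.zeros((100, 100, 3), dtype='float32'),
#               'gt_bbox': np.array([[10., 10., 50., 50.]], dtype='float32')})
#   # the canvas grows up to 2x; gt_bbox is shifted by the random offsets
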
@register_op
class CropWithSampling(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            satisfy_all (bool): whether all boxes must satisfy.
            e.g.[[1,  1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap]
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithSampling, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_height, im_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        for sampler in self.batch_sampler:
            found = 0
            for i in range(sampler[1]):
                if found >= sampler[0]:
                    break
                sample_bbox = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
                                             self.satisfy_all):
                    sampled_bbox.append(sample_bbox)
                    found = found + 1
        im = np.array(im)
        while sampled_bbox:
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            sample_bbox = sampled_bbox.pop(idx)
            sample_bbox = clip_bbox(sample_bbox)
            crop_bbox, crop_class, crop_score = \
                filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
            if self.avoid_no_bbox:
                if len(crop_bbox) < 1:
                    continue
            xmin = int(sample_bbox[0] * im_width)
            xmax = int(sample_bbox[2] * im_width)
            ymin = int(sample_bbox[1] * im_height)
            ymax = int(sample_bbox[3] * im_height)
            im = im[ymin:ymax, xmin:xmax]
            sample['image'] = im
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        return sample

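# Usage sketch (illustrative): each batch_sampler row is
# [max sample, max trial, min/max scale, min/max aspect ratio, min/max overlap];
# the row below tries up to 50 crops at 0.3-1.0 scale requiring IoU >= 0.5.
# gt_bbox is assumed normalized to [0, 1], as in the SSD-style pipeline.
#
#   crop = CropWithSampling(
#       batch_sampler=[[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0]])
#   sample = crop(sample, context=None)
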
@register_op
class CropWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                                   parameters for cropping.
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
            e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
                [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                 [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap, min coverage, max coverage]
            target_size (int): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        image_height, image_width = im.shape[:2]
        gt_bbox[:, 0] /= image_width
        gt_bbox[:, 1] /= image_height
        gt_bbox[:, 2] /= image_width
        gt_bbox[:, 3] /= image_height
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()

        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:  # anchor sampling
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    if sample_bbox == 0:
                        break
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample

        else:
            for sampler in self.batch_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                sample_bbox = clip_bbox(sample_bbox)

                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sample bboxes according to the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)

                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                im = im[ymin:ymax, xmin:xmax]
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample

  1043. @register_op
  1044. class RandomCrop(BaseOperator):
  1045. """Random crop image and bboxes.
  1046. Args:
  1047. aspect_ratio (list): aspect ratio of cropped region.
  1048. in [min, max] format.
  1049. thresholds (list): iou thresholds for decide a valid bbox crop.
  1050. scaling (list): ratio between a cropped region and the original image.
  1051. in [min, max] format.
  1052. num_attempts (int): number of tries before giving up.
  1053. allow_no_crop (bool): allow return without actually cropping them.
  1054. cover_all_box (bool): ensure all bboxes are covered in the final crop.
  1055. is_mask_crop(bool): whether crop the segmentation.
  1056. """
  1057. def __init__(self,
  1058. aspect_ratio=[.5, 2.],
  1059. thresholds=[.0, .1, .3, .5, .7, .9],
  1060. scaling=[.3, 1.],
  1061. num_attempts=50,
  1062. allow_no_crop=True,
  1063. cover_all_box=False,
  1064. is_mask_crop=False):
  1065. super(RandomCrop, self).__init__()
  1066. self.aspect_ratio = aspect_ratio
  1067. self.thresholds = thresholds
  1068. self.scaling = scaling
  1069. self.num_attempts = num_attempts
  1070. self.allow_no_crop = allow_no_crop
  1071. self.cover_all_box = cover_all_box
  1072. self.is_mask_crop = is_mask_crop
  1073. def crop_segms(self, segms, valid_ids, crop, height, width):
  1074. def _crop_poly(segm, crop):
  1075. xmin, ymin, xmax, ymax = crop
  1076. crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
  1077. crop_p = np.array(crop_coord).reshape(4, 2)
  1078. crop_p = Polygon(crop_p)
  1079. crop_segm = list()
  1080. for poly in segm:
  1081. poly = np.array(poly).reshape(len(poly) // 2, 2)
  1082. polygon = Polygon(poly)
  1083. if not polygon.is_valid:
  1084. exterior = polygon.exterior
  1085. multi_lines = exterior.intersection(exterior)
  1086. polygons = shapely.ops.polygonize(multi_lines)
  1087. polygon = MultiPolygon(polygons)
  1088. multi_polygon = list()
  1089. if isinstance(polygon, MultiPolygon):
  1090. multi_polygon = copy.deepcopy(polygon)
  1091. else:
  1092. multi_polygon.append(copy.deepcopy(polygon))
  1093. for per_polygon in multi_polygon:
  1094. inter = per_polygon.intersection(crop_p)
  1095. if not inter:
  1096. continue
  1097. if isinstance(inter, (MultiPolygon, GeometryCollection)):
  1098. for part in inter:
  1099. if not isinstance(part, Polygon):
  1100. continue
  1101. part = np.squeeze(
  1102. np.array(part.exterior.coords[:-1]).reshape(
  1103. 1, -1))
  1104. part[0::2] -= xmin
  1105. part[1::2] -= ymin
  1106. crop_segm.append(part.tolist())
  1107. elif isinstance(inter, Polygon):
  1108. crop_poly = np.squeeze(
  1109. np.array(inter.exterior.coords[:-1]).reshape(1,
  1110. -1))
  1111. crop_poly[0::2] -= xmin
  1112. crop_poly[1::2] -= ymin
  1113. crop_segm.append(crop_poly.tolist())
  1114. else:
  1115. continue
  1116. return crop_segm
  1117. def _crop_rle(rle, crop, height, width):
  1118. if 'counts' in rle and type(rle['counts']) == list:
  1119. rle = mask_util.frPyObjects(rle, height, width)
  1120. mask = mask_util.decode(rle)
  1121. mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
  1122. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  1123. return rle
  1124. crop_segms = []
  1125. for id in valid_ids:
  1126. segm = segms[id]
  1127. if is_poly(segm):
  1128. import copy
  1129. import shapely.ops
  1130. from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
  1131. logging.getLogger("shapely").setLevel(logging.WARNING)
  1132. # Polygon format
  1133. crop_segms.append(_crop_poly(segm, crop))
  1134. else:
  1135. # RLE format
  1136. import pycocotools.mask as mask_util
  1137. crop_segms.append(_crop_rle(segm, crop, height, width))
  1138. return crop_segms
  1139. def apply(self, sample, context=None):
  1140. if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
  1141. return sample
  1142. h, w = sample['image'].shape[:2]
  1143. gt_bbox = sample['gt_bbox']
  1144. # NOTE Original method attempts to generate one candidate for each
  1145. # threshold then randomly sample one from the resulting list.
  1146. # Here a short circuit approach is taken, i.e., randomly choose a
  1147. # threshold and attempt to find a valid crop, and simply return the
  1148. # first one found.
  1149. # The probability is not exactly the same, kinda resembling the
  1150. # "Monty Hall" problem. Actually carrying out the attempts will affect
  1151. # observability (just like opening doors in the "Monty Hall" game).
  1152. thresholds = list(self.thresholds)
  1153. if self.allow_no_crop:
  1154. thresholds.append('no_crop')
  1155. np.random.shuffle(thresholds)
  1156. for thresh in thresholds:
  1157. if thresh == 'no_crop':
  1158. return sample
  1159. found = False
  1160. for i in range(self.num_attempts):
  1161. scale = np.random.uniform(*self.scaling)
  1162. if self.aspect_ratio is not None:
  1163. min_ar, max_ar = self.aspect_ratio
  1164. aspect_ratio = np.random.uniform(
  1165. max(min_ar, scale**2), min(max_ar, scale**-2))
  1166. h_scale = scale / np.sqrt(aspect_ratio)
  1167. w_scale = scale * np.sqrt(aspect_ratio)
  1168. else:
  1169. h_scale = np.random.uniform(*self.scaling)
  1170. w_scale = np.random.uniform(*self.scaling)
  1171. crop_h = h * h_scale
  1172. crop_w = w * w_scale
  1173. if self.aspect_ratio is None:
  1174. if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
  1175. continue
  1176. crop_h = int(crop_h)
  1177. crop_w = int(crop_w)
  1178. crop_y = np.random.randint(0, h - crop_h)
  1179. crop_x = np.random.randint(0, w - crop_w)
  1180. crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
  1181. iou = self._iou_matrix(
  1182. gt_bbox, np.array(
  1183. [crop_box], dtype=np.float32))
  1184. if iou.max() < thresh:
  1185. continue
  1186. if self.cover_all_box and iou.min() < thresh:
  1187. continue
  1188. cropped_box, valid_ids = self._crop_box_with_center_constraint(
  1189. gt_bbox, np.array(
  1190. crop_box, dtype=np.float32))
  1191. if valid_ids.size > 0:
  1192. found = True
  1193. break
  1194. if found:
  1195. if self.is_mask_crop and 'gt_poly' in sample and len(sample[
  1196. 'gt_poly']) > 0:
  1197. crop_polys = self.crop_segms(
  1198. sample['gt_poly'],
  1199. valid_ids,
  1200. np.array(
  1201. crop_box, dtype=np.int64),
  1202. h,
  1203. w)
  1204. if [] in crop_polys:
  1205. delete_id = list()
  1206. valid_polys = list()
  1207. for id, crop_poly in enumerate(crop_polys):
  1208. if crop_poly == []:
  1209. delete_id.append(id)
  1210. else:
  1211. valid_polys.append(crop_poly)
  1212. valid_ids = np.delete(valid_ids, delete_id)
  1213. if len(valid_polys) == 0:
  1214. return sample
  1215. sample['gt_poly'] = valid_polys
  1216. else:
  1217. sample['gt_poly'] = crop_polys
  1218. if 'gt_segm' in sample:
  1219. sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
  1220. crop_box)
  1221. sample['gt_segm'] = np.take(
  1222. sample['gt_segm'], valid_ids, axis=0)
  1223. sample['image'] = self._crop_image(sample['image'], crop_box)
  1224. sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
  1225. sample['gt_class'] = np.take(
  1226. sample['gt_class'], valid_ids, axis=0)
  1227. if 'gt_score' in sample:
  1228. sample['gt_score'] = np.take(
  1229. sample['gt_score'], valid_ids, axis=0)
  1230. if 'is_crowd' in sample:
  1231. sample['is_crowd'] = np.take(
  1232. sample['is_crowd'], valid_ids, axis=0)
  1233. return sample
  1234. return sample
  1235. def _iou_matrix(self, a, b):
  1236. tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
  1237. br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
  1238. area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
  1239. area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
  1240. area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
  1241. area_o = (area_a[:, np.newaxis] + area_b - area_i)
  1242. return area_i / (area_o + 1e-10)
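# Shape sketch for _iou_matrix (illustrative, not part of the original
# file): with a of shape (N, 4) and b of shape (M, 4) in XYXY format,
# broadcasting a[:, np.newaxis, :2] against b[:, :2] yields an (N, M) IoU
# matrix. The (tl_i < br_i).all(axis=2) factor zeroes out non-overlapping
# pairs, and the 1e-10 term guards against division by zero.
#
#     a = np.array([[0., 0., 10., 10.]])
#     b = np.array([[5., 5., 15., 15.]])
#     # intersection 5 * 5 = 25, union 100 + 100 - 25 = 175
#     # -> IoU = 25 / 175 ~= 0.143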
  1243. def _crop_box_with_center_constraint(self, box, crop):
  1244. cropped_box = box.copy()
  1245. cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
  1246. cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
  1247. cropped_box[:, :2] -= crop[:2]
  1248. cropped_box[:, 2:] -= crop[:2]
  1249. centers = (box[:, :2] + box[:, 2:]) / 2
  1250. valid = np.logical_and(crop[:2] <= centers,
  1251. centers < crop[2:]).all(axis=1)
  1252. valid = np.logical_and(
  1253. valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
  1254. return cropped_box, np.where(valid)[0]
  1255. def _crop_image(self, img, crop):
  1256. x1, y1, x2, y2 = crop
  1257. return img[y1:y2, x1:x2, :]
  1258. def _crop_segm(self, segm, crop):
  1259. x1, y1, x2, y2 = crop
  1260. return segm[:, y1:y2, x1:x2]
  1261. @register_op
  1262. class RandomScaledCrop(BaseOperator):
  1263. """Resize image and bbox based on long side (with optional random scaling),
  1264. then crop or pad image to target size.
  1265. Args:
  1266. target_dim (int): target size.
  1267. scale_range (list): random scale range.
  1268. interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
  1269. """
  1270. def __init__(self,
  1271. target_dim=512,
  1272. scale_range=[.1, 2.],
  1273. interp=cv2.INTER_LINEAR):
  1274. super(RandomScaledCrop, self).__init__()
  1275. self.target_dim = target_dim
  1276. self.scale_range = scale_range
  1277. self.interp = interp
  1278. def apply(self, sample, context=None):
  1279. img = sample['image']
  1280. h, w = img.shape[:2]
  1281. random_scale = np.random.uniform(*self.scale_range)
  1282. dim = self.target_dim
  1283. random_dim = int(dim * random_scale)
  1284. dim_max = max(h, w)
  1285. scale = random_dim / dim_max
resize_w = int(round(w * scale))
resize_h = int(round(h * scale))
offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
# cv2.resize expects an integer (width, height) tuple
img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
  1291. img = np.array(img)
  1292. canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
  1293. canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
  1294. offset_y:offset_y + dim, offset_x:offset_x + dim, :]
  1295. sample['image'] = canvas
  1296. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
scale_factor = sample['scale_factor']
  1298. sample['scale_factor'] = np.asarray(
  1299. [scale_factor[0] * scale, scale_factor[1] * scale],
  1300. dtype=np.float32)
  1301. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  1302. scale_array = np.array([scale, scale] * 2, dtype=np.float32)
  1303. shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
  1304. boxes = sample['gt_bbox'] * scale_array - shift_array
  1305. boxes = np.clip(boxes, 0, dim - 1)
  1306. # filter boxes with no area
  1307. area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
  1308. valid = (area > 1.).nonzero()[0]
  1309. sample['gt_bbox'] = boxes[valid]
  1310. sample['gt_class'] = sample['gt_class'][valid]
  1311. return sample
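# Worked example for the scale math above (illustrative numbers): with
# target_dim=512, a 480x640 (h x w) input and random_scale=1.0, we get
# random_dim=512, scale = 512 / 640 = 0.8, so the image is resized to
# 384x512 and pasted onto a 512x512 canvas; boxes are scaled by 0.8 and
# shifted by the random (offset_x, offset_y).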
  1312. @register_op
  1313. class Cutmix(BaseOperator):
  1314. def __init__(self, alpha=1.5, beta=1.5):
  1315. """
  1316. CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
  1317. Cutmix image and gt_bbbox/gt_score
  1318. Args:
  1319. alpha (float): alpha parameter of beta distribute
  1320. beta (float): beta parameter of beta distribute
  1321. """
  1322. super(Cutmix, self).__init__()
  1323. self.alpha = alpha
  1324. self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha should be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta should be positive in {}".format(self))
  1329. def apply_image(self, img1, img2, factor):
  1330. """ _rand_bbox """
  1331. h = max(img1.shape[0], img2.shape[0])
  1332. w = max(img1.shape[1], img2.shape[1])
  1333. cut_rat = np.sqrt(1. - factor)
  1334. cut_w = np.int32(w * cut_rat)
  1335. cut_h = np.int32(h * cut_rat)
  1336. # uniform
  1337. cx = np.random.randint(w)
  1338. cy = np.random.randint(h)
  1339. bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
  1340. bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
  1341. bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
  1342. bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
  1343. img_1_pad = np.zeros((h, w, img1.shape[2]), 'float32')
  1344. img_1_pad[:img1.shape[0], :img1.shape[1], :] = \
  1345. img1.astype('float32')
  1346. img_2_pad = np.zeros((h, w, img2.shape[2]), 'float32')
  1347. img_2_pad[:img2.shape[0], :img2.shape[1], :] = \
  1348. img2.astype('float32')
  1349. img_1_pad[bby1:bby2, bbx1:bbx2, :] = img_2_pad[bby1:bby2, bbx1:bbx2, :]
  1350. return img_1_pad
  1351. def __call__(self, sample, context=None):
  1352. if not isinstance(sample, Sequence):
  1353. return sample
  1354. assert len(sample) == 2, 'cutmix need two samples'
  1355. factor = np.random.beta(self.alpha, self.beta)
  1356. factor = max(0.0, min(1.0, factor))
  1357. if factor >= 1.0:
  1358. return sample[0]
  1359. if factor <= 0.0:
  1360. return sample[1]
  1361. img1 = sample[0]['image']
  1362. img2 = sample[1]['image']
  1363. img = self.apply_image(img1, img2, factor)
  1364. gt_bbox1 = sample[0]['gt_bbox']
  1365. gt_bbox2 = sample[1]['gt_bbox']
  1366. gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
  1367. gt_class1 = sample[0]['gt_class']
  1368. gt_class2 = sample[1]['gt_class']
  1369. gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
  1370. gt_score1 = np.ones_like(sample[0]['gt_class'])
  1371. gt_score2 = np.ones_like(sample[1]['gt_class'])
  1372. gt_score = np.concatenate(
  1373. (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
  1374. result = copy.deepcopy(sample[0])
  1375. result['image'] = img
  1376. result['gt_bbox'] = gt_bbox
  1377. result['gt_score'] = gt_score
  1378. result['gt_class'] = gt_class
  1379. if 'is_crowd' in sample[0]:
  1380. is_crowd1 = sample[0]['is_crowd']
  1381. is_crowd2 = sample[1]['is_crowd']
  1382. is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
  1383. result['is_crowd'] = is_crowd
  1384. if 'difficult' in sample[0]:
  1385. is_difficult1 = sample[0]['difficult']
  1386. is_difficult2 = sample[1]['difficult']
  1387. is_difficult = np.concatenate(
  1388. (is_difficult1, is_difficult2), axis=0)
  1389. result['difficult'] = is_difficult
  1390. return result
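# Sketch of the CutMix factor (illustrative): factor ~ Beta(alpha, beta)
# and cut_rat = sqrt(1 - factor), so factor = 0.75 gives a cut patch of
# 0.5 * w by 0.5 * h, i.e. roughly 25% of the canvas area. gt_score then
# weights the two box sets by factor and (1 - factor) respectively.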
  1391. @register_op
  1392. class Mixup(BaseOperator):
  1393. def __init__(self, alpha=1.5, beta=1.5):
  1394. """ Mixup image and gt_bbbox/gt_score
  1395. Args:
  1396. alpha (float): alpha parameter of beta distribute
  1397. beta (float): beta parameter of beta distribute
  1398. """
  1399. super(Mixup, self).__init__()
  1400. self.alpha = alpha
  1401. self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha should be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta should be positive in {}".format(self))
  1406. def apply_image(self, img1, img2, factor):
  1407. h = max(img1.shape[0], img2.shape[0])
  1408. w = max(img1.shape[1], img2.shape[1])
  1409. img = np.zeros((h, w, img1.shape[2]), 'float32')
  1410. img[:img1.shape[0], :img1.shape[1], :] = \
  1411. img1.astype('float32') * factor
  1412. img[:img2.shape[0], :img2.shape[1], :] += \
  1413. img2.astype('float32') * (1.0 - factor)
  1414. return img.astype('uint8')
  1415. def __call__(self, sample, context=None):
  1416. if not isinstance(sample, Sequence):
  1417. return sample
  1418. assert len(sample) == 2, 'mixup need two samples'
  1419. factor = np.random.beta(self.alpha, self.beta)
  1420. factor = max(0.0, min(1.0, factor))
  1421. if factor >= 1.0:
  1422. return sample[0]
  1423. if factor <= 0.0:
  1424. return sample[1]
  1425. im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
  1426. result = copy.deepcopy(sample[0])
  1427. result['image'] = im
  1428. # apply bbox and score
  1429. if 'gt_bbox' in sample[0]:
  1430. gt_bbox1 = sample[0]['gt_bbox']
  1431. gt_bbox2 = sample[1]['gt_bbox']
  1432. gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
  1433. result['gt_bbox'] = gt_bbox
  1434. if 'gt_class' in sample[0]:
  1435. gt_class1 = sample[0]['gt_class']
  1436. gt_class2 = sample[1]['gt_class']
  1437. gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
  1438. result['gt_class'] = gt_class
  1439. gt_score1 = np.ones_like(sample[0]['gt_class'])
  1440. gt_score2 = np.ones_like(sample[1]['gt_class'])
  1441. gt_score = np.concatenate(
  1442. (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
  1443. result['gt_score'] = gt_score
  1444. if 'is_crowd' in sample[0]:
  1445. is_crowd1 = sample[0]['is_crowd']
  1446. is_crowd2 = sample[1]['is_crowd']
  1447. is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
  1448. result['is_crowd'] = is_crowd
  1449. if 'difficult' in sample[0]:
  1450. is_difficult1 = sample[0]['difficult']
  1451. is_difficult2 = sample[1]['difficult']
  1452. is_difficult = np.concatenate(
  1453. (is_difficult1, is_difficult2), axis=0)
  1454. result['difficult'] = is_difficult
  1455. if 'gt_ide' in sample[0]:
  1456. gt_ide1 = sample[0]['gt_ide']
  1457. gt_ide2 = sample[1]['gt_ide']
  1458. gt_ide = np.concatenate((gt_ide1, gt_ide2), axis=0)
  1459. result['gt_ide'] = gt_ide
  1460. return result
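# Usage sketch (hypothetical wiring; normally driven by the reader
# config): Mixup expects a 2-element Sequence of samples and blends the
# zero-padded images pixel-wise:
#
#     op = Mixup(alpha=1.5, beta=1.5)
#     mixed = op([sample_a, sample_b])
#     # gt_* fields are concatenated; gt_score carries the blend weights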
  1461. @register_op
  1462. class NormalizeBox(BaseOperator):
  1463. """Transform the bounding box's coornidates to [0,1]."""
  1464. def __init__(self):
  1465. super(NormalizeBox, self).__init__()
  1466. def apply(self, sample, context):
  1467. im = sample['image']
  1468. gt_bbox = sample['gt_bbox']
  1469. height, width, _ = im.shape
  1470. for i in range(gt_bbox.shape[0]):
  1471. gt_bbox[i][0] = gt_bbox[i][0] / width
  1472. gt_bbox[i][1] = gt_bbox[i][1] / height
  1473. gt_bbox[i][2] = gt_bbox[i][2] / width
  1474. gt_bbox[i][3] = gt_bbox[i][3] / height
  1475. sample['gt_bbox'] = gt_bbox
  1476. if 'gt_keypoint' in sample.keys():
  1477. gt_keypoint = sample['gt_keypoint']
  1478. for i in range(gt_keypoint.shape[1]):
  1479. if i % 2:
  1480. gt_keypoint[:, i] = gt_keypoint[:, i] / height
  1481. else:
  1482. gt_keypoint[:, i] = gt_keypoint[:, i] / width
  1483. sample['gt_keypoint'] = gt_keypoint
  1484. return sample
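# The per-box loop above is equivalent to the vectorized form below (shown
# only for clarity; the loop is kept as-is in the original):
#
#     gt_bbox[:, 0::2] /= width
#     gt_bbox[:, 1::2] /= height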
  1485. @register_op
  1486. class BboxXYXY2XYWH(BaseOperator):
  1487. """
  1488. Convert bbox XYXY format to XYWH format.
  1489. """
  1490. def __init__(self):
  1491. super(BboxXYXY2XYWH, self).__init__()
  1492. def apply(self, sample, context=None):
  1493. assert 'gt_bbox' in sample
  1494. bbox = sample['gt_bbox']
  1495. bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
  1496. bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
  1497. sample['gt_bbox'] = bbox
  1498. return sample
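# Worked example (illustrative): an XYXY box [10, 20, 50, 60] becomes
# [cx, cy, w, h] = [30, 40, 40, 40]. Note the second assignment sees the
# already-updated widths/heights, so the order of the two lines matters.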
  1499. @register_op
  1500. class PadBox(BaseOperator):
  1501. def __init__(self, num_max_boxes=50):
  1502. """
  1503. Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
  1504. Args:
  1505. num_max_boxes (int): the max number of bboxes
  1506. """
  1507. self.num_max_boxes = num_max_boxes
  1508. super(PadBox, self).__init__()
  1509. def apply(self, sample, context=None):
  1510. assert 'gt_bbox' in sample
  1511. bbox = sample['gt_bbox']
  1512. gt_num = min(self.num_max_boxes, len(bbox))
  1513. num_max = self.num_max_boxes
  1514. # fields = context['fields'] if context else []
  1515. pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
  1516. if gt_num > 0:
  1517. pad_bbox[:gt_num, :] = bbox[:gt_num, :]
  1518. sample['gt_bbox'] = pad_bbox
  1519. if 'gt_class' in sample:
  1520. pad_class = np.zeros((num_max, ), dtype=np.int32)
  1521. if gt_num > 0:
  1522. pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
  1523. sample['gt_class'] = pad_class
  1524. if 'gt_score' in sample:
  1525. pad_score = np.zeros((num_max, ), dtype=np.float32)
  1526. if gt_num > 0:
  1527. pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
  1528. sample['gt_score'] = pad_score
# In training, some ops (e.g. ExpandImage) expand gt_bbox and gt_class
# but not difficult, so difficult is padded based on its own length.
  1532. if 'difficult' in sample:
  1533. pad_diff = np.zeros((num_max, ), dtype=np.int32)
  1534. if gt_num > 0:
  1535. pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
  1536. sample['difficult'] = pad_diff
  1537. if 'is_crowd' in sample:
  1538. pad_crowd = np.zeros((num_max, ), dtype=np.int32)
  1539. if gt_num > 0:
  1540. pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
  1541. sample['is_crowd'] = pad_crowd
  1542. if 'gt_ide' in sample:
  1543. pad_ide = np.zeros((num_max, ), dtype=np.int32)
  1544. if gt_num > 0:
  1545. pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
  1546. sample['gt_ide'] = pad_ide
  1547. return sample
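# Example (illustrative): with num_max_boxes=50 and 3 ground-truth boxes,
# gt_bbox becomes a (50, 4) float32 array whose first 3 rows are the real
# boxes and the rest zeros; gt_class, gt_score, difficult, is_crowd and
# gt_ide are padded the same way, so every sample in a batch has fixed
# tensor shapes.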
  1548. @register_op
  1549. class DebugVisibleImage(BaseOperator):
  1550. """
  1551. In debug mode, visualize images according to `gt_box`.
  1552. (Currently only supported when not cropping and flipping image.)
  1553. """
  1554. def __init__(self, output_dir='output/debug', is_normalized=False):
  1555. super(DebugVisibleImage, self).__init__()
  1556. self.is_normalized = is_normalized
  1557. self.output_dir = output_dir
  1558. if not os.path.isdir(output_dir):
  1559. os.makedirs(output_dir)
  1560. if not isinstance(self.is_normalized, bool):
  1561. raise TypeError("{}: input type is invalid.".format(self))
  1562. def apply(self, sample, context=None):
  1563. image = Image.fromarray(sample['image'].astype(np.uint8))
  1564. out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
  1565. width = sample['w']
  1566. height = sample['h']
  1567. gt_bbox = sample['gt_bbox']
  1568. gt_class = sample['gt_class']
  1569. draw = ImageDraw.Draw(image)
  1570. for i in range(gt_bbox.shape[0]):
  1571. if self.is_normalized:
  1572. gt_bbox[i][0] = gt_bbox[i][0] * width
  1573. gt_bbox[i][1] = gt_bbox[i][1] * height
  1574. gt_bbox[i][2] = gt_bbox[i][2] * width
  1575. gt_bbox[i][3] = gt_bbox[i][3] * height
  1576. xmin, ymin, xmax, ymax = gt_bbox[i]
  1577. draw.line(
  1578. [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
  1579. (xmin, ymin)],
  1580. width=2,
  1581. fill='green')
  1582. # draw label
  1583. text = str(gt_class[i][0])
  1584. tw, th = draw.textsize(text)
  1585. draw.rectangle(
  1586. [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
  1587. draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
  1588. if 'gt_keypoint' in sample.keys():
  1589. gt_keypoint = sample['gt_keypoint']
  1590. if self.is_normalized:
  1591. for i in range(gt_keypoint.shape[1]):
  1592. if i % 2:
  1593. gt_keypoint[:, i] = gt_keypoint[:, i] * height
  1594. else:
  1595. gt_keypoint[:, i] = gt_keypoint[:, i] * width
  1596. for i in range(gt_keypoint.shape[0]):
  1597. keypoint = gt_keypoint[i]
  1598. for j in range(int(keypoint.shape[0] / 2)):
# np.round keeps numpy types (built-in round returns a plain int,
# which has no .astype)
x1 = np.round(keypoint[2 * j]).astype(np.int32)
y1 = np.round(keypoint[2 * j + 1]).astype(np.int32)
  1601. draw.ellipse(
  1602. (x1, y1, x1 + 5, y1 + 5),
  1603. fill='green',
  1604. outline='green')
  1605. save_path = os.path.join(self.output_dir, out_file_name)
  1606. image.save(save_path, quality=95)
  1607. return sample
  1608. @register_op
  1609. class Pad(BaseOperator):
  1610. def __init__(self,
  1611. size=None,
  1612. size_divisor=32,
  1613. pad_mode=0,
  1614. offsets=None,
  1615. fill_value=(127.5, 127.5, 127.5)):
  1616. """
  1617. Pad image to a specified size or multiple of size_divisor.
  1618. Args:
  1619. size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
  1620. size_divisor (int): size divisor, default 32
  1621. pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
  1622. if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
  1623. offsets (list): [offset_x, offset_y], specify offset while padding, only supported pad_mode=-1
fill_value (tuple): RGB value of the pad area, default (127.5, 127.5, 127.5)
  1625. """
  1626. super(Pad, self).__init__()
# size may legitimately be None (pad to a multiple of size_divisor)
if size is not None and not isinstance(size, (int, Sequence)):
raise TypeError(
"Type of size is invalid. Must be int or Sequence, "
"now is {}".format(type(size)))
  1631. if isinstance(size, int):
  1632. size = [size, size]
  1633. assert pad_mode in [
  1634. -1, 0, 1, 2
  1635. ], 'currently only supports four modes [-1, 0, 1, 2]'
  1636. if pad_mode == -1:
  1637. assert offsets, 'if pad_mode is -1, offsets should not be None'
  1638. self.size = size
  1639. self.size_divisor = size_divisor
  1640. self.pad_mode = pad_mode
  1641. self.fill_value = fill_value
  1642. self.offsets = offsets
  1643. def apply_segm(self, segms, offsets, im_size, size):
  1644. def _expand_poly(poly, x, y):
  1645. expanded_poly = np.array(poly)
  1646. expanded_poly[0::2] += x
  1647. expanded_poly[1::2] += y
  1648. return expanded_poly.tolist()
  1649. def _expand_rle(rle, x, y, height, width, h, w):
  1650. if 'counts' in rle and type(rle['counts']) == list:
  1651. rle = mask_util.frPyObjects(rle, height, width)
  1652. mask = mask_util.decode(rle)
  1653. expanded_mask = np.full((h, w), 0).astype(mask.dtype)
  1654. expanded_mask[y:y + height, x:x + width] = mask
  1655. rle = mask_util.encode(
  1656. np.array(
  1657. expanded_mask, order='F', dtype=np.uint8))
  1658. return rle
  1659. x, y = offsets
  1660. height, width = im_size
  1661. h, w = size
  1662. expanded_segms = []
import pycocotools.mask as mask_util
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append(
[_expand_poly(poly, x, y) for poly in segm])
else:
# RLE format
expanded_segms.append(
_expand_rle(segm, x, y, height, width, h, w))
  1673. return expanded_segms
  1674. def apply_bbox(self, bbox, offsets):
  1675. return bbox + np.array(offsets * 2, dtype=np.float32)
  1676. def apply_keypoint(self, keypoints, offsets):
  1677. n = len(keypoints[0]) // 2
  1678. return keypoints + np.array(offsets * n, dtype=np.float32)
  1679. def apply_image(self, image, offsets, im_size, size):
  1680. x, y = offsets
  1681. im_h, im_w = im_size
  1682. h, w = size
  1683. canvas = np.ones((h, w, 3), dtype=np.float32)
  1684. canvas *= np.array(self.fill_value, dtype=np.float32)
  1685. canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
  1686. return canvas
  1687. def apply(self, sample, context=None):
  1688. im = sample['image']
  1689. im_h, im_w = im.shape[:2]
  1690. if self.size:
  1691. h, w = self.size
assert (
im_h <= h and im_w <= w
), '(h, w) of target size should be no less than (im_h, im_w)'
else:
# cast to int so the padded shape below is integral
h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
  1698. if h == im_h and w == im_w:
  1699. return sample
  1700. if self.pad_mode == -1:
  1701. offset_x, offset_y = self.offsets
  1702. elif self.pad_mode == 0:
  1703. offset_y, offset_x = 0, 0
  1704. elif self.pad_mode == 1:
  1705. offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
  1706. else:
  1707. offset_y, offset_x = h - im_h, w - im_w
  1708. offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
  1709. sample['image'] = self.apply_image(im, offsets, im_size, size)
  1710. if self.pad_mode == 0:
  1711. return sample
  1712. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  1713. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
  1714. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  1715. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
  1716. im_size, size)
  1717. if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
  1718. sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
  1719. offsets)
  1720. return sample
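# Worked example (illustrative): with size=None and size_divisor=32, a
# 375x500 (h x w) image is padded to 384x512. With pad_mode=1 the image is
# centered, so offset_y = (384 - 375) // 2 = 4 and
# offset_x = (512 - 500) // 2 = 6, and boxes, polygons and keypoints are
# shifted by (offset_x, offset_y) accordingly.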
  1721. @register_op
  1722. class Poly2Mask(BaseOperator):
  1723. """
  1724. gt poly to mask annotations
  1725. """
  1726. def __init__(self):
  1727. super(Poly2Mask, self).__init__()
  1728. import pycocotools.mask as maskUtils
  1729. self.maskutils = maskUtils
  1730. def _poly2mask(self, mask_ann, img_h, img_w):
  1731. if isinstance(mask_ann, list):
  1732. # polygon -- a single object might consist of multiple parts
  1733. # we merge all parts into one mask rle code
  1734. rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
  1735. rle = self.maskutils.merge(rles)
  1736. elif isinstance(mask_ann['counts'], list):
  1737. # uncompressed RLE
  1738. rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
  1739. else:
  1740. # rle
  1741. rle = mask_ann
  1742. mask = self.maskutils.decode(rle)
  1743. return mask
  1744. def apply(self, sample, context=None):
  1745. assert 'gt_poly' in sample
  1746. im_h = sample['h']
  1747. im_w = sample['w']
  1748. masks = [
  1749. self._poly2mask(gt_poly, im_h, im_w)
  1750. for gt_poly in sample['gt_poly']
  1751. ]
  1752. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  1753. return sample
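# Minimal sketch of the polygon -> mask path used above (pycocotools API):
#
#     rles = maskUtils.frPyObjects([[x0, y0, x1, y1, ...]], img_h, img_w)
#     rle = maskUtils.merge(rles)   # multi-part object -> single RLE
#     mask = maskUtils.decode(rle)  # (img_h, img_w) uint8 array of {0, 1}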
  1754. @register_op
  1755. class Rbox2Poly(BaseOperator):
  1756. """
  1757. Convert rbbox format to poly format.
  1758. """
  1759. def __init__(self):
  1760. super(Rbox2Poly, self).__init__()
  1761. def apply(self, sample, context=None):
  1762. assert 'gt_rbox' in sample
  1763. assert sample['gt_rbox'].shape[1] == 5
  1764. rrects = sample['gt_rbox']
  1765. x_ctr = rrects[:, 0]
  1766. y_ctr = rrects[:, 1]
  1767. width = rrects[:, 2]
  1768. height = rrects[:, 3]
  1769. x1 = x_ctr - width / 2.0
  1770. y1 = y_ctr - height / 2.0
  1771. x2 = x_ctr + width / 2.0
  1772. y2 = y_ctr + height / 2.0
  1773. sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
  1774. polys = bbox_utils.rbox2poly_np(rrects)
  1775. sample['gt_rbox2poly'] = polys
  1776. return sample
  1777. @register_op
  1778. class AugmentHSV(BaseOperator):
  1779. def __init__(self, fraction=0.50, is_bgr=True):
  1780. """
  1781. Augment the SV channel of image data.
  1782. Args:
  1783. fraction (float): the fraction for augment. Default: 0.5.
  1784. is_bgr (bool): whether the image is BGR mode. Default: True.
  1785. """
  1786. super(AugmentHSV, self).__init__()
  1787. self.fraction = fraction
  1788. self.is_bgr = is_bgr
  1789. def apply(self, sample, context=None):
  1790. img = sample['image']
  1791. if self.is_bgr:
  1792. img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  1793. else:
  1794. img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
  1795. S = img_hsv[:, :, 1].astype(np.float32)
  1796. V = img_hsv[:, :, 2].astype(np.float32)
  1797. a = (random.random() * 2 - 1) * self.fraction + 1
  1798. S *= a
  1799. if a > 1:
  1800. np.clip(S, a_min=0, a_max=255, out=S)
  1801. a = (random.random() * 2 - 1) * self.fraction + 1
  1802. V *= a
  1803. if a > 1:
  1804. np.clip(V, a_min=0, a_max=255, out=V)
  1805. img_hsv[:, :, 1] = S.astype(np.uint8)
  1806. img_hsv[:, :, 2] = V.astype(np.uint8)
  1807. if self.is_bgr:
  1808. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
  1809. else:
  1810. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
  1811. sample['image'] = img
  1812. return sample
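# Example (illustrative): with fraction=0.5 the gain a is drawn uniformly
# from [0.5, 1.5], independently for the S and V channels; clipping back
# to [0, 255] is only needed when a > 1, since a gain below 1 cannot
# overflow the uint8 range.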
  1813. @register_op
  1814. class Norm2PixelBbox(BaseOperator):
  1815. """
Transform the bounding box's coordinates from [0,1] to pixels.
  1817. """
  1818. def __init__(self):
  1819. super(Norm2PixelBbox, self).__init__()
  1820. def apply(self, sample, context=None):
  1821. assert 'gt_bbox' in sample
  1822. bbox = sample['gt_bbox']
  1823. height, width = sample['image'].shape[:2]
  1824. bbox[:, 0::2] = bbox[:, 0::2] * width
  1825. bbox[:, 1::2] = bbox[:, 1::2] * height
  1826. sample['gt_bbox'] = bbox
  1827. return sample
  1828. @register_op
  1829. class BboxCXCYWH2XYXY(BaseOperator):
  1830. """
  1831. Convert bbox CXCYWH format to XYXY format.
  1832. [center_x, center_y, width, height] -> [x0, y0, x1, y1]
  1833. """
  1834. def __init__(self):
  1835. super(BboxCXCYWH2XYXY, self).__init__()
  1836. def apply(self, sample, context=None):
  1837. assert 'gt_bbox' in sample
  1838. bbox0 = sample['gt_bbox']
  1839. bbox = bbox0.copy()
  1840. bbox[:, :2] = bbox0[:, :2] - bbox0[:, 2:4] / 2.
  1841. bbox[:, 2:4] = bbox0[:, :2] + bbox0[:, 2:4] / 2.
  1842. sample['gt_bbox'] = bbox
  1843. return sample
  1844. @register_op
  1845. class RandomResizeCrop(BaseOperator):
  1846. """Random resize and crop image and bboxes.
  1847. Args:
resizes (list): resize image to one of resizes. If keep_ratio is True and mode is
'long', resize the image's long side to the maximum of target_size; if keep_ratio is
True and mode is 'short', resize the image's short side to the minimum of target_size.
cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
mode (str): resize mode, `long` or `short`. See resizes for details.
prob (float): probability of applying this op.
keep_ratio (bool): whether to keep the aspect ratio, default True
interp (int): the interpolation method
thresholds (list): IoU thresholds for deciding a valid bbox crop.
num_attempts (int): number of tries before giving up.
allow_no_crop (bool): allow returning the sample without actually cropping.
cover_all_box (bool): ensure all bboxes are covered in the final crop.
is_mask_crop (bool): whether to crop the segmentation masks.
  1861. """
  1862. def __init__(
  1863. self,
  1864. resizes,
  1865. cropsizes,
  1866. prob=0.5,
  1867. mode='short',
  1868. keep_ratio=True,
  1869. interp=cv2.INTER_LINEAR,
  1870. num_attempts=3,
  1871. cover_all_box=False,
  1872. allow_no_crop=False,
  1873. thresholds=[0.3, 0.5, 0.7],
  1874. is_mask_crop=False, ):
  1875. super(RandomResizeCrop, self).__init__()
  1876. self.resizes = resizes
  1877. self.cropsizes = cropsizes
  1878. self.prob = prob
  1879. self.mode = mode
  1880. self.resizer = Resize(0, keep_ratio=keep_ratio, interp=interp)
  1881. self.croper = RandomCrop(
  1882. num_attempts=num_attempts,
  1883. cover_all_box=cover_all_box,
  1884. thresholds=thresholds,
  1885. allow_no_crop=allow_no_crop,
  1886. is_mask_crop=is_mask_crop)
  1887. def _format_size(self, size):
  1888. if isinstance(size, Integral):
  1889. size = (size, size)
  1890. return size
  1891. def apply(self, sample, context=None):
  1892. if random.random() < self.prob:
  1893. _resize = self._format_size(random.choice(self.resizes))
  1894. _cropsize = self._format_size(random.choice(self.cropsizes))
  1895. sample = self._resize(
  1896. self.resizer,
  1897. sample,
  1898. size=_resize,
  1899. mode=self.mode,
  1900. context=context)
  1901. sample = self._random_crop(
  1902. self.croper, sample, size=_cropsize, context=context)
  1903. return sample
  1904. @staticmethod
  1905. def _random_crop(croper, sample, size, context=None):
  1906. if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
  1907. return sample
  1908. self = croper
  1909. h, w = sample['image'].shape[:2]
  1910. gt_bbox = sample['gt_bbox']
  1911. cropsize = size
  1912. min_crop = min(cropsize)
  1913. max_crop = max(cropsize)
  1914. thresholds = list(self.thresholds)
  1915. np.random.shuffle(thresholds)
  1916. for thresh in thresholds:
  1917. found = False
  1918. for _ in range(self.num_attempts):
  1919. crop_h = random.randint(min_crop, min(h, max_crop))
  1920. crop_w = random.randint(min_crop, min(w, max_crop))
  1921. crop_y = random.randint(0, h - crop_h)
  1922. crop_x = random.randint(0, w - crop_w)
  1923. crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
  1924. iou = self._iou_matrix(
  1925. gt_bbox, np.array(
  1926. [crop_box], dtype=np.float32))
  1927. if iou.max() < thresh:
  1928. continue
  1929. if self.cover_all_box and iou.min() < thresh:
  1930. continue
  1931. cropped_box, valid_ids = self._crop_box_with_center_constraint(
  1932. gt_bbox, np.array(
  1933. crop_box, dtype=np.float32))
  1934. if valid_ids.size > 0:
  1935. found = True
  1936. break
  1937. if found:
  1938. if self.is_mask_crop and 'gt_poly' in sample and len(sample[
  1939. 'gt_poly']) > 0:
  1940. crop_polys = self.crop_segms(
  1941. sample['gt_poly'],
  1942. valid_ids,
  1943. np.array(
  1944. crop_box, dtype=np.int64),
  1945. h,
  1946. w)
  1947. if [] in crop_polys:
  1948. delete_id = list()
  1949. valid_polys = list()
  1950. for id, crop_poly in enumerate(crop_polys):
  1951. if crop_poly == []:
  1952. delete_id.append(id)
  1953. else:
  1954. valid_polys.append(crop_poly)
  1955. valid_ids = np.delete(valid_ids, delete_id)
  1956. if len(valid_polys) == 0:
  1957. return sample
  1958. sample['gt_poly'] = valid_polys
  1959. else:
  1960. sample['gt_poly'] = crop_polys
  1961. if 'gt_segm' in sample:
  1962. sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
  1963. crop_box)
  1964. sample['gt_segm'] = np.take(
  1965. sample['gt_segm'], valid_ids, axis=0)
  1966. sample['image'] = self._crop_image(sample['image'], crop_box)
  1967. sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
  1968. sample['gt_class'] = np.take(
  1969. sample['gt_class'], valid_ids, axis=0)
  1970. if 'gt_score' in sample:
  1971. sample['gt_score'] = np.take(
  1972. sample['gt_score'], valid_ids, axis=0)
  1973. if 'is_crowd' in sample:
  1974. sample['is_crowd'] = np.take(
  1975. sample['is_crowd'], valid_ids, axis=0)
  1976. return sample
  1977. return sample
  1978. @staticmethod
  1979. def _resize(resizer, sample, size, mode='short', context=None):
  1980. self = resizer
  1981. im = sample['image']
  1982. target_size = size
  1983. if not isinstance(im, np.ndarray):
  1984. raise TypeError("{}: image type is not numpy.".format(self))
  1985. if len(im.shape) != 3:
  1986. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  1987. # apply image
  1988. im_shape = im.shape
  1989. if self.keep_ratio:
  1990. im_size_min = np.min(im_shape[0:2])
  1991. im_size_max = np.max(im_shape[0:2])
  1992. target_size_min = np.min(target_size)
  1993. target_size_max = np.max(target_size)
  1994. if mode == 'long':
  1995. im_scale = min(target_size_min / im_size_min,
  1996. target_size_max / im_size_max)
  1997. else:
  1998. im_scale = max(target_size_min / im_size_min,
  1999. target_size_max / im_size_max)
  2000. resize_h = im_scale * float(im_shape[0])
  2001. resize_w = im_scale * float(im_shape[1])
  2002. im_scale_x = im_scale
  2003. im_scale_y = im_scale
  2004. else:
  2005. resize_h, resize_w = target_size
  2006. im_scale_y = resize_h / im_shape[0]
  2007. im_scale_x = resize_w / im_shape[1]
  2008. im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
  2009. sample['image'] = im
  2010. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
  2011. if 'scale_factor' in sample:
  2012. scale_factor = sample['scale_factor']
  2013. sample['scale_factor'] = np.asarray(
  2014. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  2015. dtype=np.float32)
  2016. else:
  2017. sample['scale_factor'] = np.asarray(
  2018. [im_scale_y, im_scale_x], dtype=np.float32)
  2019. # apply bbox
  2020. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2021. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
  2022. [im_scale_x, im_scale_y],
  2023. [resize_w, resize_h])
  2024. # apply rbox
  2025. if 'gt_rbox2poly' in sample:
  2026. if np.array(sample['gt_rbox2poly']).shape[1] != 8:
logger.warning(
"gt_rbox2poly's length should be 8, but actually is {}".
format(len(sample['gt_rbox2poly'])))
  2030. sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
  2031. [im_scale_x, im_scale_y],
  2032. [resize_w, resize_h])
  2033. # apply polygon
  2034. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2035. sample['gt_poly'] = self.apply_segm(
  2036. sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])
  2037. # apply semantic
if 'semantic' in sample and sample['semantic'] is not None:
  2039. semantic = sample['semantic']
  2040. semantic = cv2.resize(
  2041. semantic.astype('float32'),
  2042. None,
  2043. None,
  2044. fx=im_scale_x,
  2045. fy=im_scale_y,
  2046. interpolation=self.interp)
  2047. semantic = np.asarray(semantic).astype('int32')
  2048. semantic = np.expand_dims(semantic, 0)
  2049. sample['semantic'] = semantic
  2050. # apply gt_segm
  2051. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2052. masks = [
  2053. cv2.resize(
  2054. gt_segm,
  2055. None,
  2056. None,
  2057. fx=im_scale_x,
  2058. fy=im_scale_y,
  2059. interpolation=cv2.INTER_NEAREST)
  2060. for gt_segm in sample['gt_segm']
  2061. ]
  2062. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  2063. return sample
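# Worked example for _resize (illustrative): a 600x800 (h x w) image with
# size=(640, 640) and keep_ratio=True gives, for mode='short',
# im_scale = max(640 / 600, 640 / 800) ~= 1.067 (short side -> 640), and
# for mode='long', im_scale = min(640 / 600, 640 / 800) = 0.8
# (long side -> 640).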
  2064. class RandomPerspective(BaseOperator):
  2065. """
Rotate, translate, scale, shear and apply perspective to the image and bboxes randomly,
  2067. refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
  2068. Args:
  2069. degree (int): rotation degree, uniformly sampled in [-degree, degree]
  2070. translate (float): translate fraction, translate_x and translate_y are uniformly sampled
  2071. in [0.5 - translate, 0.5 + translate]
  2072. scale (float): scale factor, uniformly sampled in [1 - scale, 1 + scale]
  2073. shear (int): shear degree, shear_x and shear_y are uniformly sampled in [-shear, shear]
  2074. perspective (float): perspective_x and perspective_y are uniformly sampled in [-perspective, perspective]
  2075. area_thr (float): the area threshold of bbox to be kept after transformation, default 0.25
  2076. fill_value (tuple): value used in case of a constant border, default (114, 114, 114)
  2077. """
  2078. def __init__(self,
  2079. degree=10,
  2080. translate=0.1,
  2081. scale=0.1,
  2082. shear=10,
  2083. perspective=0.0,
  2084. border=[0, 0],
  2085. area_thr=0.25,
  2086. fill_value=(114, 114, 114)):
  2087. super(RandomPerspective, self).__init__()
  2088. self.degree = degree
  2089. self.translate = translate
  2090. self.scale = scale
  2091. self.shear = shear
  2092. self.perspective = perspective
  2093. self.border = border
  2094. self.area_thr = area_thr
  2095. self.fill_value = fill_value
  2096. def apply(self, sample, context=None):
  2097. im = sample['image']
  2098. height = im.shape[0] + self.border[0] * 2
  2099. width = im.shape[1] + self.border[1] * 2
  2100. # center
  2101. C = np.eye(3)
  2102. C[0, 2] = -im.shape[1] / 2
  2103. C[1, 2] = -im.shape[0] / 2
  2104. # perspective
  2105. P = np.eye(3)
  2106. P[2, 0] = random.uniform(-self.perspective, self.perspective)
  2107. P[2, 1] = random.uniform(-self.perspective, self.perspective)
  2108. # Rotation and scale
  2109. R = np.eye(3)
  2110. a = random.uniform(-self.degree, self.degree)
  2111. s = random.uniform(1 - self.scale, 1 + self.scale)
  2112. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
  2113. # Shear
  2114. S = np.eye(3)
  2115. # shear x (deg)
  2116. S[0, 1] = math.tan(
  2117. random.uniform(-self.shear, self.shear) * math.pi / 180)
  2118. # shear y (deg)
  2119. S[1, 0] = math.tan(
  2120. random.uniform(-self.shear, self.shear) * math.pi / 180)
  2121. # Translation
  2122. T = np.eye(3)
  2123. T[0, 2] = random.uniform(0.5 - self.translate,
  2124. 0.5 + self.translate) * width
  2125. T[1, 2] = random.uniform(0.5 - self.translate,
  2126. 0.5 + self.translate) * height
  2127. # matmul
  2128. # M = T @ S @ R @ P @ C
  2129. M = np.eye(3)
  2130. for cM in [T, S, R, P, C]:
  2131. M = np.matmul(M, cM)
  2132. if (self.border[0] != 0) or (self.border[1] != 0) or (
  2133. M != np.eye(3)).any():
  2134. if self.perspective:
  2135. im = cv2.warpPerspective(
  2136. im, M, dsize=(width, height), borderValue=self.fill_value)
  2137. else:
  2138. im = cv2.warpAffine(
  2139. im,
  2140. M[:2],
  2141. dsize=(width, height),
  2142. borderValue=self.fill_value)
  2143. sample['image'] = im
  2144. if sample['gt_bbox'].shape[0] > 0:
  2145. sample = transform_bbox(
  2146. sample,
  2147. M,
  2148. width,
  2149. height,
  2150. area_thr=self.area_thr,
  2151. perspective=self.perspective)
  2152. return sample
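# The loop above composes M = T @ S @ R @ P @ C, applied right to left:
# C recenters the image on the origin, P adds perspective, R rotates and
# scales, S shears, and T translates; a homogeneous point p maps to M @ p.
# A quick equivalence check (illustrative):
#
#     assert np.allclose(M, T @ S @ R @ P @ C)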
  2153. @register_op
  2154. class Mosaic(BaseOperator):
  2155. """
  2156. Mosaic Data Augmentation, refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
  2157. """
  2158. def __init__(self,
  2159. target_size,
  2160. mosaic_border=None,
  2161. fill_value=(114, 114, 114)):
  2162. super(Mosaic, self).__init__()
  2163. self.target_size = target_size
  2164. if mosaic_border is None:
  2165. mosaic_border = (-target_size // 2, -target_size // 2)
  2166. self.mosaic_border = mosaic_border
  2167. self.fill_value = fill_value
  2168. def __call__(self, sample, context=None):
  2169. if not isinstance(sample, Sequence):
  2170. return sample
  2171. s = self.target_size
  2172. yc, xc = [
  2173. int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border
  2174. ]
  2175. boxes = [x['gt_bbox'] for x in sample]
  2176. labels = [x['gt_class'] for x in sample]
  2177. for i in range(len(sample)):
  2178. im = sample[i]['image']
  2179. h, w, c = im.shape
  2180. if i == 0: # top left
  2181. image = np.ones(
  2182. (s * 2, s * 2, c), dtype=np.uint8) * self.fill_value
  2183. # xmin, ymin, xmax, ymax (dst image)
  2184. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
  2185. # xmin, ymin, xmax, ymax (src image)
  2186. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
  2187. elif i == 1: # top right
  2188. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
  2189. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  2190. elif i == 2: # bottom left
  2191. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
  2192. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(
  2193. y2a - y1a, h)
  2194. elif i == 3: # bottom right
  2195. x1a, y1a, x2a, y2a = xc, yc, min(xc + w,
  2196. s * 2), min(s * 2, yc + h)
  2197. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  2198. image[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]
  2199. padw = x1a - x1b
  2200. padh = y1a - y1b
  2201. boxes[i] = boxes[i] + (padw, padh, padw, padh)
  2202. boxes = np.concatenate(boxes, axis=0)
  2203. boxes = np.clip(boxes, 0, s * 2)
  2204. labels = np.concatenate(labels, axis=0)
  2205. if 'is_crowd' in sample[0]:
  2206. is_crowd = np.concatenate([x['is_crowd'] for x in sample], axis=0)
  2207. if 'difficult' in sample[0]:
  2208. difficult = np.concatenate(
  2209. [x['difficult'] for x in sample], axis=0)
  2210. sample = sample[0]
  2211. sample['image'] = image.astype(np.uint8)
  2212. sample['gt_bbox'] = boxes
  2213. sample['gt_class'] = labels
  2214. if 'is_crowd' in sample:
  2215. sample['is_crowd'] = is_crowd
  2216. if 'difficult' in sample:
  2217. sample['difficult'] = difficult
  2218. return sample
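# Layout sketch: the four samples are pasted onto a (2s, 2s) canvas around
# the random center (xc, yc) -- top-left, top-right, bottom-left and
# bottom-right in that order -- and each sample's boxes are shifted by the
# paste offset (padw, padh) before being concatenated and clipped to
# [0, 2s].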
  2219. @register_op
  2220. class RandomSelect(BaseOperator):
  2221. """
  2222. Randomly choose a transformation between transforms1 and transforms2,
  2223. and the probability of choosing transforms1 is p.
  2224. """
  2225. def __init__(self, transforms1, transforms2, p=0.5):
  2226. super(RandomSelect, self).__init__()
  2227. self.transforms1 = Compose(transforms1)
  2228. self.transforms2 = Compose(transforms2)
  2229. self.p = p
  2230. def apply(self, sample, context=None):
  2231. if random.random() < self.p:
  2232. return self.transforms1(sample)
  2233. return self.transforms2(sample)
  2234. @register_op
  2235. class RandomShortSideResize(BaseOperator):
  2236. def __init__(self,
  2237. short_side_sizes,
  2238. max_size=None,
  2239. interp=cv2.INTER_LINEAR,
  2240. random_interp=False):
  2241. """
Resize the image randomly according to the short side. If max_size is not None,
the long side is capped at max_size. The whole process keeps the aspect ratio.
  2244. Args:
  2245. short_side_sizes (list|tuple): Image target short side size.
  2246. max_size (int): The size of the longest side of image after resize.
  2247. interp (int): The interpolation method.
  2248. random_interp (bool): Whether random select interpolation method.
  2249. """
  2250. super(RandomShortSideResize, self).__init__()
  2251. assert isinstance(short_side_sizes,
  2252. Sequence), "short_side_sizes must be List or Tuple"
  2253. self.short_side_sizes = short_side_sizes
  2254. self.max_size = max_size
  2255. self.interp = interp
  2256. self.random_interp = random_interp
  2257. self.interps = [
  2258. cv2.INTER_NEAREST,
  2259. cv2.INTER_LINEAR,
  2260. cv2.INTER_AREA,
  2261. cv2.INTER_CUBIC,
  2262. cv2.INTER_LANCZOS4,
  2263. ]
  2264. def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
  2265. h, w = image_shape
  2266. if max_size is not None:
  2267. min_original_size = float(min((w, h)))
  2268. max_original_size = float(max((w, h)))
  2269. if max_original_size / min_original_size * size > max_size:
  2270. size = int(
  2271. round(max_size * min_original_size / max_original_size))
  2272. if (w <= h and w == size) or (h <= w and h == size):
  2273. return (w, h)
  2274. if w < h:
  2275. ow = size
  2276. oh = int(size * h / w)
  2277. else:
  2278. oh = size
  2279. ow = int(size * w / h)
  2280. return (ow, oh)
  2281. def resize(self,
  2282. sample,
  2283. target_size,
  2284. max_size=None,
  2285. interp=cv2.INTER_LINEAR):
  2286. im = sample['image']
  2287. if not isinstance(im, np.ndarray):
  2288. raise TypeError("{}: image type is not numpy.".format(self))
  2289. if len(im.shape) != 3:
  2290. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  2291. target_size = self.get_size_with_aspect_ratio(im.shape[:2],
  2292. target_size, max_size)
  2293. im_scale_y, im_scale_x = target_size[1] / im.shape[0], target_size[
  2294. 0] / im.shape[1]
  2295. sample['image'] = cv2.resize(im, target_size, interpolation=interp)
  2296. sample['im_shape'] = np.asarray(target_size[::-1], dtype=np.float32)
  2297. if 'scale_factor' in sample:
  2298. scale_factor = sample['scale_factor']
  2299. sample['scale_factor'] = np.asarray(
  2300. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  2301. dtype=np.float32)
  2302. else:
  2303. sample['scale_factor'] = np.asarray(
  2304. [im_scale_y, im_scale_x], dtype=np.float32)
  2305. # apply bbox
  2306. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2307. sample['gt_bbox'] = self.apply_bbox(
  2308. sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
  2309. # apply polygon
  2310. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2311. sample['gt_poly'] = self.apply_segm(
  2312. sample['gt_poly'], im.shape[:2], [im_scale_x, im_scale_y])
  2313. # apply semantic
if 'semantic' in sample and sample['semantic'] is not None:
  2315. semantic = sample['semantic']
  2316. semantic = cv2.resize(
  2317. semantic.astype('float32'),
  2318. target_size,
  2319. interpolation=self.interp)
  2320. semantic = np.asarray(semantic).astype('int32')
  2321. semantic = np.expand_dims(semantic, 0)
  2322. sample['semantic'] = semantic
  2323. # apply gt_segm
  2324. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2325. masks = [
  2326. cv2.resize(
  2327. gt_segm, target_size, interpolation=cv2.INTER_NEAREST)
  2328. for gt_segm in sample['gt_segm']
  2329. ]
  2330. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  2331. return sample
  2332. def apply_bbox(self, bbox, scale, size):
  2333. im_scale_x, im_scale_y = scale
  2334. resize_w, resize_h = size
  2335. bbox[:, 0::2] *= im_scale_x
  2336. bbox[:, 1::2] *= im_scale_y
  2337. bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
  2338. bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
  2339. return bbox.astype('float32')
  2340. def apply_segm(self, segms, im_size, scale):
  2341. def _resize_poly(poly, im_scale_x, im_scale_y):
  2342. resized_poly = np.array(poly).astype('float32')
  2343. resized_poly[0::2] *= im_scale_x
  2344. resized_poly[1::2] *= im_scale_y
  2345. return resized_poly.tolist()
  2346. def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
  2347. if 'counts' in rle and type(rle['counts']) == list:
  2348. rle = mask_util.frPyObjects(rle, im_h, im_w)
  2349. mask = mask_util.decode(rle)
  2350. mask = cv2.resize(
  2351. mask,
  2352. None,
  2353. None,
  2354. fx=im_scale_x,
  2355. fy=im_scale_y,
  2356. interpolation=self.interp)
  2357. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  2358. return rle
  2359. im_h, im_w = im_size
  2360. im_scale_x, im_scale_y = scale
  2361. resized_segms = []
import pycocotools.mask as mask_util
for segm in segms:
if is_poly(segm):
# Polygon format
resized_segms.append([
_resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
])
else:
# RLE format
resized_segms.append(
_resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
  2373. return resized_segms
  2374. def apply(self, sample, context=None):
  2375. target_size = random.choice(self.short_side_sizes)
  2376. interp = random.choice(
  2377. self.interps) if self.random_interp else self.interp
  2378. return self.resize(sample, target_size, self.max_size, interp)
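# Worked example for get_size_with_aspect_ratio (illustrative): for a
# 720x1280 (h x w) image with size=800 and max_size=1333, the short-side
# target is first capped at round(1333 * 720 / 1280) = 750, giving
# (ow, oh) = (1333, 750); the long side therefore never exceeds max_size.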
  2379. @register_op
  2380. class RandomSizeCrop(BaseOperator):
  2381. """
Crop the image randomly according to `min_size` and `max_size`
  2383. """
  2384. def __init__(self, min_size, max_size):
  2385. super(RandomSizeCrop, self).__init__()
  2386. self.min_size = min_size
  2387. self.max_size = max_size
  2388. from paddle.vision.transforms.functional import crop as paddle_crop
  2389. self.paddle_crop = paddle_crop
  2390. @staticmethod
  2391. def get_crop_params(img_shape, output_size):
  2392. """Get parameters for ``crop`` for a random crop.
  2393. Args:
  2394. img_shape (list|tuple): Image's height and width.
  2395. output_size (list|tuple): Expected output size of the crop.
  2396. Returns:
  2397. tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
  2398. """
  2399. h, w = img_shape
  2400. th, tw = output_size
  2401. if h + 1 < th or w + 1 < tw:
  2402. raise ValueError(
  2403. "Required crop size {} is larger then input image size {}".
  2404. format((th, tw), (h, w)))
  2405. if w == tw and h == th:
  2406. return 0, 0, h, w
# random.randint is inclusive on both ends (unlike torch.randint),
# so the upper bound must not carry the extra +1
i = random.randint(0, h - th)
j = random.randint(0, w - tw)
  2409. return i, j, th, tw
  2410. def crop(self, sample, region):
  2411. image_shape = sample['image'].shape[:2]
  2412. sample['image'] = self.paddle_crop(sample['image'], *region)
  2413. keep_index = None
  2414. # apply bbox
  2415. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2416. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], region)
  2417. bbox = sample['gt_bbox'].reshape([-1, 2, 2])
  2418. area = (bbox[:, 1, :] - bbox[:, 0, :]).prod(axis=1)
  2419. keep_index = np.where(area > 0)[0]
  2420. sample['gt_bbox'] = sample['gt_bbox'][keep_index] if len(
  2421. keep_index) > 0 else np.zeros(
  2422. [0, 4], dtype=np.float32)
  2423. sample['gt_class'] = sample['gt_class'][keep_index] if len(
  2424. keep_index) > 0 else np.zeros(
  2425. [0, 1], dtype=np.float32)
  2426. if 'gt_score' in sample:
  2427. sample['gt_score'] = sample['gt_score'][keep_index] if len(
  2428. keep_index) > 0 else np.zeros(
  2429. [0, 1], dtype=np.float32)
  2430. if 'is_crowd' in sample:
  2431. sample['is_crowd'] = sample['is_crowd'][keep_index] if len(
  2432. keep_index) > 0 else np.zeros(
  2433. [0, 1], dtype=np.float32)
  2434. # apply polygon
  2435. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2436. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], region,
  2437. image_shape)
  2438. if keep_index is not None:
  2439. sample['gt_poly'] = sample['gt_poly'][keep_index]
  2440. # apply gt_segm
  2441. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2442. i, j, h, w = region
  2443. sample['gt_segm'] = sample['gt_segm'][:, i:i + h, j:j + w]
  2444. if keep_index is not None:
  2445. sample['gt_segm'] = sample['gt_segm'][keep_index]
  2446. return sample
  2447. def apply_bbox(self, bbox, region):
  2448. i, j, h, w = region
  2449. region_size = np.asarray([w, h])
  2450. crop_bbox = bbox - np.asarray([j, i, j, i])
  2451. crop_bbox = np.minimum(crop_bbox.reshape([-1, 2, 2]), region_size)
  2452. crop_bbox = crop_bbox.clip(min=0)
  2453. return crop_bbox.reshape([-1, 4]).astype('float32')
  2454. def apply_segm(self, segms, region, image_shape):
  2455. def _crop_poly(segm, crop):
  2456. xmin, ymin, xmax, ymax = crop
  2457. crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
  2458. crop_p = np.array(crop_coord).reshape(4, 2)
  2459. crop_p = Polygon(crop_p)
  2460. crop_segm = list()
  2461. for poly in segm:
  2462. poly = np.array(poly).reshape(len(poly) // 2, 2)
  2463. polygon = Polygon(poly)
  2464. if not polygon.is_valid:
  2465. exterior = polygon.exterior
  2466. multi_lines = exterior.intersection(exterior)
  2467. polygons = shapely.ops.polygonize(multi_lines)
  2468. polygon = MultiPolygon(polygons)
  2469. multi_polygon = list()
  2470. if isinstance(polygon, MultiPolygon):
  2471. multi_polygon = copy.deepcopy(polygon)
  2472. else:
  2473. multi_polygon.append(copy.deepcopy(polygon))
  2474. for per_polygon in multi_polygon:
  2475. inter = per_polygon.intersection(crop_p)
  2476. if not inter:
  2477. continue
  2478. if isinstance(inter, (MultiPolygon, GeometryCollection)):
  2479. for part in inter:
  2480. if not isinstance(part, Polygon):
  2481. continue
  2482. part = np.squeeze(
  2483. np.array(part.exterior.coords[:-1]).reshape(
  2484. 1, -1))
  2485. part[0::2] -= xmin
  2486. part[1::2] -= ymin
  2487. crop_segm.append(part.tolist())
  2488. elif isinstance(inter, Polygon):
  2489. crop_poly = np.squeeze(
  2490. np.array(inter.exterior.coords[:-1]).reshape(1,
  2491. -1))
  2492. crop_poly[0::2] -= xmin
  2493. crop_poly[1::2] -= ymin
  2494. crop_segm.append(crop_poly.tolist())
  2495. else:
  2496. continue
  2497. return crop_segm
  2498. def _crop_rle(rle, crop, height, width):
  2499. if 'counts' in rle and type(rle['counts']) == list:
  2500. rle = mask_util.frPyObjects(rle, height, width)
  2501. mask = mask_util.decode(rle)
  2502. mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
  2503. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  2504. return rle
  2505. i, j, h, w = region
  2506. crop = [j, i, j + w, i + h]
  2507. height, width = image_shape
  2508. crop_segms = []
# imports hoisted out of the loop (see crop_segms above)
import copy
import shapely.ops
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
import pycocotools.mask as mask_util
for segm in segms:
if is_poly(segm):
# Polygon format
crop_segms.append(_crop_poly(segm, crop))
else:
# RLE format
crop_segms.append(_crop_rle(segm, crop, height, width))
  2520. return crop_segms
  2521. def apply(self, sample, context=None):
  2522. h = random.randint(self.min_size,
  2523. min(sample['image'].shape[0], self.max_size))
  2524. w = random.randint(self.min_size,
  2525. min(sample['image'].shape[1], self.max_size))
  2526. region = self.get_crop_params(sample['image'].shape[:2], [h, w])
  2527. return self.crop(sample, region)
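# Usage sketch (hypothetical values): RandomSizeCrop(min_size=384,
# max_size=600) draws the crop height and width uniformly from
# [min_size, min(image_side, max_size)], picks a random top-left corner in
# get_crop_params, and drops annotations whose clipped boxes have no area.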