operators.py 110 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # function:
  15. # operators to process sample,
  16. # eg: decode/resize/crop image
  17. from __future__ import absolute_import
  18. from __future__ import print_function
  19. from __future__ import division
  20. try:
  21. from collections.abc import Sequence
  22. except Exception:
  23. from collections import Sequence
  24. from numbers import Number, Integral
  25. import uuid
  26. import random
  27. import math
  28. import numpy as np
  29. import os
  30. import copy
  31. import logging
  32. import cv2
  33. from PIL import Image, ImageDraw
  34. import pickle
  35. import threading
  36. MUTEX = threading.Lock()
  37. from paddlex.ppdet.core.workspace import serializable
  38. from paddlex.ppdet.modeling import bbox_utils
  39. from ..reader import Compose
  40. from .op_helper import (satisfy_sample_constraint, filter_and_process,
  41. generate_sample_bbox, clip_bbox, data_anchor_sampling,
  42. satisfy_sample_constraint_coverage,
  43. crop_image_sampling, generate_sample_bbox_square,
  44. bbox_area_sampling, is_poly, transform_bbox)
  45. from paddlex.ppdet.utils.logger import setup_logger
  46. logger = setup_logger(__name__)
  47. registered_ops = []
  48. def register_op(cls):
  49. registered_ops.append(cls.__name__)
  50. if not hasattr(BaseOperator, cls.__name__):
  51. setattr(BaseOperator, cls.__name__, cls)
  52. else:
  53. raise KeyError("The {} class has been registered.".format(
  54. cls.__name__))
  55. return serializable(cls)
  56. class BboxError(ValueError):
  57. pass
  58. class ImageError(ValueError):
  59. pass
  60. class BaseOperator(object):
  61. def __init__(self, name=None):
  62. if name is None:
  63. name = self.__class__.__name__
  64. self._id = name + '_' + str(uuid.uuid4())[-6:]
  65. def apply(self, sample, context=None):
  66. """ Process a sample.
  67. Args:
  68. sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
  69. context (dict): info about this sample processing
  70. Returns:
  71. result (dict): a processed sample
  72. """
  73. return sample
  74. def __call__(self, sample, context=None):
  75. """ Process a sample.
  76. Args:
  77. sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
  78. context (dict): info about this sample processing
  79. Returns:
  80. result (dict): a processed sample
  81. """
  82. if isinstance(sample, Sequence):
  83. for i in range(len(sample)):
  84. sample[i] = self.apply(sample[i], context)
  85. else:
  86. sample = self.apply(sample, context)
  87. return sample
  88. def __str__(self):
  89. return str(self._id)
  90. @register_op
  91. class Decode(BaseOperator):
  92. def __init__(self):
  93. """ Transform the image data to numpy format following the rgb format
  94. """
  95. super(Decode, self).__init__()
  96. def apply(self, sample, context=None):
  97. """ load image if 'im_file' field is not empty but 'image' is"""
  98. if 'image' not in sample:
  99. with open(sample['im_file'], 'rb') as f:
  100. sample['image'] = f.read()
  101. sample.pop('im_file')
  102. im = sample['image']
  103. data = np.frombuffer(im, dtype='uint8')
  104. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  105. if 'keep_ori_im' in sample and sample['keep_ori_im']:
  106. sample['ori_image'] = im
  107. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  108. sample['image'] = im
  109. if 'h' not in sample:
  110. sample['h'] = im.shape[0]
  111. elif sample['h'] != im.shape[0]:
  112. logger.warning(
  113. "The actual image height: {} is not equal to the "
  114. "height: {} in annotation, and update sample['h'] by actual "
  115. "image height.".format(im.shape[0], sample['h']))
  116. sample['h'] = im.shape[0]
  117. if 'w' not in sample:
  118. sample['w'] = im.shape[1]
  119. elif sample['w'] != im.shape[1]:
  120. logger.warning(
  121. "The actual image width: {} is not equal to the "
  122. "width: {} in annotation, and update sample['w'] by actual "
  123. "image width.".format(im.shape[1], sample['w']))
  124. sample['w'] = im.shape[1]
  125. sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  126. sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  127. return sample
  128. def _make_dirs(dirname):
  129. try:
  130. from pathlib import Path
  131. except ImportError:
  132. from pathlib2 import Path
  133. Path(dirname).mkdir(exist_ok=True)
  134. @register_op
  135. class DecodeCache(BaseOperator):
  136. def __init__(self, cache_root=None):
  137. '''decode image and caching
  138. '''
  139. super(DecodeCache, self).__init__()
  140. self.use_cache = False if cache_root is None else True
  141. self.cache_root = cache_root
  142. if cache_root is not None:
  143. _make_dirs(cache_root)
  144. def apply(self, sample, context=None):
  145. if self.use_cache and os.path.exists(
  146. self.cache_path(self.cache_root, sample['im_file'])):
  147. path = self.cache_path(self.cache_root, sample['im_file'])
  148. im = self.load(path)
  149. else:
  150. if 'image' not in sample:
  151. with open(sample['im_file'], 'rb') as f:
  152. sample['image'] = f.read()
  153. im = sample['image']
  154. data = np.frombuffer(im, dtype='uint8')
  155. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  156. if 'keep_ori_im' in sample and sample['keep_ori_im']:
  157. sample['ori_image'] = im
  158. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  159. if self.use_cache and not os.path.exists(
  160. self.cache_path(self.cache_root, sample['im_file'])):
  161. path = self.cache_path(self.cache_root, sample['im_file'])
  162. self.dump(im, path)
  163. sample['image'] = im
  164. sample['h'] = im.shape[0]
  165. sample['w'] = im.shape[1]
  166. sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  167. sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  168. return sample
  169. @staticmethod
  170. def cache_path(dir_oot, im_file):
  171. return os.path.join(dir_oot, os.path.basename(im_file) + '.pkl')
  172. @staticmethod
  173. def load(path):
  174. with open(path, 'rb') as f:
  175. im = pickle.load(f)
  176. return im
  177. @staticmethod
  178. def dump(obj, path):
  179. MUTEX.acquire()
  180. try:
  181. with open(path, 'wb') as f:
  182. pickle.dump(obj, f)
  183. except Exception as e:
  184. logger.warning('dump {} occurs exception {}'.format(path, str(e)))
  185. finally:
  186. MUTEX.release()
  187. @register_op
  188. class Permute(BaseOperator):
  189. def __init__(self):
  190. """
  191. Change the channel to be (C, H, W)
  192. """
  193. super(Permute, self).__init__()
  194. def apply(self, sample, context=None):
  195. im = sample['image']
  196. im = im.transpose((2, 0, 1))
  197. sample['image'] = im
  198. return sample
  199. @register_op
  200. class Lighting(BaseOperator):
  201. """
  202. Lighting the image by eigenvalues and eigenvectors
  203. Args:
  204. eigval (list): eigenvalues
  205. eigvec (list): eigenvectors
  206. alphastd (float): random weight of lighting, 0.1 by default
  207. """
  208. def __init__(self, eigval, eigvec, alphastd=0.1):
  209. super(Lighting, self).__init__()
  210. self.alphastd = alphastd
  211. self.eigval = np.array(eigval).astype('float32')
  212. self.eigvec = np.array(eigvec).astype('float32')
  213. def apply(self, sample, context=None):
  214. alpha = np.random.normal(scale=self.alphastd, size=(3, ))
  215. sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
  216. return sample
  217. @register_op
  218. class RandomErasingImage(BaseOperator):
  219. def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
  220. """
  221. Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
  222. Args:
  223. prob (float): probability to carry out random erasing
  224. lower (float): lower limit of the erasing area ratio
  225. higher (float): upper limit of the erasing area ratio
  226. aspect_ratio (float): aspect ratio of the erasing region
  227. """
  228. super(RandomErasingImage, self).__init__()
  229. self.prob = prob
  230. self.lower = lower
  231. self.higher = higher
  232. self.aspect_ratio = aspect_ratio
  233. def apply(self, sample):
  234. gt_bbox = sample['gt_bbox']
  235. im = sample['image']
  236. if not isinstance(im, np.ndarray):
  237. raise TypeError("{}: image is not a numpy array.".format(self))
  238. if len(im.shape) != 3:
  239. raise ImageError("{}: image is not 3-dimensional.".format(self))
  240. for idx in range(gt_bbox.shape[0]):
  241. if self.prob <= np.random.rand():
  242. continue
  243. x1, y1, x2, y2 = gt_bbox[idx, :]
  244. w_bbox = x2 - x1
  245. h_bbox = y2 - y1
  246. area = w_bbox * h_bbox
  247. target_area = random.uniform(self.lower, self.higher) * area
  248. aspect_ratio = random.uniform(self.aspect_ratio,
  249. 1 / self.aspect_ratio)
  250. h = int(round(math.sqrt(target_area * aspect_ratio)))
  251. w = int(round(math.sqrt(target_area / aspect_ratio)))
  252. if w < w_bbox and h < h_bbox:
  253. off_y1 = random.randint(0, int(h_bbox - h))
  254. off_x1 = random.randint(0, int(w_bbox - w))
  255. im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
  256. x1 + off_x1 + w), :] = 0
  257. sample['image'] = im
  258. return sample
  259. @register_op
  260. class NormalizeImage(BaseOperator):
  261. def __init__(self,
  262. mean=[0.485, 0.456, 0.406],
  263. std=[1, 1, 1],
  264. is_scale=True):
  265. """
  266. Args:
  267. mean (list): the pixel mean
  268. std (list): the pixel variance
  269. """
  270. super(NormalizeImage, self).__init__()
  271. self.mean = mean
  272. self.std = std
  273. self.is_scale = is_scale
  274. if not (isinstance(self.mean, list) and isinstance(self.std, list) and
  275. isinstance(self.is_scale, bool)):
  276. raise TypeError("{}: input type is invalid.".format(self))
  277. from functools import reduce
  278. if reduce(lambda x, y: x * y, self.std) == 0:
  279. raise ValueError('{}: std is invalid!'.format(self))
  280. def apply(self, sample, context=None):
  281. """Normalize the image.
  282. Operators:
  283. 1.(optional) Scale the image to [0,1]
  284. 2. Each pixel minus mean and is divided by std
  285. """
  286. im = sample['image']
  287. im = im.astype(np.float32, copy=False)
  288. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  289. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  290. if self.is_scale:
  291. im = im / 255.0
  292. im -= mean
  293. im /= std
  294. sample['image'] = im
  295. return sample
  296. @register_op
  297. class GridMask(BaseOperator):
  298. def __init__(self,
  299. use_h=True,
  300. use_w=True,
  301. rotate=1,
  302. offset=False,
  303. ratio=0.5,
  304. mode=1,
  305. prob=0.7,
  306. upper_iter=360000):
  307. """
  308. GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
  309. Args:
  310. use_h (bool): whether to mask vertically
  311. use_w (boo;): whether to mask horizontally
  312. rotate (float): angle for the mask to rotate
  313. offset (float): mask offset
  314. ratio (float): mask ratio
  315. mode (int): gridmask mode
  316. prob (float): max probability to carry out gridmask
  317. upper_iter (int): suggested to be equal to global max_iter
  318. """
  319. super(GridMask, self).__init__()
  320. self.use_h = use_h
  321. self.use_w = use_w
  322. self.rotate = rotate
  323. self.offset = offset
  324. self.ratio = ratio
  325. self.mode = mode
  326. self.prob = prob
  327. self.upper_iter = upper_iter
  328. from .gridmask_utils import Gridmask
  329. self.gridmask_op = Gridmask(
  330. use_h,
  331. use_w,
  332. rotate=rotate,
  333. offset=offset,
  334. ratio=ratio,
  335. mode=mode,
  336. prob=prob,
  337. upper_iter=upper_iter)
  338. def apply(self, sample, context=None):
  339. sample['image'] = self.gridmask_op(sample['image'],
  340. sample['curr_iter'])
  341. return sample
  342. @register_op
  343. class RandomDistort(BaseOperator):
  344. """Random color distortion.
  345. Args:
  346. hue (list): hue settings. in [lower, upper, probability] format.
  347. saturation (list): saturation settings. in [lower, upper, probability] format.
  348. contrast (list): contrast settings. in [lower, upper, probability] format.
  349. brightness (list): brightness settings. in [lower, upper, probability] format.
  350. random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
  351. order.
  352. count (int): the number of doing distrot
  353. random_channel (bool): whether to swap channels randomly
  354. """
  355. def __init__(self,
  356. hue=[-18, 18, 0.5],
  357. saturation=[0.5, 1.5, 0.5],
  358. contrast=[0.5, 1.5, 0.5],
  359. brightness=[0.5, 1.5, 0.5],
  360. random_apply=True,
  361. count=4,
  362. random_channel=False):
  363. super(RandomDistort, self).__init__()
  364. self.hue = hue
  365. self.saturation = saturation
  366. self.contrast = contrast
  367. self.brightness = brightness
  368. self.random_apply = random_apply
  369. self.count = count
  370. self.random_channel = random_channel
  371. def apply_hue(self, img):
  372. low, high, prob = self.hue
  373. if np.random.uniform(0., 1.) < prob:
  374. return img
  375. img = img.astype(np.float32)
  376. # it works, but result differ from HSV version
  377. delta = np.random.uniform(low, high)
  378. u = np.cos(delta * np.pi)
  379. w = np.sin(delta * np.pi)
  380. bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
  381. tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
  382. [0.211, -0.523, 0.311]])
  383. ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
  384. [1.0, -1.107, 1.705]])
  385. t = np.dot(np.dot(ityiq, bt), tyiq).T
  386. img = np.dot(img, t)
  387. return img
  388. def apply_saturation(self, img):
  389. low, high, prob = self.saturation
  390. if np.random.uniform(0., 1.) < prob:
  391. return img
  392. delta = np.random.uniform(low, high)
  393. img = img.astype(np.float32)
  394. # it works, but result differ from HSV version
  395. gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
  396. gray = gray.sum(axis=2, keepdims=True)
  397. gray *= (1.0 - delta)
  398. img *= delta
  399. img += gray
  400. return img
  401. def apply_contrast(self, img):
  402. low, high, prob = self.contrast
  403. if np.random.uniform(0., 1.) < prob:
  404. return img
  405. delta = np.random.uniform(low, high)
  406. img = img.astype(np.float32)
  407. img *= delta
  408. return img
  409. def apply_brightness(self, img):
  410. low, high, prob = self.brightness
  411. if np.random.uniform(0., 1.) < prob:
  412. return img
  413. delta = np.random.uniform(low, high)
  414. img = img.astype(np.float32)
  415. img += delta
  416. return img
  417. def apply(self, sample, context=None):
  418. img = sample['image']
  419. if self.random_apply:
  420. functions = [
  421. self.apply_brightness, self.apply_contrast,
  422. self.apply_saturation, self.apply_hue
  423. ]
  424. distortions = np.random.permutation(functions)[:self.count]
  425. for func in distortions:
  426. img = func(img)
  427. sample['image'] = img
  428. return sample
  429. img = self.apply_brightness(img)
  430. mode = np.random.randint(0, 2)
  431. if mode:
  432. img = self.apply_contrast(img)
  433. img = self.apply_saturation(img)
  434. img = self.apply_hue(img)
  435. if not mode:
  436. img = self.apply_contrast(img)
  437. if self.random_channel:
  438. if np.random.randint(0, 2):
  439. img = img[..., np.random.permutation(3)]
  440. sample['image'] = img
  441. return sample
  442. @register_op
  443. class AutoAugment(BaseOperator):
  444. def __init__(self, autoaug_type="v1"):
  445. """
  446. Args:
  447. autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
  448. """
  449. super(AutoAugment, self).__init__()
  450. self.autoaug_type = autoaug_type
  451. def apply(self, sample, context=None):
  452. """
  453. Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
  454. """
  455. im = sample['image']
  456. gt_bbox = sample['gt_bbox']
  457. if not isinstance(im, np.ndarray):
  458. raise TypeError("{}: image is not a numpy array.".format(self))
  459. if len(im.shape) != 3:
  460. raise ImageError("{}: image is not 3-dimensional.".format(self))
  461. if len(gt_bbox) == 0:
  462. return sample
  463. height, width, _ = im.shape
  464. norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
  465. norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
  466. norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
  467. norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
  468. norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
  469. from .autoaugment_utils import distort_image_with_autoaugment
  470. im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
  471. self.autoaug_type)
  472. gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
  473. gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
  474. gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
  475. gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
  476. sample['image'] = im
  477. sample['gt_bbox'] = gt_bbox
  478. return sample
  479. @register_op
  480. class RandomFlip(BaseOperator):
  481. def __init__(self, prob=0.5):
  482. """
  483. Args:
  484. prob (float): the probability of flipping image
  485. """
  486. super(RandomFlip, self).__init__()
  487. self.prob = prob
  488. if not (isinstance(self.prob, float)):
  489. raise TypeError("{}: input type is invalid.".format(self))
  490. def apply_segm(self, segms, height, width):
  491. def _flip_poly(poly, width):
  492. flipped_poly = np.array(poly)
  493. flipped_poly[0::2] = width - np.array(poly[0::2])
  494. return flipped_poly.tolist()
  495. def _flip_rle(rle, height, width):
  496. if 'counts' in rle and type(rle['counts']) == list:
  497. rle = mask_util.frPyObjects(rle, height, width)
  498. mask = mask_util.decode(rle)
  499. mask = mask[:, ::-1]
  500. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  501. return rle
  502. flipped_segms = []
  503. for segm in segms:
  504. if is_poly(segm):
  505. # Polygon format
  506. flipped_segms.append(
  507. [_flip_poly(poly, width) for poly in segm])
  508. else:
  509. # RLE format
  510. import pycocotools.mask as mask_util
  511. flipped_segms.append(_flip_rle(segm, height, width))
  512. return flipped_segms
  513. def apply_keypoint(self, gt_keypoint, width):
  514. for i in range(gt_keypoint.shape[1]):
  515. if i % 2 == 0:
  516. old_x = gt_keypoint[:, i].copy()
  517. gt_keypoint[:, i] = width - old_x
  518. return gt_keypoint
  519. def apply_image(self, image):
  520. return image[:, ::-1, :]
  521. def apply_bbox(self, bbox, width):
  522. oldx1 = bbox[:, 0].copy()
  523. oldx2 = bbox[:, 2].copy()
  524. bbox[:, 0] = width - oldx2
  525. bbox[:, 2] = width - oldx1
  526. return bbox
  527. def apply_rbox(self, bbox, width):
  528. oldx1 = bbox[:, 0].copy()
  529. oldx2 = bbox[:, 2].copy()
  530. oldx3 = bbox[:, 4].copy()
  531. oldx4 = bbox[:, 6].copy()
  532. bbox[:, 0] = width - oldx1
  533. bbox[:, 2] = width - oldx2
  534. bbox[:, 4] = width - oldx3
  535. bbox[:, 6] = width - oldx4
  536. bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
  537. return bbox
  538. def apply(self, sample, context=None):
  539. """Filp the image and bounding box.
  540. Operators:
  541. 1. Flip the image numpy.
  542. 2. Transform the bboxes' x coordinates.
  543. (Must judge whether the coordinates are normalized!)
  544. 3. Transform the segmentations' x coordinates.
  545. (Must judge whether the coordinates are normalized!)
  546. Output:
  547. sample: the image, bounding box and segmentation part
  548. in sample are flipped.
  549. """
  550. if np.random.uniform(0, 1) < self.prob:
  551. im = sample['image']
  552. height, width = im.shape[:2]
  553. im = self.apply_image(im)
  554. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  555. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
  556. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  557. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
  558. width)
  559. if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
  560. sample['gt_keypoint'] = self.apply_keypoint(
  561. sample['gt_keypoint'], width)
  562. if 'semantic' in sample and sample['semantic']:
  563. sample['semantic'] = sample['semantic'][:, ::-1]
  564. if 'gt_segm' in sample and sample['gt_segm'].any():
  565. sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
  566. if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
  567. sample['gt_rbox2poly'] = self.apply_rbox(
  568. sample['gt_rbox2poly'], width)
  569. sample['flipped'] = True
  570. sample['image'] = im
  571. return sample
  572. @register_op
  573. class Resize(BaseOperator):
  574. def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
  575. """
  576. Resize image to target size. if keep_ratio is True,
  577. resize the image's long side to the maximum of target_size
  578. if keep_ratio is False, resize the image to target size(h, w)
  579. Args:
  580. target_size (int|list): image target size
  581. keep_ratio (bool): whether keep_ratio or not, default true
  582. interp (int): the interpolation method
  583. """
  584. super(Resize, self).__init__()
  585. self.keep_ratio = keep_ratio
  586. self.interp = interp
  587. if not isinstance(target_size, (Integral, Sequence)):
  588. raise TypeError(
  589. "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
  590. format(type(target_size)))
  591. if isinstance(target_size, Integral):
  592. target_size = [target_size, target_size]
  593. self.target_size = target_size
  594. def apply_image(self, image, scale):
  595. im_scale_x, im_scale_y = scale
  596. return cv2.resize(
  597. image,
  598. None,
  599. None,
  600. fx=im_scale_x,
  601. fy=im_scale_y,
  602. interpolation=self.interp)
  603. def apply_bbox(self, bbox, scale, size):
  604. im_scale_x, im_scale_y = scale
  605. resize_w, resize_h = size
  606. bbox[:, 0::2] *= im_scale_x
  607. bbox[:, 1::2] *= im_scale_y
  608. bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
  609. bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
  610. return bbox
  611. def apply_segm(self, segms, im_size, scale):
  612. def _resize_poly(poly, im_scale_x, im_scale_y):
  613. resized_poly = np.array(poly).astype('float32')
  614. resized_poly[0::2] *= im_scale_x
  615. resized_poly[1::2] *= im_scale_y
  616. return resized_poly.tolist()
  617. def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
  618. if 'counts' in rle and type(rle['counts']) == list:
  619. rle = mask_util.frPyObjects(rle, im_h, im_w)
  620. mask = mask_util.decode(rle)
  621. mask = cv2.resize(
  622. mask,
  623. None,
  624. None,
  625. fx=im_scale_x,
  626. fy=im_scale_y,
  627. interpolation=self.interp)
  628. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  629. return rle
  630. im_h, im_w = im_size
  631. im_scale_x, im_scale_y = scale
  632. resized_segms = []
  633. for segm in segms:
  634. if is_poly(segm):
  635. # Polygon format
  636. resized_segms.append([
  637. _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
  638. ])
  639. else:
  640. # RLE format
  641. import pycocotools.mask as mask_util
  642. resized_segms.append(
  643. _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
  644. return resized_segms
  645. def apply(self, sample, context=None):
  646. """ Resize the image numpy.
  647. """
  648. im = sample['image']
  649. if not isinstance(im, np.ndarray):
  650. raise TypeError("{}: image type is not numpy.".format(self))
  651. if len(im.shape) != 3:
  652. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  653. # apply image
  654. im_shape = im.shape
  655. if self.keep_ratio:
  656. im_size_min = np.min(im_shape[0:2])
  657. im_size_max = np.max(im_shape[0:2])
  658. target_size_min = np.min(self.target_size)
  659. target_size_max = np.max(self.target_size)
  660. im_scale = min(target_size_min / im_size_min,
  661. target_size_max / im_size_max)
  662. resize_h = im_scale * float(im_shape[0])
  663. resize_w = im_scale * float(im_shape[1])
  664. im_scale_x = im_scale
  665. im_scale_y = im_scale
  666. else:
  667. resize_h, resize_w = self.target_size
  668. im_scale_y = resize_h / im_shape[0]
  669. im_scale_x = resize_w / im_shape[1]
  670. im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
  671. sample['image'] = im
  672. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
  673. if 'scale_factor' in sample:
  674. scale_factor = sample['scale_factor']
  675. sample['scale_factor'] = np.asarray(
  676. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  677. dtype=np.float32)
  678. else:
  679. sample['scale_factor'] = np.asarray(
  680. [im_scale_y, im_scale_x], dtype=np.float32)
  681. # apply bbox
  682. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  683. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
  684. [im_scale_x, im_scale_y],
  685. [resize_w, resize_h])
  686. # apply rbox
  687. if 'gt_rbox2poly' in sample:
  688. if np.array(sample['gt_rbox2poly']).shape[1] != 8:
  689. logger.warning(
  690. "gt_rbox2poly's length shoule be 8, but actually is {}".
  691. format(len(sample['gt_rbox2poly'])))
  692. sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
  693. [im_scale_x, im_scale_y],
  694. [resize_w, resize_h])
  695. # apply polygon
  696. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  697. sample['gt_poly'] = self.apply_segm(
  698. sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])
  699. # apply semantic
  700. if 'semantic' in sample and sample['semantic']:
  701. semantic = sample['semantic']
  702. semantic = cv2.resize(
  703. semantic.astype('float32'),
  704. None,
  705. None,
  706. fx=im_scale_x,
  707. fy=im_scale_y,
  708. interpolation=self.interp)
  709. semantic = np.asarray(semantic).astype('int32')
  710. semantic = np.expand_dims(semantic, 0)
  711. sample['semantic'] = semantic
  712. # apply gt_segm
  713. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  714. masks = [
  715. cv2.resize(
  716. gt_segm,
  717. None,
  718. None,
  719. fx=im_scale_x,
  720. fy=im_scale_y,
  721. interpolation=cv2.INTER_NEAREST)
  722. for gt_segm in sample['gt_segm']
  723. ]
  724. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  725. return sample
  726. @register_op
  727. class MultiscaleTestResize(BaseOperator):
  728. def __init__(self,
  729. origin_target_size=[800, 1333],
  730. target_size=[],
  731. interp=cv2.INTER_LINEAR,
  732. use_flip=True):
  733. """
  734. Rescale image to the each size in target size, and capped at max_size.
  735. Args:
  736. origin_target_size (list): origin target size of image
  737. target_size (list): A list of target sizes of image.
  738. interp (int): the interpolation method.
  739. use_flip (bool): whether use flip augmentation.
  740. """
  741. super(MultiscaleTestResize, self).__init__()
  742. self.interp = interp
  743. self.use_flip = use_flip
  744. if not isinstance(target_size, Sequence):
  745. raise TypeError(
  746. "Type of target_size is invalid. Must be List or Tuple, now is {}".
  747. format(type(target_size)))
  748. self.target_size = target_size
  749. if not isinstance(origin_target_size, Sequence):
  750. raise TypeError(
  751. "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
  752. format(type(origin_target_size)))
  753. self.origin_target_size = origin_target_size
  754. def apply(self, sample, context=None):
  755. """ Resize the image numpy for multi-scale test.
  756. """
  757. samples = []
  758. resizer = Resize(
  759. self.origin_target_size, keep_ratio=True, interp=self.interp)
  760. samples.append(resizer(sample.copy(), context))
  761. if self.use_flip:
  762. flipper = RandomFlip(1.1)
  763. samples.append(flipper(sample.copy(), context=context))
  764. for size in self.target_size:
  765. resizer = Resize(size, keep_ratio=True, interp=self.interp)
  766. samples.append(resizer(sample.copy(), context))
  767. return samples
  768. @register_op
  769. class RandomResize(BaseOperator):
  770. def __init__(self,
  771. target_size,
  772. keep_ratio=True,
  773. interp=cv2.INTER_LINEAR,
  774. random_size=True,
  775. random_interp=False):
  776. """
  777. Resize image to target size randomly. random target_size and interpolation method
  778. Args:
  779. target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
  780. keep_ratio (bool): whether keep_raio or not, default true
  781. interp (int): the interpolation method
  782. random_size (bool): whether random select target size of image
  783. random_interp (bool): whether random select interpolation method
  784. """
  785. super(RandomResize, self).__init__()
  786. self.keep_ratio = keep_ratio
  787. self.interp = interp
  788. self.interps = [
  789. cv2.INTER_NEAREST,
  790. cv2.INTER_LINEAR,
  791. cv2.INTER_AREA,
  792. cv2.INTER_CUBIC,
  793. cv2.INTER_LANCZOS4,
  794. ]
  795. assert isinstance(target_size, (
  796. Integral, Sequence)), "target_size must be Integer, List or Tuple"
  797. if random_size and not isinstance(target_size, Sequence):
  798. raise TypeError(
  799. "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
  800. format(type(target_size)))
  801. self.target_size = target_size
  802. self.random_size = random_size
  803. self.random_interp = random_interp
  804. def apply(self, sample, context=None):
  805. """ Resize the image numpy.
  806. """
  807. if self.random_size:
  808. target_size = random.choice(self.target_size)
  809. else:
  810. target_size = self.target_size
  811. if self.random_interp:
  812. interp = random.choice(self.interps)
  813. else:
  814. interp = self.interp
  815. resizer = Resize(target_size, self.keep_ratio, interp)
  816. return resizer(sample, context=context)
  817. @register_op
  818. class RandomExpand(BaseOperator):
  819. """Random expand the canvas.
  820. Args:
  821. ratio (float): maximum expansion ratio.
  822. prob (float): probability to expand.
  823. fill_value (list): color value used to fill the canvas. in RGB order.
  824. """
  825. def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
  826. super(RandomExpand, self).__init__()
  827. assert ratio > 1.01, "expand ratio must be larger than 1.01"
  828. self.ratio = ratio
  829. self.prob = prob
  830. assert isinstance(fill_value, (Number, Sequence)), \
  831. "fill value must be either float or sequence"
  832. if isinstance(fill_value, Number):
  833. fill_value = (fill_value, ) * 3
  834. if not isinstance(fill_value, tuple):
  835. fill_value = tuple(fill_value)
  836. self.fill_value = fill_value
  837. def apply(self, sample, context=None):
  838. if np.random.uniform(0., 1.) < self.prob:
  839. return sample
  840. im = sample['image']
  841. height, width = im.shape[:2]
  842. ratio = np.random.uniform(1., self.ratio)
  843. h = int(height * ratio)
  844. w = int(width * ratio)
  845. if not h > height or not w > width:
  846. return sample
  847. y = np.random.randint(0, h - height)
  848. x = np.random.randint(0, w - width)
  849. offsets, size = [x, y], [h, w]
  850. pad = Pad(size,
  851. pad_mode=-1,
  852. offsets=offsets,
  853. fill_value=self.fill_value)
  854. return pad(sample, context=context)
  855. @register_op
  856. class CropWithSampling(BaseOperator):
  857. def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
  858. """
  859. Args:
  860. batch_sampler (list): Multiple sets of different
  861. parameters for cropping.
  862. satisfy_all (bool): whether all boxes must satisfy.
  863. e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
  864. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
  865. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
  866. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
  867. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
  868. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
  869. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
  870. [max sample, max trial, min scale, max scale,
  871. min aspect ratio, max aspect ratio,
  872. min overlap, max overlap]
  873. avoid_no_bbox (bool): whether to to avoid the
  874. situation where the box does not appear.
  875. """
  876. super(CropWithSampling, self).__init__()
  877. self.batch_sampler = batch_sampler
  878. self.satisfy_all = satisfy_all
  879. self.avoid_no_bbox = avoid_no_bbox
  880. def apply(self, sample, context):
  881. """
  882. Crop the image and modify bounding box.
  883. Operators:
  884. 1. Scale the image width and height.
  885. 2. Crop the image according to a radom sample.
  886. 3. Rescale the bounding box.
  887. 4. Determine if the new bbox is satisfied in the new image.
  888. Returns:
  889. sample: the image, bounding box are replaced.
  890. """
  891. assert 'image' in sample, "image data not found"
  892. im = sample['image']
  893. gt_bbox = sample['gt_bbox']
  894. gt_class = sample['gt_class']
  895. im_height, im_width = im.shape[:2]
  896. gt_score = None
  897. if 'gt_score' in sample:
  898. gt_score = sample['gt_score']
  899. sampled_bbox = []
  900. gt_bbox = gt_bbox.tolist()
  901. for sampler in self.batch_sampler:
  902. found = 0
  903. for i in range(sampler[1]):
  904. if found >= sampler[0]:
  905. break
  906. sample_bbox = generate_sample_bbox(sampler)
  907. if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
  908. self.satisfy_all):
  909. sampled_bbox.append(sample_bbox)
  910. found = found + 1
  911. im = np.array(im)
  912. while sampled_bbox:
  913. idx = int(np.random.uniform(0, len(sampled_bbox)))
  914. sample_bbox = sampled_bbox.pop(idx)
  915. sample_bbox = clip_bbox(sample_bbox)
  916. crop_bbox, crop_class, crop_score = \
  917. filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
  918. if self.avoid_no_bbox:
  919. if len(crop_bbox) < 1:
  920. continue
  921. xmin = int(sample_bbox[0] * im_width)
  922. xmax = int(sample_bbox[2] * im_width)
  923. ymin = int(sample_bbox[1] * im_height)
  924. ymax = int(sample_bbox[3] * im_height)
  925. im = im[ymin:ymax, xmin:xmax]
  926. sample['image'] = im
  927. sample['gt_bbox'] = crop_bbox
  928. sample['gt_class'] = crop_class
  929. sample['gt_score'] = crop_score
  930. return sample
  931. return sample
  932. @register_op
  933. class CropWithDataAchorSampling(BaseOperator):
  934. def __init__(self,
  935. batch_sampler,
  936. anchor_sampler=None,
  937. target_size=None,
  938. das_anchor_scales=[16, 32, 64, 128],
  939. sampling_prob=0.5,
  940. min_size=8.,
  941. avoid_no_bbox=True):
  942. """
  943. Args:
  944. anchor_sampler (list): anchor_sampling sets of different
  945. parameters for cropping.
  946. batch_sampler (list): Multiple sets of different
  947. parameters for cropping.
  948. e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
  949. [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
  950. [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
  951. [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
  952. [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
  953. [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
  954. [max sample, max trial, min scale, max scale,
  955. min aspect ratio, max aspect ratio,
  956. min overlap, max overlap, min coverage, max coverage]
  957. target_size (int): target image size.
  958. das_anchor_scales (list[float]): a list of anchor scales in data
  959. anchor smapling.
  960. min_size (float): minimum size of sampled bbox.
  961. avoid_no_bbox (bool): whether to to avoid the
  962. situation where the box does not appear.
  963. """
  964. super(CropWithDataAchorSampling, self).__init__()
  965. self.anchor_sampler = anchor_sampler
  966. self.batch_sampler = batch_sampler
  967. self.target_size = target_size
  968. self.sampling_prob = sampling_prob
  969. self.min_size = min_size
  970. self.avoid_no_bbox = avoid_no_bbox
  971. self.das_anchor_scales = np.array(das_anchor_scales)
  972. def apply(self, sample, context):
  973. """
  974. Crop the image and modify bounding box.
  975. Operators:
  976. 1. Scale the image width and height.
  977. 2. Crop the image according to a radom sample.
  978. 3. Rescale the bounding box.
  979. 4. Determine if the new bbox is satisfied in the new image.
  980. Returns:
  981. sample: the image, bounding box are replaced.
  982. """
  983. assert 'image' in sample, "image data not found"
  984. im = sample['image']
  985. gt_bbox = sample['gt_bbox']
  986. gt_class = sample['gt_class']
  987. image_height, image_width = im.shape[:2]
  988. gt_bbox[:, 0] /= image_width
  989. gt_bbox[:, 1] /= image_height
  990. gt_bbox[:, 2] /= image_width
  991. gt_bbox[:, 3] /= image_height
  992. gt_score = None
  993. if 'gt_score' in sample:
  994. gt_score = sample['gt_score']
  995. sampled_bbox = []
  996. gt_bbox = gt_bbox.tolist()
  997. prob = np.random.uniform(0., 1.)
  998. if prob > self.sampling_prob: # anchor sampling
  999. assert self.anchor_sampler
  1000. for sampler in self.anchor_sampler:
  1001. found = 0
  1002. for i in range(sampler[1]):
  1003. if found >= sampler[0]:
  1004. break
  1005. sample_bbox = data_anchor_sampling(
  1006. gt_bbox, image_width, image_height,
  1007. self.das_anchor_scales, self.target_size)
  1008. if sample_bbox == 0:
  1009. break
  1010. if satisfy_sample_constraint_coverage(sampler, sample_bbox,
  1011. gt_bbox):
  1012. sampled_bbox.append(sample_bbox)
  1013. found = found + 1
  1014. im = np.array(im)
  1015. while sampled_bbox:
  1016. idx = int(np.random.uniform(0, len(sampled_bbox)))
  1017. sample_bbox = sampled_bbox.pop(idx)
  1018. if 'gt_keypoint' in sample.keys():
  1019. keypoints = (sample['gt_keypoint'],
  1020. sample['keypoint_ignore'])
  1021. crop_bbox, crop_class, crop_score, gt_keypoints = \
  1022. filter_and_process(sample_bbox, gt_bbox, gt_class,
  1023. scores=gt_score,
  1024. keypoints=keypoints)
  1025. else:
  1026. crop_bbox, crop_class, crop_score = filter_and_process(
  1027. sample_bbox, gt_bbox, gt_class, scores=gt_score)
  1028. crop_bbox, crop_class, crop_score = bbox_area_sampling(
  1029. crop_bbox, crop_class, crop_score, self.target_size,
  1030. self.min_size)
  1031. if self.avoid_no_bbox:
  1032. if len(crop_bbox) < 1:
  1033. continue
  1034. im = crop_image_sampling(im, sample_bbox, image_width,
  1035. image_height, self.target_size)
  1036. height, width = im.shape[:2]
  1037. crop_bbox[:, 0] *= width
  1038. crop_bbox[:, 1] *= height
  1039. crop_bbox[:, 2] *= width
  1040. crop_bbox[:, 3] *= height
  1041. sample['image'] = im
  1042. sample['gt_bbox'] = crop_bbox
  1043. sample['gt_class'] = crop_class
  1044. if 'gt_score' in sample:
  1045. sample['gt_score'] = crop_score
  1046. if 'gt_keypoint' in sample.keys():
  1047. sample['gt_keypoint'] = gt_keypoints[0]
  1048. sample['keypoint_ignore'] = gt_keypoints[1]
  1049. return sample
  1050. return sample
  1051. else:
  1052. for sampler in self.batch_sampler:
  1053. found = 0
  1054. for i in range(sampler[1]):
  1055. if found >= sampler[0]:
  1056. break
  1057. sample_bbox = generate_sample_bbox_square(
  1058. sampler, image_width, image_height)
  1059. if satisfy_sample_constraint_coverage(sampler, sample_bbox,
  1060. gt_bbox):
  1061. sampled_bbox.append(sample_bbox)
  1062. found = found + 1
  1063. im = np.array(im)
  1064. while sampled_bbox:
  1065. idx = int(np.random.uniform(0, len(sampled_bbox)))
  1066. sample_bbox = sampled_bbox.pop(idx)
  1067. sample_bbox = clip_bbox(sample_bbox)
  1068. if 'gt_keypoint' in sample.keys():
  1069. keypoints = (sample['gt_keypoint'],
  1070. sample['keypoint_ignore'])
  1071. crop_bbox, crop_class, crop_score, gt_keypoints = \
  1072. filter_and_process(sample_bbox, gt_bbox, gt_class,
  1073. scores=gt_score,
  1074. keypoints=keypoints)
  1075. else:
  1076. crop_bbox, crop_class, crop_score = filter_and_process(
  1077. sample_bbox, gt_bbox, gt_class, scores=gt_score)
  1078. # sampling bbox according the bbox area
  1079. crop_bbox, crop_class, crop_score = bbox_area_sampling(
  1080. crop_bbox, crop_class, crop_score, self.target_size,
  1081. self.min_size)
  1082. if self.avoid_no_bbox:
  1083. if len(crop_bbox) < 1:
  1084. continue
  1085. xmin = int(sample_bbox[0] * image_width)
  1086. xmax = int(sample_bbox[2] * image_width)
  1087. ymin = int(sample_bbox[1] * image_height)
  1088. ymax = int(sample_bbox[3] * image_height)
  1089. im = im[ymin:ymax, xmin:xmax]
  1090. height, width = im.shape[:2]
  1091. crop_bbox[:, 0] *= width
  1092. crop_bbox[:, 1] *= height
  1093. crop_bbox[:, 2] *= width
  1094. crop_bbox[:, 3] *= height
  1095. sample['image'] = im
  1096. sample['gt_bbox'] = crop_bbox
  1097. sample['gt_class'] = crop_class
  1098. if 'gt_score' in sample:
  1099. sample['gt_score'] = crop_score
  1100. if 'gt_keypoint' in sample.keys():
  1101. sample['gt_keypoint'] = gt_keypoints[0]
  1102. sample['keypoint_ignore'] = gt_keypoints[1]
  1103. return sample
  1104. return sample
  1105. @register_op
  1106. class RandomCrop(BaseOperator):
  1107. """Random crop image and bboxes.
  1108. Args:
  1109. aspect_ratio (list): aspect ratio of cropped region.
  1110. in [min, max] format.
  1111. thresholds (list): iou thresholds for decide a valid bbox crop.
  1112. scaling (list): ratio between a cropped region and the original image.
  1113. in [min, max] format.
  1114. num_attempts (int): number of tries before giving up.
  1115. allow_no_crop (bool): allow return without actually cropping them.
  1116. cover_all_box (bool): ensure all bboxes are covered in the final crop.
  1117. is_mask_crop(bool): whether crop the segmentation.
  1118. """
  1119. def __init__(self,
  1120. aspect_ratio=[.5, 2.],
  1121. thresholds=[.0, .1, .3, .5, .7, .9],
  1122. scaling=[.3, 1.],
  1123. num_attempts=50,
  1124. allow_no_crop=True,
  1125. cover_all_box=False,
  1126. is_mask_crop=False):
  1127. super(RandomCrop, self).__init__()
  1128. self.aspect_ratio = aspect_ratio
  1129. self.thresholds = thresholds
  1130. self.scaling = scaling
  1131. self.num_attempts = num_attempts
  1132. self.allow_no_crop = allow_no_crop
  1133. self.cover_all_box = cover_all_box
  1134. self.is_mask_crop = is_mask_crop
  1135. def crop_segms(self, segms, valid_ids, crop, height, width):
  1136. def _crop_poly(segm, crop):
  1137. xmin, ymin, xmax, ymax = crop
  1138. crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
  1139. crop_p = np.array(crop_coord).reshape(4, 2)
  1140. crop_p = Polygon(crop_p)
  1141. crop_segm = list()
  1142. for poly in segm:
  1143. poly = np.array(poly).reshape(len(poly) // 2, 2)
  1144. polygon = Polygon(poly)
  1145. if not polygon.is_valid:
  1146. exterior = polygon.exterior
  1147. multi_lines = exterior.intersection(exterior)
  1148. polygons = shapely.ops.polygonize(multi_lines)
  1149. polygon = MultiPolygon(polygons)
  1150. multi_polygon = list()
  1151. if isinstance(polygon, MultiPolygon):
  1152. multi_polygon = copy.deepcopy(polygon)
  1153. else:
  1154. multi_polygon.append(copy.deepcopy(polygon))
  1155. for per_polygon in multi_polygon:
  1156. inter = per_polygon.intersection(crop_p)
  1157. if not inter:
  1158. continue
  1159. if isinstance(inter, (MultiPolygon, GeometryCollection)):
  1160. for part in inter:
  1161. if not isinstance(part, Polygon):
  1162. continue
  1163. part = np.squeeze(
  1164. np.array(part.exterior.coords[:-1]).reshape(
  1165. 1, -1))
  1166. part[0::2] -= xmin
  1167. part[1::2] -= ymin
  1168. crop_segm.append(part.tolist())
  1169. elif isinstance(inter, Polygon):
  1170. crop_poly = np.squeeze(
  1171. np.array(inter.exterior.coords[:-1]).reshape(1,
  1172. -1))
  1173. crop_poly[0::2] -= xmin
  1174. crop_poly[1::2] -= ymin
  1175. crop_segm.append(crop_poly.tolist())
  1176. else:
  1177. continue
  1178. return crop_segm
  1179. def _crop_rle(rle, crop, height, width):
  1180. if 'counts' in rle and type(rle['counts']) == list:
  1181. rle = mask_util.frPyObjects(rle, height, width)
  1182. mask = mask_util.decode(rle)
  1183. mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
  1184. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  1185. return rle
  1186. crop_segms = []
  1187. for id in valid_ids:
  1188. segm = segms[id]
  1189. if is_poly(segm):
  1190. import copy
  1191. import shapely.ops
  1192. from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
  1193. logging.getLogger("shapely").setLevel(logging.WARNING)
  1194. # Polygon format
  1195. crop_segms.append(_crop_poly(segm, crop))
  1196. else:
  1197. # RLE format
  1198. import pycocotools.mask as mask_util
  1199. crop_segms.append(_crop_rle(segm, crop, height, width))
  1200. return crop_segms
  1201. def apply(self, sample, context=None):
  1202. if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
  1203. return sample
  1204. h, w = sample['image'].shape[:2]
  1205. gt_bbox = sample['gt_bbox']
  1206. # NOTE Original method attempts to generate one candidate for each
  1207. # threshold then randomly sample one from the resulting list.
  1208. # Here a short circuit approach is taken, i.e., randomly choose a
  1209. # threshold and attempt to find a valid crop, and simply return the
  1210. # first one found.
  1211. # The probability is not exactly the same, kinda resembling the
  1212. # "Monty Hall" problem. Actually carrying out the attempts will affect
  1213. # observability (just like opening doors in the "Monty Hall" game).
  1214. thresholds = list(self.thresholds)
  1215. if self.allow_no_crop:
  1216. thresholds.append('no_crop')
  1217. np.random.shuffle(thresholds)
  1218. for thresh in thresholds:
  1219. if thresh == 'no_crop':
  1220. return sample
  1221. found = False
  1222. for i in range(self.num_attempts):
  1223. scale = np.random.uniform(*self.scaling)
  1224. if self.aspect_ratio is not None:
  1225. min_ar, max_ar = self.aspect_ratio
  1226. aspect_ratio = np.random.uniform(
  1227. max(min_ar, scale**2), min(max_ar, scale**-2))
  1228. h_scale = scale / np.sqrt(aspect_ratio)
  1229. w_scale = scale * np.sqrt(aspect_ratio)
  1230. else:
  1231. h_scale = np.random.uniform(*self.scaling)
  1232. w_scale = np.random.uniform(*self.scaling)
  1233. crop_h = h * h_scale
  1234. crop_w = w * w_scale
  1235. if self.aspect_ratio is None:
  1236. if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
  1237. continue
  1238. crop_h = int(crop_h)
  1239. crop_w = int(crop_w)
  1240. crop_y = np.random.randint(0, h - crop_h)
  1241. crop_x = np.random.randint(0, w - crop_w)
  1242. crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
  1243. iou = self._iou_matrix(
  1244. gt_bbox, np.array(
  1245. [crop_box], dtype=np.float32))
  1246. if iou.max() < thresh:
  1247. continue
  1248. if self.cover_all_box and iou.min() < thresh:
  1249. continue
  1250. cropped_box, valid_ids = self._crop_box_with_center_constraint(
  1251. gt_bbox, np.array(
  1252. crop_box, dtype=np.float32))
  1253. if valid_ids.size > 0:
  1254. found = True
  1255. break
  1256. if found:
  1257. if self.is_mask_crop and 'gt_poly' in sample and len(sample[
  1258. 'gt_poly']) > 0:
  1259. crop_polys = self.crop_segms(
  1260. sample['gt_poly'],
  1261. valid_ids,
  1262. np.array(
  1263. crop_box, dtype=np.int64),
  1264. h,
  1265. w)
  1266. if [] in crop_polys:
  1267. delete_id = list()
  1268. valid_polys = list()
  1269. for id, crop_poly in enumerate(crop_polys):
  1270. if crop_poly == []:
  1271. delete_id.append(id)
  1272. else:
  1273. valid_polys.append(crop_poly)
  1274. valid_ids = np.delete(valid_ids, delete_id)
  1275. if len(valid_polys) == 0:
  1276. return sample
  1277. sample['gt_poly'] = valid_polys
  1278. else:
  1279. sample['gt_poly'] = crop_polys
  1280. if 'gt_segm' in sample:
  1281. sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
  1282. crop_box)
  1283. sample['gt_segm'] = np.take(
  1284. sample['gt_segm'], valid_ids, axis=0)
  1285. sample['image'] = self._crop_image(sample['image'], crop_box)
  1286. sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
  1287. sample['gt_class'] = np.take(
  1288. sample['gt_class'], valid_ids, axis=0)
  1289. if 'gt_score' in sample:
  1290. sample['gt_score'] = np.take(
  1291. sample['gt_score'], valid_ids, axis=0)
  1292. if 'is_crowd' in sample:
  1293. sample['is_crowd'] = np.take(
  1294. sample['is_crowd'], valid_ids, axis=0)
  1295. return sample
  1296. return sample
  1297. def _iou_matrix(self, a, b):
  1298. tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
  1299. br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
  1300. area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
  1301. area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
  1302. area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
  1303. area_o = (area_a[:, np.newaxis] + area_b - area_i)
  1304. return area_i / (area_o + 1e-10)
  1305. def _crop_box_with_center_constraint(self, box, crop):
  1306. cropped_box = box.copy()
  1307. cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
  1308. cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
  1309. cropped_box[:, :2] -= crop[:2]
  1310. cropped_box[:, 2:] -= crop[:2]
  1311. centers = (box[:, :2] + box[:, 2:]) / 2
  1312. valid = np.logical_and(crop[:2] <= centers,
  1313. centers < crop[2:]).all(axis=1)
  1314. valid = np.logical_and(
  1315. valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
  1316. return cropped_box, np.where(valid)[0]
  1317. def _crop_image(self, img, crop):
  1318. x1, y1, x2, y2 = crop
  1319. return img[y1:y2, x1:x2, :]
  1320. def _crop_segm(self, segm, crop):
  1321. x1, y1, x2, y2 = crop
  1322. return segm[:, y1:y2, x1:x2]
  1323. @register_op
  1324. class RandomScaledCrop(BaseOperator):
  1325. """Resize image and bbox based on long side (with optional random scaling),
  1326. then crop or pad image to target size.
  1327. Args:
  1328. target_dim (int): target size.
  1329. scale_range (list): random scale range.
  1330. interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
  1331. """
  1332. def __init__(self,
  1333. target_dim=512,
  1334. scale_range=[.1, 2.],
  1335. interp=cv2.INTER_LINEAR):
  1336. super(RandomScaledCrop, self).__init__()
  1337. self.target_dim = target_dim
  1338. self.scale_range = scale_range
  1339. self.interp = interp
  1340. def apply(self, sample, context=None):
  1341. img = sample['image']
  1342. h, w = img.shape[:2]
  1343. random_scale = np.random.uniform(*self.scale_range)
  1344. dim = self.target_dim
  1345. random_dim = int(dim * random_scale)
  1346. dim_max = max(h, w)
  1347. scale = random_dim / dim_max
  1348. resize_w = w * scale
  1349. resize_h = h * scale
  1350. offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
  1351. offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
  1352. img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
  1353. img = np.array(img)
  1354. canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
  1355. canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
  1356. offset_y:offset_y + dim, offset_x:offset_x + dim, :]
  1357. sample['image'] = canvas
  1358. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
  1359. scale_factor = sample['sacle_factor']
  1360. sample['scale_factor'] = np.asarray(
  1361. [scale_factor[0] * scale, scale_factor[1] * scale],
  1362. dtype=np.float32)
  1363. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  1364. scale_array = np.array([scale, scale] * 2, dtype=np.float32)
  1365. shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
  1366. boxes = sample['gt_bbox'] * scale_array - shift_array
  1367. boxes = np.clip(boxes, 0, dim - 1)
  1368. # filter boxes with no area
  1369. area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
  1370. valid = (area > 1.).nonzero()[0]
  1371. sample['gt_bbox'] = boxes[valid]
  1372. sample['gt_class'] = sample['gt_class'][valid]
  1373. return sample
  1374. @register_op
  1375. class Cutmix(BaseOperator):
  1376. def __init__(self, alpha=1.5, beta=1.5):
  1377. """
  1378. CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
  1379. Cutmix image and gt_bbbox/gt_score
  1380. Args:
  1381. alpha (float): alpha parameter of beta distribute
  1382. beta (float): beta parameter of beta distribute
  1383. """
  1384. super(Cutmix, self).__init__()
  1385. self.alpha = alpha
  1386. self.beta = beta
  1387. if self.alpha <= 0.0:
  1388. raise ValueError("alpha shold be positive in {}".format(self))
  1389. if self.beta <= 0.0:
  1390. raise ValueError("beta shold be positive in {}".format(self))
  1391. def apply_image(self, img1, img2, factor):
  1392. """ _rand_bbox """
  1393. h = max(img1.shape[0], img2.shape[0])
  1394. w = max(img1.shape[1], img2.shape[1])
  1395. cut_rat = np.sqrt(1. - factor)
  1396. cut_w = np.int32(w * cut_rat)
  1397. cut_h = np.int32(h * cut_rat)
  1398. # uniform
  1399. cx = np.random.randint(w)
  1400. cy = np.random.randint(h)
  1401. bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
  1402. bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
  1403. bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
  1404. bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
  1405. img_1_pad = np.zeros((h, w, img1.shape[2]), 'float32')
  1406. img_1_pad[:img1.shape[0], :img1.shape[1], :] = \
  1407. img1.astype('float32')
  1408. img_2_pad = np.zeros((h, w, img2.shape[2]), 'float32')
  1409. img_2_pad[:img2.shape[0], :img2.shape[1], :] = \
  1410. img2.astype('float32')
  1411. img_1_pad[bby1:bby2, bbx1:bbx2, :] = img_2_pad[bby1:bby2, bbx1:bbx2, :]
  1412. return img_1_pad
  1413. def __call__(self, sample, context=None):
  1414. if not isinstance(sample, Sequence):
  1415. return sample
  1416. assert len(sample) == 2, 'cutmix need two samples'
  1417. factor = np.random.beta(self.alpha, self.beta)
  1418. factor = max(0.0, min(1.0, factor))
  1419. if factor >= 1.0:
  1420. return sample[0]
  1421. if factor <= 0.0:
  1422. return sample[1]
  1423. img1 = sample[0]['image']
  1424. img2 = sample[1]['image']
  1425. img = self.apply_image(img1, img2, factor)
  1426. gt_bbox1 = sample[0]['gt_bbox']
  1427. gt_bbox2 = sample[1]['gt_bbox']
  1428. gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
  1429. gt_class1 = sample[0]['gt_class']
  1430. gt_class2 = sample[1]['gt_class']
  1431. gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
  1432. gt_score1 = np.ones_like(sample[0]['gt_class'])
  1433. gt_score2 = np.ones_like(sample[1]['gt_class'])
  1434. gt_score = np.concatenate(
  1435. (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
  1436. result = copy.deepcopy(sample[0])
  1437. result['image'] = img
  1438. result['gt_bbox'] = gt_bbox
  1439. result['gt_score'] = gt_score
  1440. result['gt_class'] = gt_class
  1441. if 'is_crowd' in sample[0]:
  1442. is_crowd1 = sample[0]['is_crowd']
  1443. is_crowd2 = sample[1]['is_crowd']
  1444. is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
  1445. result['is_crowd'] = is_crowd
  1446. if 'difficult' in sample[0]:
  1447. is_difficult1 = sample[0]['difficult']
  1448. is_difficult2 = sample[1]['difficult']
  1449. is_difficult = np.concatenate(
  1450. (is_difficult1, is_difficult2), axis=0)
  1451. result['difficult'] = is_difficult
  1452. return result
  1453. @register_op
  1454. class Mixup(BaseOperator):
  1455. def __init__(self, alpha=1.5, beta=1.5):
  1456. """ Mixup image and gt_bbbox/gt_score
  1457. Args:
  1458. alpha (float): alpha parameter of beta distribute
  1459. beta (float): beta parameter of beta distribute
  1460. """
  1461. super(Mixup, self).__init__()
  1462. self.alpha = alpha
  1463. self.beta = beta
  1464. if self.alpha <= 0.0:
  1465. raise ValueError("alpha shold be positive in {}".format(self))
  1466. if self.beta <= 0.0:
  1467. raise ValueError("beta shold be positive in {}".format(self))
  1468. def apply_image(self, img1, img2, factor):
  1469. h = max(img1.shape[0], img2.shape[0])
  1470. w = max(img1.shape[1], img2.shape[1])
  1471. img = np.zeros((h, w, img1.shape[2]), 'float32')
  1472. img[:img1.shape[0], :img1.shape[1], :] = \
  1473. img1.astype('float32') * factor
  1474. img[:img2.shape[0], :img2.shape[1], :] += \
  1475. img2.astype('float32') * (1.0 - factor)
  1476. return img.astype('uint8')
  1477. def __call__(self, sample, context=None):
  1478. if not isinstance(sample, Sequence):
  1479. return sample
  1480. assert len(sample) == 2, 'mixup need two samples'
  1481. factor = np.random.beta(self.alpha, self.beta)
  1482. factor = max(0.0, min(1.0, factor))
  1483. if factor >= 1.0:
  1484. return sample[0]
  1485. if factor <= 0.0:
  1486. return sample[1]
  1487. im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
  1488. result = copy.deepcopy(sample[0])
  1489. result['image'] = im
  1490. # apply bbox and score
  1491. if 'gt_bbox' in sample[0]:
  1492. gt_bbox1 = sample[0]['gt_bbox']
  1493. gt_bbox2 = sample[1]['gt_bbox']
  1494. gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
  1495. result['gt_bbox'] = gt_bbox
  1496. if 'gt_class' in sample[0]:
  1497. gt_class1 = sample[0]['gt_class']
  1498. gt_class2 = sample[1]['gt_class']
  1499. gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
  1500. result['gt_class'] = gt_class
  1501. gt_score1 = np.ones_like(sample[0]['gt_class'])
  1502. gt_score2 = np.ones_like(sample[1]['gt_class'])
  1503. gt_score = np.concatenate(
  1504. (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
  1505. result['gt_score'] = gt_score
  1506. if 'is_crowd' in sample[0]:
  1507. is_crowd1 = sample[0]['is_crowd']
  1508. is_crowd2 = sample[1]['is_crowd']
  1509. is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
  1510. result['is_crowd'] = is_crowd
  1511. if 'difficult' in sample[0]:
  1512. is_difficult1 = sample[0]['difficult']
  1513. is_difficult2 = sample[1]['difficult']
  1514. is_difficult = np.concatenate(
  1515. (is_difficult1, is_difficult2), axis=0)
  1516. result['difficult'] = is_difficult
  1517. if 'gt_ide' in sample[0]:
  1518. gt_ide1 = sample[0]['gt_ide']
  1519. gt_ide2 = sample[1]['gt_ide']
  1520. gt_ide = np.concatenate((gt_ide1, gt_ide2), axis=0)
  1521. result['gt_ide'] = gt_ide
  1522. return result
  1523. @register_op
  1524. class NormalizeBox(BaseOperator):
  1525. """Transform the bounding box's coornidates to [0,1]."""
  1526. def __init__(self):
  1527. super(NormalizeBox, self).__init__()
  1528. def apply(self, sample, context):
  1529. im = sample['image']
  1530. gt_bbox = sample['gt_bbox']
  1531. height, width, _ = im.shape
  1532. for i in range(gt_bbox.shape[0]):
  1533. gt_bbox[i][0] = gt_bbox[i][0] / width
  1534. gt_bbox[i][1] = gt_bbox[i][1] / height
  1535. gt_bbox[i][2] = gt_bbox[i][2] / width
  1536. gt_bbox[i][3] = gt_bbox[i][3] / height
  1537. sample['gt_bbox'] = gt_bbox
  1538. if 'gt_keypoint' in sample.keys():
  1539. gt_keypoint = sample['gt_keypoint']
  1540. for i in range(gt_keypoint.shape[1]):
  1541. if i % 2:
  1542. gt_keypoint[:, i] = gt_keypoint[:, i] / height
  1543. else:
  1544. gt_keypoint[:, i] = gt_keypoint[:, i] / width
  1545. sample['gt_keypoint'] = gt_keypoint
  1546. return sample
  1547. @register_op
  1548. class BboxXYXY2XYWH(BaseOperator):
  1549. """
  1550. Convert bbox XYXY format to XYWH format.
  1551. """
  1552. def __init__(self):
  1553. super(BboxXYXY2XYWH, self).__init__()
  1554. def apply(self, sample, context=None):
  1555. assert 'gt_bbox' in sample
  1556. bbox = sample['gt_bbox']
  1557. bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
  1558. bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
  1559. sample['gt_bbox'] = bbox
  1560. return sample
  1561. @register_op
  1562. class PadBox(BaseOperator):
  1563. def __init__(self, num_max_boxes=50):
  1564. """
  1565. Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
  1566. Args:
  1567. num_max_boxes (int): the max number of bboxes
  1568. """
  1569. self.num_max_boxes = num_max_boxes
  1570. super(PadBox, self).__init__()
  1571. def apply(self, sample, context=None):
  1572. assert 'gt_bbox' in sample
  1573. bbox = sample['gt_bbox']
  1574. gt_num = min(self.num_max_boxes, len(bbox))
  1575. num_max = self.num_max_boxes
  1576. # fields = context['fields'] if context else []
  1577. pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
  1578. if gt_num > 0:
  1579. pad_bbox[:gt_num, :] = bbox[:gt_num, :]
  1580. sample['gt_bbox'] = pad_bbox
  1581. if 'gt_class' in sample:
  1582. pad_class = np.zeros((num_max, ), dtype=np.int32)
  1583. if gt_num > 0:
  1584. pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
  1585. sample['gt_class'] = pad_class
  1586. if 'gt_score' in sample:
  1587. pad_score = np.zeros((num_max, ), dtype=np.float32)
  1588. if gt_num > 0:
  1589. pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
  1590. sample['gt_score'] = pad_score
  1591. # in training, for example in op ExpandImage,
  1592. # the bbox and gt_class is expandded, but the difficult is not,
  1593. # so, judging by it's length
  1594. if 'difficult' in sample:
  1595. pad_diff = np.zeros((num_max, ), dtype=np.int32)
  1596. if gt_num > 0:
  1597. pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
  1598. sample['difficult'] = pad_diff
  1599. if 'is_crowd' in sample:
  1600. pad_crowd = np.zeros((num_max, ), dtype=np.int32)
  1601. if gt_num > 0:
  1602. pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
  1603. sample['is_crowd'] = pad_crowd
  1604. if 'gt_ide' in sample:
  1605. pad_ide = np.zeros((num_max, ), dtype=np.int32)
  1606. if gt_num > 0:
  1607. pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
  1608. sample['gt_ide'] = pad_ide
  1609. return sample
  1610. @register_op
  1611. class DebugVisibleImage(BaseOperator):
  1612. """
  1613. In debug mode, visualize images according to `gt_box`.
  1614. (Currently only supported when not cropping and flipping image.)
  1615. """
  1616. def __init__(self, output_dir='output/debug', is_normalized=False):
  1617. super(DebugVisibleImage, self).__init__()
  1618. self.is_normalized = is_normalized
  1619. self.output_dir = output_dir
  1620. if not os.path.isdir(output_dir):
  1621. os.makedirs(output_dir)
  1622. if not isinstance(self.is_normalized, bool):
  1623. raise TypeError("{}: input type is invalid.".format(self))
  1624. def apply(self, sample, context=None):
  1625. image = Image.fromarray(sample['image'].astype(np.uint8))
  1626. out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
  1627. width = sample['w']
  1628. height = sample['h']
  1629. gt_bbox = sample['gt_bbox']
  1630. gt_class = sample['gt_class']
  1631. draw = ImageDraw.Draw(image)
  1632. for i in range(gt_bbox.shape[0]):
  1633. if self.is_normalized:
  1634. gt_bbox[i][0] = gt_bbox[i][0] * width
  1635. gt_bbox[i][1] = gt_bbox[i][1] * height
  1636. gt_bbox[i][2] = gt_bbox[i][2] * width
  1637. gt_bbox[i][3] = gt_bbox[i][3] * height
  1638. xmin, ymin, xmax, ymax = gt_bbox[i]
  1639. draw.line(
  1640. [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
  1641. (xmin, ymin)],
  1642. width=2,
  1643. fill='green')
  1644. # draw label
  1645. text = str(gt_class[i][0])
  1646. tw, th = draw.textsize(text)
  1647. draw.rectangle(
  1648. [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
  1649. draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
  1650. if 'gt_keypoint' in sample.keys():
  1651. gt_keypoint = sample['gt_keypoint']
  1652. if self.is_normalized:
  1653. for i in range(gt_keypoint.shape[1]):
  1654. if i % 2:
  1655. gt_keypoint[:, i] = gt_keypoint[:, i] * height
  1656. else:
  1657. gt_keypoint[:, i] = gt_keypoint[:, i] * width
  1658. for i in range(gt_keypoint.shape[0]):
  1659. keypoint = gt_keypoint[i]
  1660. for j in range(int(keypoint.shape[0] / 2)):
  1661. x1 = round(keypoint[2 * j]).astype(np.int32)
  1662. y1 = round(keypoint[2 * j + 1]).astype(np.int32)
  1663. draw.ellipse(
  1664. (x1, y1, x1 + 5, y1 + 5),
  1665. fill='green',
  1666. outline='green')
  1667. save_path = os.path.join(self.output_dir, out_file_name)
  1668. image.save(save_path, quality=95)
  1669. return sample
  1670. @register_op
  1671. class Pad(BaseOperator):
  1672. def __init__(self,
  1673. size=None,
  1674. size_divisor=32,
  1675. pad_mode=0,
  1676. offsets=None,
  1677. fill_value=(127.5, 127.5, 127.5)):
  1678. """
  1679. Pad image to a specified size or multiple of size_divisor.
  1680. Args:
  1681. size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
  1682. size_divisor (int): size divisor, default 32
  1683. pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
  1684. if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
  1685. offsets (list): [offset_x, offset_y], specify offset while padding, only supported pad_mode=-1
  1686. fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
  1687. """
  1688. super(Pad, self).__init__()
  1689. if not isinstance(size, (int, Sequence)):
  1690. raise TypeError(
  1691. "Type of target_size is invalid when random_size is True. \
  1692. Must be List, now is {}".format(type(size)))
  1693. if isinstance(size, int):
  1694. size = [size, size]
  1695. assert pad_mode in [
  1696. -1, 0, 1, 2
  1697. ], 'currently only supports four modes [-1, 0, 1, 2]'
  1698. if pad_mode == -1:
  1699. assert offsets, 'if pad_mode is -1, offsets should not be None'
  1700. self.size = size
  1701. self.size_divisor = size_divisor
  1702. self.pad_mode = pad_mode
  1703. self.fill_value = fill_value
  1704. self.offsets = offsets
  1705. def apply_segm(self, segms, offsets, im_size, size):
  1706. def _expand_poly(poly, x, y):
  1707. expanded_poly = np.array(poly)
  1708. expanded_poly[0::2] += x
  1709. expanded_poly[1::2] += y
  1710. return expanded_poly.tolist()
  1711. def _expand_rle(rle, x, y, height, width, h, w):
  1712. if 'counts' in rle and type(rle['counts']) == list:
  1713. rle = mask_util.frPyObjects(rle, height, width)
  1714. mask = mask_util.decode(rle)
  1715. expanded_mask = np.full((h, w), 0).astype(mask.dtype)
  1716. expanded_mask[y:y + height, x:x + width] = mask
  1717. rle = mask_util.encode(
  1718. np.array(
  1719. expanded_mask, order='F', dtype=np.uint8))
  1720. return rle
  1721. x, y = offsets
  1722. height, width = im_size
  1723. h, w = size
  1724. expanded_segms = []
  1725. for segm in segms:
  1726. if is_poly(segm):
  1727. # Polygon format
  1728. expanded_segms.append(
  1729. [_expand_poly(poly, x, y) for poly in segm])
  1730. else:
  1731. # RLE format
  1732. import pycocotools.mask as mask_util
  1733. expanded_segms.append(
  1734. _expand_rle(segm, x, y, height, width, h, w))
  1735. return expanded_segms
  1736. def apply_bbox(self, bbox, offsets):
  1737. return bbox + np.array(offsets * 2, dtype=np.float32)
  1738. def apply_keypoint(self, keypoints, offsets):
  1739. n = len(keypoints[0]) // 2
  1740. return keypoints + np.array(offsets * n, dtype=np.float32)
  1741. def apply_image(self, image, offsets, im_size, size):
  1742. x, y = offsets
  1743. im_h, im_w = im_size
  1744. h, w = size
  1745. canvas = np.ones((h, w, 3), dtype=np.float32)
  1746. canvas *= np.array(self.fill_value, dtype=np.float32)
  1747. canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
  1748. return canvas
  1749. def apply(self, sample, context=None):
  1750. im = sample['image']
  1751. im_h, im_w = im.shape[:2]
  1752. if self.size:
  1753. h, w = self.size
  1754. assert (
  1755. im_h < h and im_w < w
  1756. ), '(h, w) of target size should be greater than (im_h, im_w)'
  1757. else:
  1758. h = np.ceil(im_h / self.size_divisor) * self.size_divisor
  1759. w = np.ceil(im_w / self.size_divisor) * self.size_divisor
  1760. if h == im_h and w == im_w:
  1761. return sample
  1762. if self.pad_mode == -1:
  1763. offset_x, offset_y = self.offsets
  1764. elif self.pad_mode == 0:
  1765. offset_y, offset_x = 0, 0
  1766. elif self.pad_mode == 1:
  1767. offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
  1768. else:
  1769. offset_y, offset_x = h - im_h, w - im_w
  1770. offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
  1771. sample['image'] = self.apply_image(im, offsets, im_size, size)
  1772. if self.pad_mode == 0:
  1773. return sample
  1774. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  1775. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
  1776. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  1777. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
  1778. im_size, size)
  1779. if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
  1780. sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
  1781. offsets)
  1782. return sample
  1783. @register_op
  1784. class Poly2Mask(BaseOperator):
  1785. """
  1786. gt poly to mask annotations
  1787. """
  1788. def __init__(self):
  1789. super(Poly2Mask, self).__init__()
  1790. import pycocotools.mask as maskUtils
  1791. self.maskutils = maskUtils
  1792. def _poly2mask(self, mask_ann, img_h, img_w):
  1793. if isinstance(mask_ann, list):
  1794. # polygon -- a single object might consist of multiple parts
  1795. # we merge all parts into one mask rle code
  1796. rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
  1797. rle = self.maskutils.merge(rles)
  1798. elif isinstance(mask_ann['counts'], list):
  1799. # uncompressed RLE
  1800. rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
  1801. else:
  1802. # rle
  1803. rle = mask_ann
  1804. mask = self.maskutils.decode(rle)
  1805. return mask
  1806. def apply(self, sample, context=None):
  1807. assert 'gt_poly' in sample
  1808. im_h = sample['h']
  1809. im_w = sample['w']
  1810. masks = [
  1811. self._poly2mask(gt_poly, im_h, im_w)
  1812. for gt_poly in sample['gt_poly']
  1813. ]
  1814. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  1815. return sample
  1816. @register_op
  1817. class Rbox2Poly(BaseOperator):
  1818. """
  1819. Convert rbbox format to poly format.
  1820. """
  1821. def __init__(self):
  1822. super(Rbox2Poly, self).__init__()
  1823. def apply(self, sample, context=None):
  1824. assert 'gt_rbox' in sample
  1825. assert sample['gt_rbox'].shape[1] == 5
  1826. rrects = sample['gt_rbox']
  1827. x_ctr = rrects[:, 0]
  1828. y_ctr = rrects[:, 1]
  1829. width = rrects[:, 2]
  1830. height = rrects[:, 3]
  1831. x1 = x_ctr - width / 2.0
  1832. y1 = y_ctr - height / 2.0
  1833. x2 = x_ctr + width / 2.0
  1834. y2 = y_ctr + height / 2.0
  1835. sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
  1836. polys = bbox_utils.rbox2poly_np(rrects)
  1837. sample['gt_rbox2poly'] = polys
  1838. return sample
  1839. @register_op
  1840. class AugmentHSV(BaseOperator):
  1841. def __init__(self, fraction=0.50, is_bgr=True):
  1842. """
  1843. Augment the SV channel of image data.
  1844. Args:
  1845. fraction (float): the fraction for augment. Default: 0.5.
  1846. is_bgr (bool): whether the image is BGR mode. Default: True.
  1847. """
  1848. super(AugmentHSV, self).__init__()
  1849. self.fraction = fraction
  1850. self.is_bgr = is_bgr
  1851. def apply(self, sample, context=None):
  1852. img = sample['image']
  1853. if self.is_bgr:
  1854. img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
  1855. else:
  1856. img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
  1857. S = img_hsv[:, :, 1].astype(np.float32)
  1858. V = img_hsv[:, :, 2].astype(np.float32)
  1859. a = (random.random() * 2 - 1) * self.fraction + 1
  1860. S *= a
  1861. if a > 1:
  1862. np.clip(S, a_min=0, a_max=255, out=S)
  1863. a = (random.random() * 2 - 1) * self.fraction + 1
  1864. V *= a
  1865. if a > 1:
  1866. np.clip(V, a_min=0, a_max=255, out=V)
  1867. img_hsv[:, :, 1] = S.astype(np.uint8)
  1868. img_hsv[:, :, 2] = V.astype(np.uint8)
  1869. if self.is_bgr:
  1870. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
  1871. else:
  1872. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
  1873. sample['image'] = img
  1874. return sample
  1875. @register_op
  1876. class Norm2PixelBbox(BaseOperator):
  1877. """
  1878. Transform the bounding box's coornidates which is in [0,1] to pixels.
  1879. """
  1880. def __init__(self):
  1881. super(Norm2PixelBbox, self).__init__()
  1882. def apply(self, sample, context=None):
  1883. assert 'gt_bbox' in sample
  1884. bbox = sample['gt_bbox']
  1885. height, width = sample['image'].shape[:2]
  1886. bbox[:, 0::2] = bbox[:, 0::2] * width
  1887. bbox[:, 1::2] = bbox[:, 1::2] * height
  1888. sample['gt_bbox'] = bbox
  1889. return sample
  1890. @register_op
  1891. class BboxCXCYWH2XYXY(BaseOperator):
  1892. """
  1893. Convert bbox CXCYWH format to XYXY format.
  1894. [center_x, center_y, width, height] -> [x0, y0, x1, y1]
  1895. """
  1896. def __init__(self):
  1897. super(BboxCXCYWH2XYXY, self).__init__()
  1898. def apply(self, sample, context=None):
  1899. assert 'gt_bbox' in sample
  1900. bbox0 = sample['gt_bbox']
  1901. bbox = bbox0.copy()
  1902. bbox[:, :2] = bbox0[:, :2] - bbox0[:, 2:4] / 2.
  1903. bbox[:, 2:4] = bbox0[:, :2] + bbox0[:, 2:4] / 2.
  1904. sample['gt_bbox'] = bbox
  1905. return sample
  1906. @register_op
  1907. class RandomResizeCrop(BaseOperator):
  1908. """Random resize and crop image and bboxes.
  1909. Args:
  1910. resizes (list): resize image to one of resizes. if keep_ratio is True and mode is
  1911. 'long', resize the image's long side to the maximum of target_size, if keep_ratio is
  1912. True and mode is 'short', resize the image's short side to the minimum of target_size.
  1913. cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
  1914. mode (str): resize mode, `long` or `short`. Details see resizes.
  1915. prob (float): probability of this op.
  1916. keep_ratio (bool): whether keep_ratio or not, default true
  1917. interp (int): the interpolation method
  1918. thresholds (list): iou thresholds for decide a valid bbox crop.
  1919. num_attempts (int): number of tries before giving up.
  1920. allow_no_crop (bool): allow return without actually cropping them.
  1921. cover_all_box (bool): ensure all bboxes are covered in the final crop.
  1922. is_mask_crop(bool): whether crop the segmentation.
  1923. """
  1924. def __init__(
  1925. self,
  1926. resizes,
  1927. cropsizes,
  1928. prob=0.5,
  1929. mode='short',
  1930. keep_ratio=True,
  1931. interp=cv2.INTER_LINEAR,
  1932. num_attempts=3,
  1933. cover_all_box=False,
  1934. allow_no_crop=False,
  1935. thresholds=[0.3, 0.5, 0.7],
  1936. is_mask_crop=False, ):
  1937. super(RandomResizeCrop, self).__init__()
  1938. self.resizes = resizes
  1939. self.cropsizes = cropsizes
  1940. self.prob = prob
  1941. self.mode = mode
  1942. self.resizer = Resize(0, keep_ratio=keep_ratio, interp=interp)
  1943. self.croper = RandomCrop(
  1944. num_attempts=num_attempts,
  1945. cover_all_box=cover_all_box,
  1946. thresholds=thresholds,
  1947. allow_no_crop=allow_no_crop,
  1948. is_mask_crop=is_mask_crop)
  1949. def _format_size(self, size):
  1950. if isinstance(size, Integral):
  1951. size = (size, size)
  1952. return size
  1953. def apply(self, sample, context=None):
  1954. if random.random() < self.prob:
  1955. _resize = self._format_size(random.choice(self.resizes))
  1956. _cropsize = self._format_size(random.choice(self.cropsizes))
  1957. sample = self._resize(
  1958. self.resizer,
  1959. sample,
  1960. size=_resize,
  1961. mode=self.mode,
  1962. context=context)
  1963. sample = self._random_crop(
  1964. self.croper, sample, size=_cropsize, context=context)
  1965. return sample
  1966. @staticmethod
  1967. def _random_crop(croper, sample, size, context=None):
  1968. if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
  1969. return sample
  1970. self = croper
  1971. h, w = sample['image'].shape[:2]
  1972. gt_bbox = sample['gt_bbox']
  1973. cropsize = size
  1974. min_crop = min(cropsize)
  1975. max_crop = max(cropsize)
  1976. thresholds = list(self.thresholds)
  1977. np.random.shuffle(thresholds)
  1978. for thresh in thresholds:
  1979. found = False
  1980. for _ in range(self.num_attempts):
  1981. crop_h = random.randint(min_crop, min(h, max_crop))
  1982. crop_w = random.randint(min_crop, min(w, max_crop))
  1983. crop_y = random.randint(0, h - crop_h)
  1984. crop_x = random.randint(0, w - crop_w)
  1985. crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
  1986. iou = self._iou_matrix(
  1987. gt_bbox, np.array(
  1988. [crop_box], dtype=np.float32))
  1989. if iou.max() < thresh:
  1990. continue
  1991. if self.cover_all_box and iou.min() < thresh:
  1992. continue
  1993. cropped_box, valid_ids = self._crop_box_with_center_constraint(
  1994. gt_bbox, np.array(
  1995. crop_box, dtype=np.float32))
  1996. if valid_ids.size > 0:
  1997. found = True
  1998. break
  1999. if found:
  2000. if self.is_mask_crop and 'gt_poly' in sample and len(sample[
  2001. 'gt_poly']) > 0:
  2002. crop_polys = self.crop_segms(
  2003. sample['gt_poly'],
  2004. valid_ids,
  2005. np.array(
  2006. crop_box, dtype=np.int64),
  2007. h,
  2008. w)
  2009. if [] in crop_polys:
  2010. delete_id = list()
  2011. valid_polys = list()
  2012. for id, crop_poly in enumerate(crop_polys):
  2013. if crop_poly == []:
  2014. delete_id.append(id)
  2015. else:
  2016. valid_polys.append(crop_poly)
  2017. valid_ids = np.delete(valid_ids, delete_id)
  2018. if len(valid_polys) == 0:
  2019. return sample
  2020. sample['gt_poly'] = valid_polys
  2021. else:
  2022. sample['gt_poly'] = crop_polys
  2023. if 'gt_segm' in sample:
  2024. sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
  2025. crop_box)
  2026. sample['gt_segm'] = np.take(
  2027. sample['gt_segm'], valid_ids, axis=0)
  2028. sample['image'] = self._crop_image(sample['image'], crop_box)
  2029. sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
  2030. sample['gt_class'] = np.take(
  2031. sample['gt_class'], valid_ids, axis=0)
  2032. if 'gt_score' in sample:
  2033. sample['gt_score'] = np.take(
  2034. sample['gt_score'], valid_ids, axis=0)
  2035. if 'is_crowd' in sample:
  2036. sample['is_crowd'] = np.take(
  2037. sample['is_crowd'], valid_ids, axis=0)
  2038. return sample
  2039. return sample
  2040. @staticmethod
  2041. def _resize(resizer, sample, size, mode='short', context=None):
  2042. self = resizer
  2043. im = sample['image']
  2044. target_size = size
  2045. if not isinstance(im, np.ndarray):
  2046. raise TypeError("{}: image type is not numpy.".format(self))
  2047. if len(im.shape) != 3:
  2048. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  2049. # apply image
  2050. im_shape = im.shape
  2051. if self.keep_ratio:
  2052. im_size_min = np.min(im_shape[0:2])
  2053. im_size_max = np.max(im_shape[0:2])
  2054. target_size_min = np.min(target_size)
  2055. target_size_max = np.max(target_size)
  2056. if mode == 'long':
  2057. im_scale = min(target_size_min / im_size_min,
  2058. target_size_max / im_size_max)
  2059. else:
  2060. im_scale = max(target_size_min / im_size_min,
  2061. target_size_max / im_size_max)
  2062. resize_h = im_scale * float(im_shape[0])
  2063. resize_w = im_scale * float(im_shape[1])
  2064. im_scale_x = im_scale
  2065. im_scale_y = im_scale
  2066. else:
  2067. resize_h, resize_w = target_size
  2068. im_scale_y = resize_h / im_shape[0]
  2069. im_scale_x = resize_w / im_shape[1]
  2070. im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
  2071. sample['image'] = im
  2072. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
  2073. if 'scale_factor' in sample:
  2074. scale_factor = sample['scale_factor']
  2075. sample['scale_factor'] = np.asarray(
  2076. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  2077. dtype=np.float32)
  2078. else:
  2079. sample['scale_factor'] = np.asarray(
  2080. [im_scale_y, im_scale_x], dtype=np.float32)
  2081. # apply bbox
  2082. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2083. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
  2084. [im_scale_x, im_scale_y],
  2085. [resize_w, resize_h])
  2086. # apply rbox
  2087. if 'gt_rbox2poly' in sample:
  2088. if np.array(sample['gt_rbox2poly']).shape[1] != 8:
  2089. logger.warn(
  2090. "gt_rbox2poly's length shoule be 8, but actually is {}".
  2091. format(len(sample['gt_rbox2poly'])))
  2092. sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
  2093. [im_scale_x, im_scale_y],
  2094. [resize_w, resize_h])
  2095. # apply polygon
  2096. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2097. sample['gt_poly'] = self.apply_segm(
  2098. sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])
  2099. # apply semantic
  2100. if 'semantic' in sample and sample['semantic']:
  2101. semantic = sample['semantic']
  2102. semantic = cv2.resize(
  2103. semantic.astype('float32'),
  2104. None,
  2105. None,
  2106. fx=im_scale_x,
  2107. fy=im_scale_y,
  2108. interpolation=self.interp)
  2109. semantic = np.asarray(semantic).astype('int32')
  2110. semantic = np.expand_dims(semantic, 0)
  2111. sample['semantic'] = semantic
  2112. # apply gt_segm
  2113. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2114. masks = [
  2115. cv2.resize(
  2116. gt_segm,
  2117. None,
  2118. None,
  2119. fx=im_scale_x,
  2120. fy=im_scale_y,
  2121. interpolation=cv2.INTER_NEAREST)
  2122. for gt_segm in sample['gt_segm']
  2123. ]
  2124. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  2125. return sample
  2126. class RandomPerspective(BaseOperator):
  2127. """
  2128. Rotate, tranlate, scale, shear and perspect image and bboxes randomly,
  2129. refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
  2130. Args:
  2131. degree (int): rotation degree, uniformly sampled in [-degree, degree]
  2132. translate (float): translate fraction, translate_x and translate_y are uniformly sampled
  2133. in [0.5 - translate, 0.5 + translate]
  2134. scale (float): scale factor, uniformly sampled in [1 - scale, 1 + scale]
  2135. shear (int): shear degree, shear_x and shear_y are uniformly sampled in [-shear, shear]
  2136. perspective (float): perspective_x and perspective_y are uniformly sampled in [-perspective, perspective]
  2137. area_thr (float): the area threshold of bbox to be kept after transformation, default 0.25
  2138. fill_value (tuple): value used in case of a constant border, default (114, 114, 114)
  2139. """
  2140. def __init__(self,
  2141. degree=10,
  2142. translate=0.1,
  2143. scale=0.1,
  2144. shear=10,
  2145. perspective=0.0,
  2146. border=[0, 0],
  2147. area_thr=0.25,
  2148. fill_value=(114, 114, 114)):
  2149. super(RandomPerspective, self).__init__()
  2150. self.degree = degree
  2151. self.translate = translate
  2152. self.scale = scale
  2153. self.shear = shear
  2154. self.perspective = perspective
  2155. self.border = border
  2156. self.area_thr = area_thr
  2157. self.fill_value = fill_value
  2158. def apply(self, sample, context=None):
  2159. im = sample['image']
  2160. height = im.shape[0] + self.border[0] * 2
  2161. width = im.shape[1] + self.border[1] * 2
  2162. # center
  2163. C = np.eye(3)
  2164. C[0, 2] = -im.shape[1] / 2
  2165. C[1, 2] = -im.shape[0] / 2
  2166. # perspective
  2167. P = np.eye(3)
  2168. P[2, 0] = random.uniform(-self.perspective, self.perspective)
  2169. P[2, 1] = random.uniform(-self.perspective, self.perspective)
  2170. # Rotation and scale
  2171. R = np.eye(3)
  2172. a = random.uniform(-self.degree, self.degree)
  2173. s = random.uniform(1 - self.scale, 1 + self.scale)
  2174. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
  2175. # Shear
  2176. S = np.eye(3)
  2177. # shear x (deg)
  2178. S[0, 1] = math.tan(
  2179. random.uniform(-self.shear, self.shear) * math.pi / 180)
  2180. # shear y (deg)
  2181. S[1, 0] = math.tan(
  2182. random.uniform(-self.shear, self.shear) * math.pi / 180)
  2183. # Translation
  2184. T = np.eye(3)
  2185. T[0, 2] = random.uniform(0.5 - self.translate,
  2186. 0.5 + self.translate) * width
  2187. T[1, 2] = random.uniform(0.5 - self.translate,
  2188. 0.5 + self.translate) * height
  2189. # matmul
  2190. # M = T @ S @ R @ P @ C
  2191. M = np.eye(3)
  2192. for cM in [T, S, R, P, C]:
  2193. M = np.matmul(M, cM)
  2194. if (self.border[0] != 0) or (self.border[1] != 0) or (
  2195. M != np.eye(3)).any():
  2196. if self.perspective:
  2197. im = cv2.warpPerspective(
  2198. im, M, dsize=(width, height), borderValue=self.fill_value)
  2199. else:
  2200. im = cv2.warpAffine(
  2201. im,
  2202. M[:2],
  2203. dsize=(width, height),
  2204. borderValue=self.fill_value)
  2205. sample['image'] = im
  2206. if sample['gt_bbox'].shape[0] > 0:
  2207. sample = transform_bbox(
  2208. sample,
  2209. M,
  2210. width,
  2211. height,
  2212. area_thr=self.area_thr,
  2213. perspective=self.perspective)
  2214. return sample
  2215. @register_op
  2216. class Mosaic(BaseOperator):
  2217. """
  2218. Mosaic Data Augmentation, refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
  2219. """
  2220. def __init__(self,
  2221. target_size,
  2222. mosaic_border=None,
  2223. fill_value=(114, 114, 114)):
  2224. super(Mosaic, self).__init__()
  2225. self.target_size = target_size
  2226. if mosaic_border is None:
  2227. mosaic_border = (-target_size // 2, -target_size // 2)
  2228. self.mosaic_border = mosaic_border
  2229. self.fill_value = fill_value
  2230. def __call__(self, sample, context=None):
  2231. if not isinstance(sample, Sequence):
  2232. return sample
  2233. s = self.target_size
  2234. yc, xc = [
  2235. int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border
  2236. ]
  2237. boxes = [x['gt_bbox'] for x in sample]
  2238. labels = [x['gt_class'] for x in sample]
  2239. for i in range(len(sample)):
  2240. im = sample[i]['image']
  2241. h, w, c = im.shape
  2242. if i == 0: # top left
  2243. image = np.ones(
  2244. (s * 2, s * 2, c), dtype=np.uint8) * self.fill_value
  2245. # xmin, ymin, xmax, ymax (dst image)
  2246. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
  2247. # xmin, ymin, xmax, ymax (src image)
  2248. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
  2249. elif i == 1: # top right
  2250. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
  2251. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  2252. elif i == 2: # bottom left
  2253. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
  2254. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(
  2255. y2a - y1a, h)
  2256. elif i == 3: # bottom right
  2257. x1a, y1a, x2a, y2a = xc, yc, min(xc + w,
  2258. s * 2), min(s * 2, yc + h)
  2259. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  2260. image[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]
  2261. padw = x1a - x1b
  2262. padh = y1a - y1b
  2263. boxes[i] = boxes[i] + (padw, padh, padw, padh)
  2264. boxes = np.concatenate(boxes, axis=0)
  2265. boxes = np.clip(boxes, 0, s * 2)
  2266. labels = np.concatenate(labels, axis=0)
  2267. if 'is_crowd' in sample[0]:
  2268. is_crowd = np.concatenate([x['is_crowd'] for x in sample], axis=0)
  2269. if 'difficult' in sample[0]:
  2270. difficult = np.concatenate(
  2271. [x['difficult'] for x in sample], axis=0)
  2272. sample = sample[0]
  2273. sample['image'] = image.astype(np.uint8)
  2274. sample['gt_bbox'] = boxes
  2275. sample['gt_class'] = labels
  2276. if 'is_crowd' in sample:
  2277. sample['is_crowd'] = is_crowd
  2278. if 'difficult' in sample:
  2279. sample['difficult'] = difficult
  2280. return sample
  2281. @register_op
  2282. class RandomSelect(BaseOperator):
  2283. """
  2284. Randomly choose a transformation between transforms1 and transforms2,
  2285. and the probability of choosing transforms1 is p.
  2286. """
  2287. def __init__(self, transforms1, transforms2, p=0.5):
  2288. super(RandomSelect, self).__init__()
  2289. self.transforms1 = Compose(transforms1)
  2290. self.transforms2 = Compose(transforms2)
  2291. self.p = p
  2292. def apply(self, sample, context=None):
  2293. if random.random() < self.p:
  2294. return self.transforms1(sample)
  2295. return self.transforms2(sample)
  2296. @register_op
  2297. class RandomShortSideResize(BaseOperator):
  2298. def __init__(self,
  2299. short_side_sizes,
  2300. max_size=None,
  2301. interp=cv2.INTER_LINEAR,
  2302. random_interp=False):
  2303. """
  2304. Resize the image randomly according to the short side. If max_size is not None,
  2305. the long side is scaled according to max_size. The whole process will be keep ratio.
  2306. Args:
  2307. short_side_sizes (list|tuple): Image target short side size.
  2308. max_size (int): The size of the longest side of image after resize.
  2309. interp (int): The interpolation method.
  2310. random_interp (bool): Whether random select interpolation method.
  2311. """
  2312. super(RandomShortSideResize, self).__init__()
  2313. assert isinstance(short_side_sizes,
  2314. Sequence), "short_side_sizes must be List or Tuple"
  2315. self.short_side_sizes = short_side_sizes
  2316. self.max_size = max_size
  2317. self.interp = interp
  2318. self.random_interp = random_interp
  2319. self.interps = [
  2320. cv2.INTER_NEAREST,
  2321. cv2.INTER_LINEAR,
  2322. cv2.INTER_AREA,
  2323. cv2.INTER_CUBIC,
  2324. cv2.INTER_LANCZOS4,
  2325. ]
  2326. def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
  2327. h, w = image_shape
  2328. if max_size is not None:
  2329. min_original_size = float(min((w, h)))
  2330. max_original_size = float(max((w, h)))
  2331. if max_original_size / min_original_size * size > max_size:
  2332. size = int(
  2333. round(max_size * min_original_size / max_original_size))
  2334. if (w <= h and w == size) or (h <= w and h == size):
  2335. return (w, h)
  2336. if w < h:
  2337. ow = size
  2338. oh = int(size * h / w)
  2339. else:
  2340. oh = size
  2341. ow = int(size * w / h)
  2342. return (ow, oh)
  2343. def resize(self,
  2344. sample,
  2345. target_size,
  2346. max_size=None,
  2347. interp=cv2.INTER_LINEAR):
  2348. im = sample['image']
  2349. if not isinstance(im, np.ndarray):
  2350. raise TypeError("{}: image type is not numpy.".format(self))
  2351. if len(im.shape) != 3:
  2352. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  2353. target_size = self.get_size_with_aspect_ratio(im.shape[:2],
  2354. target_size, max_size)
  2355. im_scale_y, im_scale_x = target_size[1] / im.shape[0], target_size[
  2356. 0] / im.shape[1]
  2357. sample['image'] = cv2.resize(im, target_size, interpolation=interp)
  2358. sample['im_shape'] = np.asarray(target_size[::-1], dtype=np.float32)
  2359. if 'scale_factor' in sample:
  2360. scale_factor = sample['scale_factor']
  2361. sample['scale_factor'] = np.asarray(
  2362. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  2363. dtype=np.float32)
  2364. else:
  2365. sample['scale_factor'] = np.asarray(
  2366. [im_scale_y, im_scale_x], dtype=np.float32)
  2367. # apply bbox
  2368. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2369. sample['gt_bbox'] = self.apply_bbox(
  2370. sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
  2371. # apply polygon
  2372. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2373. sample['gt_poly'] = self.apply_segm(
  2374. sample['gt_poly'], im.shape[:2], [im_scale_x, im_scale_y])
  2375. # apply semantic
  2376. if 'semantic' in sample and sample['semantic']:
  2377. semantic = sample['semantic']
  2378. semantic = cv2.resize(
  2379. semantic.astype('float32'),
  2380. target_size,
  2381. interpolation=self.interp)
  2382. semantic = np.asarray(semantic).astype('int32')
  2383. semantic = np.expand_dims(semantic, 0)
  2384. sample['semantic'] = semantic
  2385. # apply gt_segm
  2386. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2387. masks = [
  2388. cv2.resize(
  2389. gt_segm, target_size, interpolation=cv2.INTER_NEAREST)
  2390. for gt_segm in sample['gt_segm']
  2391. ]
  2392. sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
  2393. return sample
  2394. def apply_bbox(self, bbox, scale, size):
  2395. im_scale_x, im_scale_y = scale
  2396. resize_w, resize_h = size
  2397. bbox[:, 0::2] *= im_scale_x
  2398. bbox[:, 1::2] *= im_scale_y
  2399. bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
  2400. bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
  2401. return bbox.astype('float32')
  2402. def apply_segm(self, segms, im_size, scale):
  2403. def _resize_poly(poly, im_scale_x, im_scale_y):
  2404. resized_poly = np.array(poly).astype('float32')
  2405. resized_poly[0::2] *= im_scale_x
  2406. resized_poly[1::2] *= im_scale_y
  2407. return resized_poly.tolist()
  2408. def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
  2409. if 'counts' in rle and type(rle['counts']) == list:
  2410. rle = mask_util.frPyObjects(rle, im_h, im_w)
  2411. mask = mask_util.decode(rle)
  2412. mask = cv2.resize(
  2413. mask,
  2414. None,
  2415. None,
  2416. fx=im_scale_x,
  2417. fy=im_scale_y,
  2418. interpolation=self.interp)
  2419. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  2420. return rle
  2421. im_h, im_w = im_size
  2422. im_scale_x, im_scale_y = scale
  2423. resized_segms = []
  2424. for segm in segms:
  2425. if is_poly(segm):
  2426. # Polygon format
  2427. resized_segms.append([
  2428. _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
  2429. ])
  2430. else:
  2431. # RLE format
  2432. import pycocotools.mask as mask_util
  2433. resized_segms.append(
  2434. _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
  2435. return resized_segms
  2436. def apply(self, sample, context=None):
  2437. target_size = random.choice(self.short_side_sizes)
  2438. interp = random.choice(
  2439. self.interps) if self.random_interp else self.interp
  2440. return self.resize(sample, target_size, self.max_size, interp)
  2441. @register_op
  2442. class RandomSizeCrop(BaseOperator):
  2443. """
  2444. Cut the image randomly according to `min_size` and `max_size`
  2445. """
  2446. def __init__(self, min_size, max_size):
  2447. super(RandomSizeCrop, self).__init__()
  2448. self.min_size = min_size
  2449. self.max_size = max_size
  2450. from paddle.vision.transforms.functional import crop as paddle_crop
  2451. self.paddle_crop = paddle_crop
  2452. @staticmethod
  2453. def get_crop_params(img_shape, output_size):
  2454. """Get parameters for ``crop`` for a random crop.
  2455. Args:
  2456. img_shape (list|tuple): Image's height and width.
  2457. output_size (list|tuple): Expected output size of the crop.
  2458. Returns:
  2459. tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
  2460. """
  2461. h, w = img_shape
  2462. th, tw = output_size
  2463. if h + 1 < th or w + 1 < tw:
  2464. raise ValueError(
  2465. "Required crop size {} is larger then input image size {}".
  2466. format((th, tw), (h, w)))
  2467. if w == tw and h == th:
  2468. return 0, 0, h, w
  2469. i = random.randint(0, h - th + 1)
  2470. j = random.randint(0, w - tw + 1)
  2471. return i, j, th, tw
  2472. def crop(self, sample, region):
  2473. image_shape = sample['image'].shape[:2]
  2474. sample['image'] = self.paddle_crop(sample['image'], *region)
  2475. keep_index = None
  2476. # apply bbox
  2477. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  2478. sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], region)
  2479. bbox = sample['gt_bbox'].reshape([-1, 2, 2])
  2480. area = (bbox[:, 1, :] - bbox[:, 0, :]).prod(axis=1)
  2481. keep_index = np.where(area > 0)[0]
  2482. sample['gt_bbox'] = sample['gt_bbox'][keep_index] if len(
  2483. keep_index) > 0 else np.zeros(
  2484. [0, 4], dtype=np.float32)
  2485. sample['gt_class'] = sample['gt_class'][keep_index] if len(
  2486. keep_index) > 0 else np.zeros(
  2487. [0, 1], dtype=np.float32)
  2488. if 'gt_score' in sample:
  2489. sample['gt_score'] = sample['gt_score'][keep_index] if len(
  2490. keep_index) > 0 else np.zeros(
  2491. [0, 1], dtype=np.float32)
  2492. if 'is_crowd' in sample:
  2493. sample['is_crowd'] = sample['is_crowd'][keep_index] if len(
  2494. keep_index) > 0 else np.zeros(
  2495. [0, 1], dtype=np.float32)
  2496. # apply polygon
  2497. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  2498. sample['gt_poly'] = self.apply_segm(sample['gt_poly'], region,
  2499. image_shape)
  2500. if keep_index is not None:
  2501. sample['gt_poly'] = sample['gt_poly'][keep_index]
  2502. # apply gt_segm
  2503. if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
  2504. i, j, h, w = region
  2505. sample['gt_segm'] = sample['gt_segm'][:, i:i + h, j:j + w]
  2506. if keep_index is not None:
  2507. sample['gt_segm'] = sample['gt_segm'][keep_index]
  2508. return sample
  2509. def apply_bbox(self, bbox, region):
  2510. i, j, h, w = region
  2511. region_size = np.asarray([w, h])
  2512. crop_bbox = bbox - np.asarray([j, i, j, i])
  2513. crop_bbox = np.minimum(crop_bbox.reshape([-1, 2, 2]), region_size)
  2514. crop_bbox = crop_bbox.clip(min=0)
  2515. return crop_bbox.reshape([-1, 4]).astype('float32')
  2516. def apply_segm(self, segms, region, image_shape):
  2517. def _crop_poly(segm, crop):
  2518. xmin, ymin, xmax, ymax = crop
  2519. crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
  2520. crop_p = np.array(crop_coord).reshape(4, 2)
  2521. crop_p = Polygon(crop_p)
  2522. crop_segm = list()
  2523. for poly in segm:
  2524. poly = np.array(poly).reshape(len(poly) // 2, 2)
  2525. polygon = Polygon(poly)
  2526. if not polygon.is_valid:
  2527. exterior = polygon.exterior
  2528. multi_lines = exterior.intersection(exterior)
  2529. polygons = shapely.ops.polygonize(multi_lines)
  2530. polygon = MultiPolygon(polygons)
  2531. multi_polygon = list()
  2532. if isinstance(polygon, MultiPolygon):
  2533. multi_polygon = copy.deepcopy(polygon)
  2534. else:
  2535. multi_polygon.append(copy.deepcopy(polygon))
  2536. for per_polygon in multi_polygon:
  2537. inter = per_polygon.intersection(crop_p)
  2538. if not inter:
  2539. continue
  2540. if isinstance(inter, (MultiPolygon, GeometryCollection)):
  2541. for part in inter:
  2542. if not isinstance(part, Polygon):
  2543. continue
  2544. part = np.squeeze(
  2545. np.array(part.exterior.coords[:-1]).reshape(
  2546. 1, -1))
  2547. part[0::2] -= xmin
  2548. part[1::2] -= ymin
  2549. crop_segm.append(part.tolist())
  2550. elif isinstance(inter, Polygon):
  2551. crop_poly = np.squeeze(
  2552. np.array(inter.exterior.coords[:-1]).reshape(1,
  2553. -1))
  2554. crop_poly[0::2] -= xmin
  2555. crop_poly[1::2] -= ymin
  2556. crop_segm.append(crop_poly.tolist())
  2557. else:
  2558. continue
  2559. return crop_segm
  2560. def _crop_rle(rle, crop, height, width):
  2561. if 'counts' in rle and type(rle['counts']) == list:
  2562. rle = mask_util.frPyObjects(rle, height, width)
  2563. mask = mask_util.decode(rle)
  2564. mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
  2565. rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
  2566. return rle
  2567. i, j, h, w = region
  2568. crop = [j, i, j + w, i + h]
  2569. height, width = image_shape
  2570. crop_segms = []
  2571. for segm in segms:
  2572. if is_poly(segm):
  2573. import copy
  2574. import shapely.ops
  2575. from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
  2576. # Polygon format
  2577. crop_segms.append(_crop_poly(segm, crop))
  2578. else:
  2579. # RLE format
  2580. import pycocotools.mask as mask_util
  2581. crop_segms.append(_crop_rle(segm, crop, height, width))
  2582. return crop_segms
  2583. def apply(self, sample, context=None):
  2584. h = random.randint(self.min_size,
  2585. min(sample['image'].shape[0], self.max_size))
  2586. w = random.randint(self.min_size,
  2587. min(sample['image'].shape[1], self.max_size))
  2588. region = self.get_crop_params(sample['image'].shape[:2], [h, w])
  2589. return self.crop(sample, region)