ops.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. from numbers import Integral
  16. import math
  17. import six
  18. import paddle
  19. from paddle import fluid
  20. def bbox_overlaps(boxes_1, boxes_2):
  21. '''
  22. bbox_overlaps
  23. boxes_1: x1, y, x2, y2
  24. boxes_2: x1, y, x2, y2
  25. '''
  26. assert boxes_1.shape[1] == 4 and boxes_2.shape[1] == 4
  27. num_1 = boxes_1.shape[0]
  28. num_2 = boxes_2.shape[0]
  29. x1_1 = boxes_1[:, 0:1]
  30. y1_1 = boxes_1[:, 1:2]
  31. x2_1 = boxes_1[:, 2:3]
  32. y2_1 = boxes_1[:, 3:4]
  33. area_1 = (x2_1 - x1_1 + 1) * (y2_1 - y1_1 + 1)
  34. x1_2 = boxes_2[:, 0].transpose()
  35. y1_2 = boxes_2[:, 1].transpose()
  36. x2_2 = boxes_2[:, 2].transpose()
  37. y2_2 = boxes_2[:, 3].transpose()
  38. area_2 = (x2_2 - x1_2 + 1) * (y2_2 - y1_2 + 1)
  39. xx1 = np.maximum(x1_1, x1_2)
  40. yy1 = np.maximum(y1_1, y1_2)
  41. xx2 = np.minimum(x2_1, x2_2)
  42. yy2 = np.minimum(y2_1, y2_2)
  43. w = np.maximum(0.0, xx2 - xx1 + 1)
  44. h = np.maximum(0.0, yy2 - yy1 + 1)
  45. inter = w * h
  46. ovr = inter / (area_1 + area_2 - inter)
  47. return ovr
  48. def box_to_delta(ex_boxes, gt_boxes, weights):
  49. """ box_to_delta """
  50. ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
  51. ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
  52. ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
  53. ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h
  54. gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
  55. gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
  56. gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
  57. gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h
  58. dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
  59. dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
  60. dw = (np.log(gt_w / ex_w)) / weights[2]
  61. dh = (np.log(gt_h / ex_h)) / weights[3]
  62. targets = np.vstack([dx, dy, dw, dh]).transpose()
  63. return targets
  64. def DropBlock(input, block_size, keep_prob, is_test):
  65. if is_test:
  66. return input
  67. def CalculateGamma(input, block_size, keep_prob):
  68. input_shape = fluid.layers.shape(input)
  69. feat_shape_tmp = fluid.layers.slice(input_shape, [0], [3], [4])
  70. feat_shape_tmp = fluid.layers.cast(feat_shape_tmp, dtype="float32")
  71. feat_shape_t = fluid.layers.reshape(feat_shape_tmp, [1, 1, 1, 1])
  72. feat_area = fluid.layers.pow(feat_shape_t, factor=2)
  73. block_shape_t = fluid.layers.fill_constant(
  74. shape=[1, 1, 1, 1], value=block_size, dtype='float32')
  75. block_area = fluid.layers.pow(block_shape_t, factor=2)
  76. useful_shape_t = feat_shape_t - block_shape_t + 1
  77. useful_area = fluid.layers.pow(useful_shape_t, factor=2)
  78. upper_t = feat_area * (1 - keep_prob)
  79. bottom_t = block_area * useful_area
  80. output = upper_t / bottom_t
  81. return output
  82. gamma = CalculateGamma(input, block_size=block_size, keep_prob=keep_prob)
  83. input_shape = fluid.layers.shape(input)
  84. p = fluid.layers.expand_as(gamma, input)
  85. input_shape_tmp = fluid.layers.cast(input_shape, dtype="int64")
  86. random_matrix = fluid.layers.uniform_random(
  87. input_shape_tmp, dtype='float32', min=0.0, max=1.0)
  88. one_zero_m = fluid.layers.less_than(random_matrix, p)
  89. one_zero_m.stop_gradient = True
  90. one_zero_m = fluid.layers.cast(one_zero_m, dtype="float32")
  91. mask_flag = fluid.layers.pool2d(
  92. one_zero_m,
  93. pool_size=block_size,
  94. pool_type='max',
  95. pool_stride=1,
  96. pool_padding=block_size // 2)
  97. mask = 1.0 - mask_flag
  98. elem_numel = fluid.layers.reduce_prod(input_shape)
  99. elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
  100. elem_numel_m.stop_gradient = True
  101. elem_sum = fluid.layers.reduce_sum(mask)
  102. elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
  103. elem_sum_m.stop_gradient = True
  104. output = input * mask * elem_numel_m / elem_sum_m
  105. return output
  106. class MultiClassNMS(object):
  107. def __init__(self,
  108. score_threshold=.05,
  109. nms_top_k=-1,
  110. keep_top_k=100,
  111. nms_threshold=.5,
  112. normalized=False,
  113. nms_eta=1.0,
  114. background_label=0):
  115. super(MultiClassNMS, self).__init__()
  116. self.score_threshold = score_threshold
  117. self.nms_top_k = nms_top_k
  118. self.keep_top_k = keep_top_k
  119. self.nms_threshold = nms_threshold
  120. self.normalized = normalized
  121. self.nms_eta = nms_eta
  122. self.background_label = background_label
  123. def __call__(self, bboxes, scores):
  124. return fluid.layers.multiclass_nms(
  125. bboxes=bboxes,
  126. scores=scores,
  127. score_threshold=self.score_threshold,
  128. nms_top_k=self.nms_top_k,
  129. keep_top_k=self.keep_top_k,
  130. normalized=self.normalized,
  131. nms_threshold=self.nms_threshold,
  132. nms_eta=self.nms_eta,
  133. background_label=self.background_label)
  134. class MatrixNMS(object):
  135. def __init__(self,
  136. score_threshold=.05,
  137. post_threshold=.05,
  138. nms_top_k=-1,
  139. keep_top_k=100,
  140. use_gaussian=False,
  141. gaussian_sigma=2.,
  142. normalized=False,
  143. background_label=0):
  144. super(MatrixNMS, self).__init__()
  145. self.score_threshold = score_threshold
  146. self.post_threshold = post_threshold
  147. self.nms_top_k = nms_top_k
  148. self.keep_top_k = keep_top_k
  149. self.normalized = normalized
  150. self.use_gaussian = use_gaussian
  151. self.gaussian_sigma = gaussian_sigma
  152. self.background_label = background_label
  153. def __call__(self, bboxes, scores):
  154. return paddle.fluid.layers.matrix_nms(
  155. bboxes=bboxes,
  156. scores=scores,
  157. score_threshold=self.score_threshold,
  158. post_threshold=self.post_threshold,
  159. nms_top_k=self.nms_top_k,
  160. keep_top_k=self.keep_top_k,
  161. normalized=self.normalized,
  162. use_gaussian=self.use_gaussian,
  163. gaussian_sigma=self.gaussian_sigma,
  164. background_label=self.background_label)
class MultiClassSoftNMS(object):
    """Multi-class Soft-NMS computed with NumPy inside a ``py_func`` op.

    The constructor only stores its settings; ``__call__`` creates the output
    variable and registers the NumPy routine in the default main program.
    """

    def __init__(
            self,
            score_threshold=0.01,
            keep_top_k=300,
            softnms_sigma=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold
        self.keep_top_k = keep_top_k
        self.softnms_sigma = softnms_sigma
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Register the Soft-NMS ``py_func`` and return its output variable.

        Args:
            bboxes: box variable handed to the NumPy routine.
            scores: score variable handed to the NumPy routine.

        Returns:
            The variable named ``softnms_pred_result`` (float32, shape
            ``[-1, 6]``, ``lod_level=1``) that the ``py_func`` writes to.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # Declare a variable in the program's current block.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _soft_nms_for_cls(dets, sigma, thres):
            """Run Soft-NMS on the detections of a single class.

            ``dets`` rows are ``score, x1, y1, x2, y2``. On every iteration
            the top-scoring row is moved to the result, the remaining scores
            are multiplied by ``exp(-iou^2 / sigma)`` and rows whose score
            falls below ``thres`` are dropped. Returns an array reshaped to
            ``(-1, 5)``.
            """
            dets_final = []
            while len(dets) > 0:
                maxpos = np.argmax(dets[:, 0])
                dets_final.append(dets[maxpos].copy())
                ts, tx1, ty1, tx2, ty2 = dets[maxpos]
                # `scores` is a view here, so the write below lands in `dets`.
                scores = dets[:, 0]
                # force remove bbox at maxpos
                scores[maxpos] = -1
                x1 = dets[:, 1]
                y1 = dets[:, 2]
                x2 = dets[:, 3]
                y2 = dets[:, 4]
                # Unnormalized coordinates use the "+ 1" size convention.
                eta = 0 if self.normalized else 1
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)
                xx1 = np.maximum(tx1, x1)
                yy1 = np.maximum(ty1, y1)
                xx2 = np.minimum(tx2, x2)
                yy2 = np.minimum(ty2, y2)
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas + areas[maxpos] - inter)
                # Gaussian decay of every remaining score by its overlap.
                weight = np.exp(-(ovr * ovr) / sigma)
                scores = scores * weight
                idx_keep = np.where(scores >= thres)
                dets[:, 0] = scores
                dets = dets[idx_keep]
            dets_final = np.array(dets_final).reshape(-1, 5)
            return dets_final

        def _soft_nms(bboxes, scores):
            """Run Soft-NMS per class on one image.

            Returns rows of ``class_id, score, x1, y1, x2, y2``. When more
            than ``keep_top_k`` rows survive, only rows scoring at least the
            ``keep_top_k``-th highest score are kept.
            """
            class_nums = scores.shape[-1]
            softnms_thres = self.score_threshold
            softnms_sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Class 0 is skipped when it is the background label.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= softnms_thres)[0]
                scores_j = scores[inds, j]
                # 3-D boxes are indexed per class; 2-D boxes are shared.
                rois_j = bboxes[inds, j, :] if len(
                    bboxes.shape) > 2 else bboxes[inds, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort by descending score before suppression.
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _soft_nms_for_cls(
                    dets_j, sigma=softnms_sigma, thres=softnms_thres)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
                                                                           1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes])
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            return pred_result

        def _batch_softnms(bboxes, scores):
            """Entry point executed by ``py_func``; handles the whole batch.

            If ``bboxes`` carries LoD offsets, its first level splits the
            rows per image; otherwise both inputs must be 3-D and the first
            axis is iterated. Returns a ``fluid.LoDTensor`` whose LoD holds
            the cumulative number of detections per image.
            """
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            out_offsets = [0]
            pred_res = []
            if len(batch_offsets) > 0:
                batch_offset = batch_offsets[0]
                for i in range(len(batch_offset) - 1):
                    s, e = batch_offset[i], batch_offset[i + 1]
                    pred = _soft_nms(bboxes[s:e], scores[s:e])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            else:
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                for i in range(bboxes.shape[0]):
                    pred = _soft_nms(bboxes[i], scores[i])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            # With no images at all, a 1x1 placeholder is stored instead.
            if len(pred_res) == 0:
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        # NOTE(review): the variable name is fixed, so calling this object
        # twice in one program presumably reuses the same name - confirm.
        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result
class MultiClassDiouNMS(object):
    """Multi-class DIoU-NMS computed with NumPy inside a ``py_func`` op.

    The constructor only stores its settings; ``__call__`` creates the output
    variable and registers the NumPy routine in the default main program.
    """

    def __init__(
            self,
            score_threshold=0.05,
            keep_top_k=100,
            nms_threshold=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Register the DIoU-NMS ``py_func`` and return its output variable.

        Args:
            bboxes: box variable handed to the NumPy routine.
            scores: score variable handed to the NumPy routine.

        Returns:
            The variable named ``diou_nms_pred_result`` (float32, shape
            ``[-1, 6]``, ``lod_level=0``) that the ``py_func`` writes to.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # Declare a variable in the program's current block.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _calc_diou_term(dets1, dets2):
            """Return the DIoU penalty between ``dets1`` and ``dets2``.

            Both arguments are ``[x1, y1, x2, y2]`` sequences. The result is
            ``(squared center distance + eps) / (squared diagonal of the
            enclosing box + eps)``.
            """
            eps = 1.e-10
            eta = 0 if self.normalized else 1
            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            # NOTE(review): w, h, wg and hg are computed but never read below.
            w = x2 - x1 + eta
            h = y2 - y1 + eta
            cxg = (x1g + x2g) / 2
            cyg = (y1g + y2g) / 2
            wg = x2g - x1g + eta
            hg = y2g - y1g + eta
            # Guard against inverted corners before building the enclosing box.
            x2 = np.maximum(x1, x2)
            y2 = np.maximum(y1, y2)
            # A or B
            xc1 = np.minimum(x1, x1g)
            yc1 = np.minimum(y1, y1g)
            xc2 = np.maximum(x2, x2g)
            yc2 = np.maximum(y2, y2g)
            # DIOU term
            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
            diou_term = (dist_intersection + eps) / (dist_union + eps)
            return diou_term

        def _diou_nms_for_cls(dets, thres):
            """Greedy NMS for one class using ``IoU - DIoU term`` as criterion.

            ``dets`` rows are ``score, x1, y1, x2, y2`` and are visited in the
            given row order (the caller sorts them by descending score). A
            later box is kept only while ``iou - diou_term <= thres`` against
            every box selected so far. Returns the selected rows.
            """
            scores = dets[:, 0]
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            eta = 0 if self.normalized else 1
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            dt_num = dets.shape[0]
            order = np.array(range(dt_num))
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                xx1 = np.maximum(x1[i], x1[order[1:]])
                yy1 = np.maximum(y1[i], y1[order[1:]])
                xx2 = np.minimum(x2[i], x2[order[1:]])
                yy2 = np.minimum(y2[i], y2[order[1:]])
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
                diou_term = _calc_diou_term([x1[i], y1[i], x2[i], y2[i]], [
                    x1[order[1:]], y1[order[1:]], x2[order[1:]], y2[order[1:]]
                ])
                inds = np.where(ovr - diou_term <= thres)[0]
                # `inds` indexes order[1:], hence the + 1 shift.
                order = order[inds + 1]
            dets_final = dets[keep]
            return dets_final

        def _diou_nms(bboxes, scores):
            """Entry point executed by ``py_func``.

            Runs per-class DIoU-NMS and returns a ``fluid.LoDTensor`` with
            rows of ``class_id, score, x1, y1, x2, y2`` and a single-sequence
            LoD. Boxes are indexed as ``bboxes[inds, j, :]``, i.e. one box
            per candidate and class.
            """
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]
            score_threshold = self.score_threshold
            nms_threshold = self.nms_threshold
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Class 0 is skipped when it is the background label.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= score_threshold)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort by descending score before suppression.
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
                                                                           1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            # With no detections, a 1x1 placeholder is stored instead.
            if pred_result.shape[0] == 0:
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())
            return res

        # NOTE(review): the variable is declared with lod_level=0 although
        # _diou_nms sets a LoD on the tensor it returns - confirm intended.
        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
  398. class LibraBBoxAssigner(object):
  399. def __init__(self,
  400. batch_size_per_im=512,
  401. fg_fraction=.25,
  402. fg_thresh=.5,
  403. bg_thresh_hi=.5,
  404. bg_thresh_lo=0.,
  405. bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
  406. num_classes=81,
  407. shuffle_before_sample=True,
  408. is_cls_agnostic=False,
  409. num_bins=3):
  410. super(LibraBBoxAssigner, self).__init__()
  411. self.batch_size_per_im = batch_size_per_im
  412. self.fg_fraction = fg_fraction
  413. self.fg_thresh = fg_thresh
  414. self.bg_thresh_hi = bg_thresh_hi
  415. self.bg_thresh_lo = bg_thresh_lo
  416. self.bbox_reg_weights = bbox_reg_weights
  417. self.class_nums = num_classes
  418. self.use_random = shuffle_before_sample
  419. self.is_cls_agnostic = is_cls_agnostic
  420. self.num_bins = num_bins
  421. def __call__(
  422. self,
  423. rpn_rois,
  424. gt_classes,
  425. is_crowd,
  426. gt_boxes,
  427. im_info, ):
  428. return self.generate_proposal_label_libra(
  429. rpn_rois=rpn_rois,
  430. gt_classes=gt_classes,
  431. is_crowd=is_crowd,
  432. gt_boxes=gt_boxes,
  433. im_info=im_info,
  434. batch_size_per_im=self.batch_size_per_im,
  435. fg_fraction=self.fg_fraction,
  436. fg_thresh=self.fg_thresh,
  437. bg_thresh_hi=self.bg_thresh_hi,
  438. bg_thresh_lo=self.bg_thresh_lo,
  439. bbox_reg_weights=self.bbox_reg_weights,
  440. class_nums=self.class_nums,
  441. use_random=self.use_random,
  442. is_cls_agnostic=self.is_cls_agnostic,
  443. is_cascade_rcnn=False)
  444. def generate_proposal_label_libra(
  445. self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
  446. batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
  447. bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
  448. is_cls_agnostic, is_cascade_rcnn):
  449. num_bins = self.num_bins
        def create_tmp_var(program, name, dtype, shape, lod_level=None):
            # Declare a variable in the program's current block and return it.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)
  453. def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
  454. if len(pos_inds) <= num_expected:
  455. return pos_inds
  456. else:
  457. unique_gt_inds = np.unique(max_classes[pos_inds])
  458. num_gts = len(unique_gt_inds)
  459. num_per_gt = int(round(num_expected / float(num_gts)) + 1)
  460. sampled_inds = []
  461. for i in unique_gt_inds:
  462. inds = np.nonzero(max_classes == i)[0]
  463. before_len = len(inds)
  464. inds = list(set(inds) & set(pos_inds))
  465. after_len = len(inds)
  466. if len(inds) > num_per_gt:
  467. inds = np.random.choice(
  468. inds, size=num_per_gt, replace=False)
  469. sampled_inds.extend(list(inds)) # combine as a new sampler
  470. if len(sampled_inds) < num_expected:
  471. num_extra = num_expected - len(sampled_inds)
  472. extra_inds = np.array(
  473. list(set(pos_inds) - set(sampled_inds)))
  474. assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
  475. "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
  476. len(sampled_inds), len(extra_inds), len(pos_inds))
  477. if len(extra_inds) > num_extra:
  478. extra_inds = np.random.choice(
  479. extra_inds, size=num_extra, replace=False)
  480. sampled_inds.extend(extra_inds.tolist())
  481. elif len(sampled_inds) > num_expected:
  482. sampled_inds = np.random.choice(
  483. sampled_inds, size=num_expected, replace=False)
  484. return sampled_inds
  485. def sample_via_interval(max_overlaps, full_set, num_expected,
  486. floor_thr, num_bins, bg_thresh_hi):
  487. max_iou = max_overlaps.max()
  488. iou_interval = (max_iou - floor_thr) / num_bins
  489. per_num_expected = int(num_expected / num_bins)
  490. sampled_inds = []
  491. for i in range(num_bins):
  492. start_iou = floor_thr + i * iou_interval
  493. end_iou = floor_thr + (i + 1) * iou_interval
  494. tmp_set = set(
  495. np.where(
  496. np.logical_and(max_overlaps >= start_iou, max_overlaps
  497. < end_iou))[0])
  498. tmp_inds = list(tmp_set & full_set)
  499. if len(tmp_inds) > per_num_expected:
  500. tmp_sampled_set = np.random.choice(
  501. tmp_inds, size=per_num_expected, replace=False)
  502. else:
  503. tmp_sampled_set = np.array(tmp_inds, dtype=np.int)
  504. sampled_inds.append(tmp_sampled_set)
  505. sampled_inds = np.concatenate(sampled_inds)
  506. if len(sampled_inds) < num_expected:
  507. num_extra = num_expected - len(sampled_inds)
  508. extra_inds = np.array(list(full_set - set(sampled_inds)))
  509. assert len(sampled_inds)+len(extra_inds) == len(full_set), \
  510. "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
  511. len(sampled_inds), len(extra_inds), len(full_set))
  512. if len(extra_inds) > num_extra:
  513. extra_inds = np.random.choice(
  514. extra_inds, num_extra, replace=False)
  515. sampled_inds = np.concatenate([sampled_inds, extra_inds])
  516. return sampled_inds
  517. def _sample_neg(max_overlaps,
  518. max_classes,
  519. neg_inds,
  520. num_expected,
  521. floor_thr=-1,
  522. floor_fraction=0,
  523. num_bins=3,
  524. bg_thresh_hi=0.5):
  525. if len(neg_inds) <= num_expected:
  526. return neg_inds
  527. else:
  528. # balance sampling for negative samples
  529. neg_set = set(neg_inds)
  530. if floor_thr > 0:
  531. floor_set = set(
  532. np.where(
  533. np.logical_and(max_overlaps >= 0, max_overlaps <
  534. floor_thr))[0])
  535. iou_sampling_set = set(
  536. np.where(max_overlaps >= floor_thr)[0])
  537. elif floor_thr == 0:
  538. floor_set = set(np.where(max_overlaps == 0)[0])
  539. iou_sampling_set = set(
  540. np.where(max_overlaps > floor_thr)[0])
  541. else:
  542. floor_set = set()
  543. iou_sampling_set = set(
  544. np.where(max_overlaps > floor_thr)[0])
  545. floor_thr = 0
  546. floor_neg_inds = list(floor_set & neg_set)
  547. iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
  548. num_expected_iou_sampling = int(num_expected *
  549. (1 - floor_fraction))
  550. if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
  551. if num_bins >= 2:
  552. iou_sampled_inds = sample_via_interval(
  553. max_overlaps,
  554. set(iou_sampling_neg_inds),
  555. num_expected_iou_sampling, floor_thr, num_bins,
  556. bg_thresh_hi)
  557. else:
  558. iou_sampled_inds = np.random.choice(
  559. iou_sampling_neg_inds,
  560. size=num_expected_iou_sampling,
  561. replace=False)
  562. else:
  563. iou_sampled_inds = np.array(
  564. iou_sampling_neg_inds, dtype=np.int)
  565. num_expected_floor = num_expected - len(iou_sampled_inds)
  566. if len(floor_neg_inds) > num_expected_floor:
  567. sampled_floor_inds = np.random.choice(
  568. floor_neg_inds, size=num_expected_floor, replace=False)
  569. else:
  570. sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int)
  571. sampled_inds = np.concatenate(
  572. (sampled_floor_inds, iou_sampled_inds))
  573. if len(sampled_inds) < num_expected:
  574. num_extra = num_expected - len(sampled_inds)
  575. extra_inds = np.array(list(neg_set - set(sampled_inds)))
  576. if len(extra_inds) > num_extra:
  577. extra_inds = np.random.choice(
  578. extra_inds, size=num_extra, replace=False)
  579. sampled_inds = np.concatenate((sampled_inds, extra_inds))
  580. return sampled_inds
        def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                         batch_size_per_im, fg_fraction, fg_thresh,
                         bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
                         class_nums, use_random, is_cls_agnostic,
                         is_cascade_rcnn):
            """Sample foreground/background RoIs and targets for one image.

            The proposals are rescaled by ``1 / im_info[2]``, stacked below the
            ground-truth boxes, matched to ground truth by IoU, split into
            foreground and background by the given thresholds, sampled, and
            turned into regression targets.

            Returns:
                dict with keys ``rois``, ``labels_int32``, ``bbox_targets``,
                ``bbox_inside_weights`` and ``bbox_outside_weights``.
            """
            rois_per_image = int(batch_size_per_im)
            fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
            # Roidb
            # im_info[2] is used as the image scale; proposals are divided by
            # it here and the sampled boxes are multiplied by it at the end.
            im_scale = im_info[2]
            inv_im_scale = 1. / im_scale
            rpn_rois = rpn_rois * inv_im_scale
            if is_cascade_rcnn:
                # Drop the first len(gt_boxes) rows of the proposals.
                rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
            # Ground-truth boxes come first, so they are candidates as well.
            boxes = np.vstack([gt_boxes, rpn_rois])
            gt_overlaps = np.zeros((boxes.shape[0], class_nums))
            box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
            if len(gt_boxes) > 0:
                proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
                overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
                overlaps_max = proposal_to_gt_overlaps.max(axis=1)
                # Boxes which with non-zero overlap with gt boxes
                overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
                overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
                    overlapped_boxes_ind]]
                # Record, per box, the best IoU under the matched gt's class
                # column and the index of that gt box.
                for idx in range(len(overlapped_boxes_ind)):
                    gt_overlaps[overlapped_boxes_ind[
                        idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
                            overlapped_boxes_ind[idx]]
                    box_to_gt_ind_map[overlapped_boxes_ind[
                        idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]
            # Rows selected by is_crowd get overlap -1 in every class column.
            crowd_ind = np.where(is_crowd)[0]
            gt_overlaps[crowd_ind] = -1
            max_overlaps = gt_overlaps.max(axis=1)
            max_classes = gt_overlaps.argmax(axis=1)
            # Cascade RCNN Decode Filter
            if is_cascade_rcnn:
                ws = boxes[:, 2] - boxes[:, 0] + 1
                hs = boxes[:, 3] - boxes[:, 1] + 1
                keep = np.where((ws > 0) & (hs > 0))[0]
                # NOTE(review): boxes and max_overlaps are filtered by `keep`
                # but max_classes and box_to_gt_ind_map are not, although both
                # are indexed with keep_inds below - confirm this is intended.
                boxes = boxes[keep]
                max_overlaps = max_overlaps[keep]
                fg_inds = np.where(max_overlaps >= fg_thresh)[0]
                bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                    max_overlaps >= bg_thresh_lo))[0]
                fg_rois_per_this_image = fg_inds.shape[0]
                bg_rois_per_this_image = bg_inds.shape[0]
            else:
                # Foreground
                fg_inds = np.where(max_overlaps >= fg_thresh)[0]
                fg_rois_per_this_image = np.minimum(fg_rois_per_im,
                                                    fg_inds.shape[0])
                # Sample foreground if there are too many
                if fg_inds.shape[0] > fg_rois_per_this_image:
                    if use_random:
                        fg_inds = _sample_pos(max_overlaps, max_classes,
                                              fg_inds, fg_rois_per_this_image)
                # Without use_random this simply keeps the first entries.
                fg_inds = fg_inds[:fg_rois_per_this_image]
                # Background
                bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                    max_overlaps >= bg_thresh_lo))[0]
                bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
                bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                                    bg_inds.shape[0])
                assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
                    bg_rois_per_this_image)
                # Sample background if there are too many
                if bg_inds.shape[0] > bg_rois_per_this_image:
                    if use_random:
                        # libra neg sample
                        bg_inds = _sample_neg(
                            max_overlaps,
                            max_classes,
                            bg_inds,
                            bg_rois_per_this_image,
                            num_bins=num_bins,
                            bg_thresh_hi=bg_thresh_hi)
                bg_inds = bg_inds[:bg_rois_per_this_image]
            # Foreground indices first, background indices after them.
            keep_inds = np.append(fg_inds, bg_inds)
            sampled_labels = max_classes[keep_inds]  # N x 1
            # Everything after the foreground rows is labelled 0.
            sampled_labels[fg_rois_per_this_image:] = 0
            sampled_boxes = boxes[keep_inds]  # N x 324
            sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
            # Background rows get the first gt box as their target box.
            # NOTE(review): gt_boxes[0] raises IndexError when gt_boxes is
            # empty - confirm callers never pass an image without gt boxes.
            sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
            bbox_label_targets = _compute_targets(
                sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
            bbox_targets, bbox_inside_weights = _expand_bbox_targets(
                bbox_label_targets, class_nums, is_cls_agnostic)
            # Outside weights are 1 wherever the inside weights are positive.
            bbox_outside_weights = np.array(
                bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
            # Scale rois
            sampled_rois = sampled_boxes * im_scale
            # Faster RCNN blobs
            frcn_blobs = dict(
                rois=sampled_rois,
                labels_int32=sampled_labels,
                bbox_targets=bbox_targets,
                bbox_inside_weights=bbox_inside_weights,
                bbox_outside_weights=bbox_outside_weights)
            return frcn_blobs
  680. def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
  681. assert roi_boxes.shape[0] == gt_boxes.shape[0]
  682. assert roi_boxes.shape[1] == 4
  683. assert gt_boxes.shape[1] == 4
  684. targets = np.zeros(roi_boxes.shape)
  685. bbox_reg_weights = np.asarray(bbox_reg_weights)
  686. targets = box_to_delta(
  687. ex_boxes=roi_boxes,
  688. gt_boxes=gt_boxes,
  689. weights=bbox_reg_weights)
  690. return np.hstack([labels[:, np.newaxis], targets]).astype(
  691. np.float32, copy=False)
  692. def _expand_bbox_targets(bbox_targets_input, class_nums,
  693. is_cls_agnostic):
  694. class_labels = bbox_targets_input[:, 0]
  695. fg_inds = np.where(class_labels > 0)[0]
  696. bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
  697. if not is_cls_agnostic else 4 * 2))
  698. bbox_inside_weights = np.zeros(bbox_targets.shape)
  699. for ind in fg_inds:
  700. class_label = int(class_labels[
  701. ind]) if not is_cls_agnostic else 1
  702. start_ind = class_label * 4
  703. end_ind = class_label * 4 + 4
  704. bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind,
  705. 1:]
  706. bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
  707. 1.0)
  708. return bbox_targets, bbox_inside_weights
  def generate_func(
          rpn_rois,
          gt_classes,
          is_crowd,
          gt_boxes,
          im_info, ):
      """Sample RoIs image by image and pack the results as LoD tensors.

      Args:
          rpn_rois: LoD tensor of proposals; its level-0 LoD splits the
              rows per image.
          gt_classes: LoD tensor of ground-truth classes; its level-0 LoD
              splits the ground-truth rows per image.
          is_crowd: tensor sliced with the same offsets as ``gt_classes``.
          gt_boxes: tensor sliced with the same offsets as ``gt_classes``.
          im_info: tensor indexed once per image.

      Returns:
          A 5-tuple of ``fluid.LoDTensor`` placed on CPU, all sharing one
          LoD: rois (float32), labels (int32, one column), bbox targets,
          bbox inside weights and bbox outside weights (float32).
      """
      # Read the LoD offsets first: the names are rebound to arrays below.
      roi_offsets = rpn_rois.lod()[0]
      gt_offsets = gt_classes.lod()[0]

      rpn_rois = np.array(rpn_rois)
      gt_classes = np.array(gt_classes)
      is_crowd = np.array(is_crowd)
      gt_boxes = np.array(gt_boxes)
      im_info = np.array(im_info)

      # Output order of the blobs; it is also the order of the return tuple.
      blob_keys = ('rois', 'labels_int32', 'bbox_targets',
                   'bbox_inside_weights', 'bbox_outside_weights')
      blob_dtypes = ("float32", "int32", "float32", "float32", "float32")
      collected = {key: [] for key in blob_keys}
      lod = [0]

      for idx in range(len(roi_offsets) - 1):
          roi_span = slice(roi_offsets[idx], roi_offsets[idx + 1])
          gt_span = slice(gt_offsets[idx], gt_offsets[idx + 1])
          frcn_blobs = _sample_rois(
              rpn_rois[roi_span], gt_classes[gt_span], is_crowd[gt_span],
              gt_boxes[gt_span], im_info[idx], batch_size_per_im,
              fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo,
              bbox_reg_weights, class_nums, use_random, is_cls_agnostic,
              is_cascade_rcnn)
          # Running total of sampled RoIs gives the output LoD offsets.
          lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
          for key in blob_keys:
              blob = frcn_blobs[key]
              if key == 'labels_int32':
                  # Labels are stacked as a single column.
                  blob = blob.reshape(-1, 1)
              collected[key].append(blob)

      stacked = [np.vstack(collected[key]) for key in blob_keys]

      def _as_lod_tensor(array, dtype):
          # create lod-tensor for return
          # notice that the func create_lod_tensor does not work well here
          tensor = fluid.LoDTensor()
          tensor.set_lod([lod])
          tensor.set(array.astype(dtype), fluid.CPUPlace())
          return tensor

      return tuple(
          _as_lod_tensor(array, dtype)
          for array, dtype in zip(stacked, blob_dtypes))
  773. rois = create_tmp_var(
  774. fluid.default_main_program(),
  775. name=None, #'rois',
  776. dtype='float32',
  777. shape=[-1, 4], )
  778. bbox_inside_weights = create_tmp_var(
  779. fluid.default_main_program(),
  780. name=None, #'bbox_inside_weights',
  781. dtype='float32',
  782. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  783. bbox_outside_weights = create_tmp_var(
  784. fluid.default_main_program(),
  785. name=None, #'bbox_outside_weights',
  786. dtype='float32',
  787. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  788. bbox_targets = create_tmp_var(
  789. fluid.default_main_program(),
  790. name=None, #'bbox_targets',
  791. dtype='float32',
  792. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  793. labels_int32 = create_tmp_var(
  794. fluid.default_main_program(),
  795. name=None, #'labels_int32',
  796. dtype='int32',
  797. shape=[-1, 1], )
  798. outs = [
  799. rois, labels_int32, bbox_targets, bbox_inside_weights,
  800. bbox_outside_weights
  801. ]
  802. fluid.layers.py_func(
  803. func=generate_func,
  804. x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
  805. out=outs)
  806. return outs
  class BBoxAssigner(object):
      """Callable wrapper around ``fluid.layers.generate_proposal_labels``.

      The sampling settings are stored at construction time and forwarded,
      together with the per-batch inputs, on every call.

      Args:
          batch_size_per_im: forwarded as ``batch_size_per_im``.
          fg_fraction: forwarded as ``fg_fraction``.
          fg_thresh: forwarded as ``fg_thresh``.
          bg_thresh_hi: forwarded as ``bg_thresh_hi``.
          bg_thresh_lo: forwarded as ``bg_thresh_lo``.
          bbox_reg_weights: iterable of weights, copied into a new list and
              forwarded as ``bbox_reg_weights``.
          num_classes: forwarded as ``class_nums``.
          shuffle_before_sample: forwarded as ``use_random``.
      """

      def __init__(self,
                   batch_size_per_im=512,
                   fg_fraction=.25,
                   fg_thresh=.5,
                   bg_thresh_hi=.5,
                   bg_thresh_lo=0.,
                   bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                   num_classes=81,
                   shuffle_before_sample=True):
          super(BBoxAssigner, self).__init__()
          self.batch_size_per_im = batch_size_per_im
          self.fg_fraction = fg_fraction
          self.fg_thresh = fg_thresh
          self.bg_thresh_hi = bg_thresh_hi
          self.bg_thresh_lo = bg_thresh_lo
          # Copy the weights: storing the argument itself would alias the
          # shared default list (or the caller's list), so an in-place
          # change on one instance would leak into every other one.
          self.bbox_reg_weights = list(bbox_reg_weights)
          self.class_nums = num_classes
          self.use_random = shuffle_before_sample

      def __call__(self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info):
          """Run ``generate_proposal_labels`` with the stored settings.

          Args:
              rpn_rois: forwarded as ``rpn_rois``.
              gt_classes: forwarded as ``gt_classes``.
              is_crowd: forwarded as ``is_crowd``.
              gt_boxes: forwarded as ``gt_boxes``.
              im_info: forwarded as ``im_info``.

          Returns:
              Whatever ``fluid.layers.generate_proposal_labels`` returns.
          """
          return fluid.layers.generate_proposal_labels(
              rpn_rois=rpn_rois,
              gt_classes=gt_classes,
              is_crowd=is_crowd,
              gt_boxes=gt_boxes,
              im_info=im_info,
              batch_size_per_im=self.batch_size_per_im,
              fg_fraction=self.fg_fraction,
              fg_thresh=self.fg_thresh,
              bg_thresh_hi=self.bg_thresh_hi,
              bg_thresh_lo=self.bg_thresh_lo,
              bbox_reg_weights=self.bbox_reg_weights,
              class_nums=self.class_nums,
              use_random=self.use_random)