rpn_head.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import numpy as np
  18. from paddle import fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from paddle.fluid.initializer import Normal
  21. from paddle.fluid.regularizer import L2Decay
  22. from paddle.fluid.initializer import Constant
  23. __all__ = ['RPNHead', 'FPNRPNHead']
  24. class RPNHead(object):
  25. def __init__(
  26. self,
  27. #anchor_generator
  28. stride=[16.0, 16.0],
  29. anchor_sizes=[32, 64, 128, 256, 512],
  30. aspect_ratios=[0.5, 1., 2.],
  31. variance=[1., 1., 1., 1.],
  32. #rpn_target_assign
  33. rpn_batch_size_per_im=256,
  34. rpn_straddle_thresh=0.,
  35. rpn_fg_fraction=0.5,
  36. rpn_positive_overlap=0.7,
  37. rpn_negative_overlap=0.3,
  38. use_random=True,
  39. rpn_cls_loss='SigmoidCrossEntropy',
  40. rpn_focal_loss_gamma=2,
  41. rpn_focal_loss_alpha=0.25,
  42. #train_proposal
  43. train_pre_nms_top_n=12000,
  44. train_post_nms_top_n=2000,
  45. train_nms_thresh=.7,
  46. train_min_size=.0,
  47. train_eta=1.,
  48. #test_proposal
  49. test_pre_nms_top_n=6000,
  50. test_post_nms_top_n=1000,
  51. test_nms_thresh=.7,
  52. test_min_size=.0,
  53. test_eta=1.,
  54. #num_classes
  55. num_classes=1):
  56. super(RPNHead, self).__init__()
  57. self.stride = stride
  58. self.anchor_sizes = anchor_sizes
  59. self.aspect_ratios = aspect_ratios
  60. self.variance = variance
  61. self.rpn_batch_size_per_im = rpn_batch_size_per_im
  62. self.rpn_straddle_thresh = rpn_straddle_thresh
  63. self.rpn_fg_fraction = rpn_fg_fraction
  64. self.rpn_positive_overlap = rpn_positive_overlap
  65. self.rpn_negative_overlap = rpn_negative_overlap
  66. self.use_random = use_random
  67. self.train_pre_nms_top_n = train_pre_nms_top_n
  68. self.train_post_nms_top_n = train_post_nms_top_n
  69. self.train_nms_thresh = train_nms_thresh
  70. self.train_min_size = train_min_size
  71. self.train_eta = train_eta
  72. self.test_pre_nms_top_n = test_pre_nms_top_n
  73. self.test_post_nms_top_n = test_post_nms_top_n
  74. self.test_nms_thresh = test_nms_thresh
  75. self.test_min_size = test_min_size
  76. self.test_eta = test_eta
  77. self.num_classes = num_classes
  78. self.rpn_cls_loss = rpn_cls_loss
  79. self.rpn_focal_loss_gamma = rpn_focal_loss_gamma
  80. self.rpn_focal_loss_alpha = rpn_focal_loss_alpha
  81. def _get_output(self, input):
  82. """
  83. Get anchor and RPN head output.
  84. Args:
  85. input(Variable): feature map from backbone with shape of [N, C, H, W]
  86. Returns:
  87. rpn_cls_score(Variable): Output of rpn head with shape of
  88. [N, num_anchors, H, W].
  89. rpn_bbox_pred(Variable): Output of rpn head with shape of
  90. [N, num_anchors * 4, H, W].
  91. """
  92. dim_out = input.shape[1]
  93. rpn_conv = fluid.layers.conv2d(
  94. input=input,
  95. num_filters=dim_out,
  96. filter_size=3,
  97. stride=1,
  98. padding=1,
  99. act='relu',
  100. name='conv_rpn',
  101. param_attr=ParamAttr(
  102. name="conv_rpn_w", initializer=Normal(
  103. loc=0., scale=0.01)),
  104. bias_attr=ParamAttr(
  105. name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
  106. # Generate anchors
  107. self.anchor, self.anchor_var = fluid.layers.anchor_generator(
  108. input=rpn_conv,
  109. stride=self.stride,
  110. anchor_sizes=self.anchor_sizes,
  111. aspect_ratios=self.aspect_ratios,
  112. variance=self.variance)
  113. num_anchor = self.anchor.shape[2]
  114. # Proposal classification scores
  115. if self.rpn_cls_loss == 'SigmoidCrossEntropy':
  116. bias_init = None
  117. elif self.rpn_cls_loss == 'SigmoidFocalLoss':
  118. value = float(-np.log((1 - 0.01) / 0.01))
  119. bias_init = Constant(value=value)
  120. self.rpn_cls_score = fluid.layers.conv2d(
  121. rpn_conv,
  122. num_filters=num_anchor * self.num_classes,
  123. filter_size=1,
  124. stride=1,
  125. padding=0,
  126. act=None,
  127. name='rpn_cls_score',
  128. param_attr=ParamAttr(
  129. name="rpn_cls_logits_w",
  130. initializer=Normal(
  131. loc=0., scale=0.01)),
  132. bias_attr=ParamAttr(
  133. name="rpn_cls_logits_b",
  134. initializer=bias_init,
  135. learning_rate=2.,
  136. regularizer=L2Decay(0.)))
  137. # Proposal bbox regression deltas
  138. self.rpn_bbox_pred = fluid.layers.conv2d(
  139. rpn_conv,
  140. num_filters=4 * num_anchor,
  141. filter_size=1,
  142. stride=1,
  143. padding=0,
  144. act=None,
  145. name='rpn_bbox_pred',
  146. param_attr=ParamAttr(
  147. name="rpn_bbox_pred_w", initializer=Normal(
  148. loc=0., scale=0.01)),
  149. bias_attr=ParamAttr(
  150. name="rpn_bbox_pred_b",
  151. learning_rate=2.,
  152. regularizer=L2Decay(0.)))
  153. return self.rpn_cls_score, self.rpn_bbox_pred
  154. def get_proposals(self, body_feats, im_info, mode='train'):
  155. """
  156. Get proposals according to the output of backbone.
  157. Args:
  158. body_feats (dict): The dictionary of feature maps from backbone.
  159. im_info(Variable): The information of image with shape [N, 3] with
  160. shape (height, width, scale).
  161. body_feat_names(list): A list of names of feature maps from
  162. backbone.
  163. Returns:
  164. rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
  165. """
  166. # In RPN Heads, only the last feature map of backbone is used.
  167. # And body_feat_names[-1] represents the last level name of backbone.
  168. body_feat = list(body_feats.values())[-1]
  169. rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
  170. if self.num_classes == 1:
  171. rpn_cls_prob = fluid.layers.sigmoid(
  172. rpn_cls_score, name='rpn_cls_prob')
  173. else:
  174. rpn_cls_score = fluid.layers.transpose(
  175. rpn_cls_score, perm=[0, 2, 3, 1])
  176. rpn_cls_score = fluid.layers.reshape(
  177. rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
  178. rpn_cls_prob_tmp = fluid.layers.softmax(
  179. rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
  180. rpn_cls_prob_slice = fluid.layers.slice(
  181. rpn_cls_prob_tmp,
  182. axes=[4],
  183. starts=[1],
  184. ends=[self.num_classes])
  185. rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
  186. rpn_cls_prob = fluid.layers.reshape(
  187. rpn_cls_prob, shape=(0, 0, 0, -1))
  188. rpn_cls_prob = fluid.layers.transpose(
  189. rpn_cls_prob, perm=[0, 3, 1, 2])
  190. if mode == 'train':
  191. rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
  192. scores=rpn_cls_prob,
  193. bbox_deltas=rpn_bbox_pred,
  194. im_info=im_info,
  195. anchors=self.anchor,
  196. variances=self.anchor_var,
  197. pre_nms_top_n=self.train_pre_nms_top_n,
  198. post_nms_top_n=self.train_post_nms_top_n,
  199. nms_thresh=self.train_nms_thresh,
  200. min_size=self.train_min_size,
  201. eta=self.train_eta)
  202. else:
  203. rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
  204. scores=rpn_cls_prob,
  205. bbox_deltas=rpn_bbox_pred,
  206. im_info=im_info,
  207. anchors=self.anchor,
  208. variances=self.anchor_var,
  209. pre_nms_top_n=self.test_pre_nms_top_n,
  210. post_nms_top_n=self.test_post_nms_top_n,
  211. nms_thresh=self.test_nms_thresh,
  212. min_size=self.test_min_size,
  213. eta=self.test_eta)
  214. return rpn_rois
  215. def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
  216. anchor_var):
  217. rpn_cls_score = fluid.layers.transpose(
  218. rpn_cls_score, perm=[0, 2, 3, 1])
  219. rpn_bbox_pred = fluid.layers.transpose(
  220. rpn_bbox_pred, perm=[0, 2, 3, 1])
  221. anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
  222. anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
  223. rpn_cls_score = fluid.layers.reshape(
  224. x=rpn_cls_score, shape=(0, -1, self.num_classes))
  225. rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
  226. return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
  227. def _get_loss_input(self):
  228. for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
  229. if not getattr(self, attr, None):
  230. raise ValueError("self.{} should not be None,".format(attr),
  231. "call RPNHead.get_proposals first")
  232. return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
  233. self.anchor, self.anchor_var)
  234. def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
  235. """
  236. Sample proposals and Calculate rpn loss.
  237. Args:
  238. im_info(Variable): The information of image with shape [N, 3] with
  239. shape (height, width, scale).
  240. gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
  241. M is the number of groundtruth.
  242. is_crowd(Variable): Indicates groud-truth is crowd or not with
  243. shape [M, 1]. M is the number of groundtruth.
  244. Returns:
  245. Type: dict
  246. rpn_cls_loss(Variable): RPN classification loss.
  247. rpn_bbox_loss(Variable): RPN bounding box regression loss.
  248. """
  249. rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
  250. if self.num_classes == 1:
  251. score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
  252. fluid.layers.rpn_target_assign(
  253. bbox_pred=rpn_bbox,
  254. cls_logits=rpn_cls,
  255. anchor_box=anchor,
  256. anchor_var=anchor_var,
  257. gt_boxes=gt_box,
  258. is_crowd=is_crowd,
  259. im_info=im_info,
  260. rpn_batch_size_per_im=self.rpn_batch_size_per_im,
  261. rpn_straddle_thresh=self.rpn_straddle_thresh,
  262. rpn_fg_fraction=self.rpn_fg_fraction,
  263. rpn_positive_overlap=self.rpn_positive_overlap,
  264. rpn_negative_overlap=self.rpn_negative_overlap,
  265. use_random=self.use_random)
  266. score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
  267. score_tgt.stop_gradient = True
  268. if self.rpn_cls_loss == 'SigmoidCrossEntropy':
  269. rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
  270. x=score_pred, label=score_tgt)
  271. elif self.rpn_cls_loss == 'SigmoidFocalLoss':
  272. data = fluid.layers.fill_constant(
  273. shape=[1], value=1, dtype='int32')
  274. fg_label = fluid.layers.greater_equal(score_tgt, data)
  275. fg_label = fluid.layers.cast(fg_label, dtype='int32')
  276. fg_num = fluid.layers.reduce_sum(fg_label)
  277. fg_num.stop_gradient = True
  278. loss = fluid.layers.sigmoid_cross_entropy_with_logits(
  279. x=score_pred, label=score_tgt)
  280. pred = fluid.layers.sigmoid(score_pred)
  281. p_t = pred * score_tgt + (1 - pred) * (1 - score_tgt)
  282. if self.rpn_focal_loss_alpha is not None:
  283. alpha_t = self.rpn_focal_loss_alpha * score_tgt + (
  284. 1 - self.rpn_focal_loss_alpha) * (1 - score_tgt)
  285. loss = alpha_t * loss
  286. gamma_t = fluid.layers.pow((1 - p_t),
  287. self.rpn_focal_loss_gamma)
  288. loss = gamma_t * loss
  289. rpn_cls_loss = loss / fg_num
  290. else:
  291. score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
  292. fluid.layers.rpn_target_assign(
  293. bbox_pred=rpn_bbox,
  294. cls_logits=rpn_cls,
  295. anchor_box=anchor,
  296. anchor_var=anchor_var,
  297. gt_boxes=gt_box,
  298. gt_labels=gt_label,
  299. is_crowd=is_crowd,
  300. num_classes=self.num_classes,
  301. im_info=im_info,
  302. rpn_batch_size_per_im=self.rpn_batch_size_per_im,
  303. rpn_straddle_thresh=self.rpn_straddle_thresh,
  304. rpn_fg_fraction=self.rpn_fg_fraction,
  305. rpn_positive_overlap=self.rpn_positive_overlap,
  306. rpn_negative_overlap=self.rpn_negative_overlap,
  307. use_random=self.use_random)
  308. labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
  309. labels_int64.stop_gradient = True
  310. rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
  311. logits=score_pred,
  312. label=labels_int64,
  313. numeric_stable_mode=True)
  314. if self.rpn_cls_loss == 'SigmoidCrossEntropy':
  315. rpn_cls_loss = fluid.layers.reduce_mean(
  316. rpn_cls_loss, name='loss_rpn_cls')
  317. elif self.rpn_cls_loss == 'SigmoidFocalLoss':
  318. rpn_cls_loss = fluid.layers.reduce_sum(
  319. rpn_cls_loss, name='loss_rpn_cls')
  320. loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
  321. loc_tgt.stop_gradient = True
  322. rpn_reg_loss = fluid.layers.smooth_l1(
  323. x=loc_pred,
  324. y=loc_tgt,
  325. sigma=3.0,
  326. inside_weight=bbox_weight,
  327. outside_weight=bbox_weight)
  328. rpn_reg_loss = fluid.layers.reduce_sum(
  329. rpn_reg_loss, name='loss_rpn_bbox')
  330. if self.rpn_cls_loss == 'SigmoidCrossEntropy':
  331. score_shape = fluid.layers.shape(score_tgt)
  332. score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
  333. norm = fluid.layers.reduce_prod(score_shape)
  334. norm.stop_gradient = True
  335. rpn_reg_loss = rpn_reg_loss / norm
  336. elif self.rpn_cls_loss == 'SigmoidFocalLoss':
  337. rpn_reg_loss = rpn_reg_loss / fluid.layers.cast(fg_num,
  338. rpn_reg_loss.dtype)
  339. return {'loss_rpn_cls': rpn_cls_loss, 'loss_rpn_bbox': rpn_reg_loss}
  340. class FPNRPNHead(RPNHead):
  341. def __init__(
  342. self,
  343. anchor_start_size=32,
  344. aspect_ratios=[0.5, 1., 2.],
  345. variance=[1., 1., 1., 1.],
  346. num_chan=256,
  347. min_level=2,
  348. max_level=6,
  349. #rpn_target_assign
  350. rpn_batch_size_per_im=256,
  351. rpn_straddle_thresh=0.,
  352. rpn_fg_fraction=0.5,
  353. rpn_positive_overlap=0.7,
  354. rpn_negative_overlap=0.3,
  355. use_random=True,
  356. rpn_cls_loss='SigmoidCrossEntropy',
  357. rpn_focal_loss_gamma=2,
  358. rpn_focal_loss_alpha=0.25,
  359. #train_proposal
  360. train_pre_nms_top_n=2000,
  361. train_post_nms_top_n=2000,
  362. train_nms_thresh=.7,
  363. train_min_size=.0,
  364. train_eta=1.,
  365. #test_proposal
  366. test_pre_nms_top_n=1000,
  367. test_post_nms_top_n=1000,
  368. test_nms_thresh=.7,
  369. test_min_size=.0,
  370. test_eta=1.,
  371. #num_classes
  372. num_classes=1):
  373. super(FPNRPNHead, self).__init__(
  374. aspect_ratios=aspect_ratios,
  375. variance=variance,
  376. rpn_batch_size_per_im=rpn_batch_size_per_im,
  377. rpn_straddle_thresh=rpn_straddle_thresh,
  378. rpn_fg_fraction=rpn_fg_fraction,
  379. rpn_positive_overlap=rpn_positive_overlap,
  380. rpn_negative_overlap=rpn_negative_overlap,
  381. use_random=use_random,
  382. train_pre_nms_top_n=train_pre_nms_top_n,
  383. train_post_nms_top_n=train_post_nms_top_n,
  384. train_nms_thresh=train_nms_thresh,
  385. train_min_size=train_min_size,
  386. train_eta=train_eta,
  387. test_pre_nms_top_n=test_pre_nms_top_n,
  388. test_post_nms_top_n=test_post_nms_top_n,
  389. test_nms_thresh=test_nms_thresh,
  390. test_min_size=test_min_size,
  391. test_eta=test_eta,
  392. num_classes=num_classes,
  393. rpn_cls_loss=rpn_cls_loss,
  394. rpn_focal_loss_gamma=rpn_focal_loss_gamma,
  395. rpn_focal_loss_alpha=rpn_focal_loss_alpha)
  396. self.anchor_start_size = anchor_start_size
  397. self.num_chan = num_chan
  398. self.min_level = min_level
  399. self.max_level = max_level
  400. self.num_classes = num_classes
  401. self.fpn_rpn_list = []
  402. self.anchors_list = []
  403. self.anchor_var_list = []
  404. def _get_output(self, input, feat_lvl):
  405. """
  406. Get anchor and FPN RPN head output at one level.
  407. Args:
  408. input(Variable): Body feature from backbone.
  409. feat_lvl(int): Indicate the level of rpn output corresponding
  410. to the level of feature map.
  411. Return:
  412. rpn_cls_score(Variable): Output of one level of fpn rpn head with
  413. shape of [N, num_anchors, H, W].
  414. rpn_bbox_pred(Variable): Output of one level of fpn rpn head with
  415. shape of [N, num_anchors * 4, H, W].
  416. """
  417. slvl = str(feat_lvl)
  418. conv_name = 'conv_rpn_fpn' + slvl
  419. cls_name = 'rpn_cls_logits_fpn' + slvl
  420. bbox_name = 'rpn_bbox_pred_fpn' + slvl
  421. conv_share_name = 'conv_rpn_fpn' + str(self.min_level)
  422. cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level)
  423. bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level)
  424. num_anchors = len(self.aspect_ratios)
  425. conv_rpn_fpn = fluid.layers.conv2d(
  426. input=input,
  427. num_filters=self.num_chan,
  428. filter_size=3,
  429. padding=1,
  430. act='relu',
  431. name=conv_name,
  432. param_attr=ParamAttr(
  433. name=conv_share_name + '_w',
  434. initializer=Normal(
  435. loc=0., scale=0.01)),
  436. bias_attr=ParamAttr(
  437. name=conv_share_name + '_b',
  438. learning_rate=2.,
  439. regularizer=L2Decay(0.)))
  440. self.anchors, self.anchor_var = fluid.layers.anchor_generator(
  441. input=conv_rpn_fpn,
  442. anchor_sizes=(self.anchor_start_size * 2.
  443. **(feat_lvl - self.min_level), ),
  444. stride=(2.**feat_lvl, 2.**feat_lvl),
  445. aspect_ratios=self.aspect_ratios,
  446. variance=self.variance)
  447. cls_num_filters = num_anchors * self.num_classes
  448. if self.rpn_cls_loss == 'SigmoidCrossEntropy':
  449. bias_init = None
  450. elif self.rpn_cls_loss == 'SigmoidFocalLoss':
  451. value = float(-np.log((1 - 0.01) / 0.01))
  452. bias_init = Constant(value=value)
  453. self.rpn_cls_score = fluid.layers.conv2d(
  454. input=conv_rpn_fpn,
  455. num_filters=cls_num_filters,
  456. filter_size=1,
  457. act=None,
  458. name=cls_name,
  459. param_attr=ParamAttr(
  460. name=cls_share_name + '_w',
  461. initializer=Normal(
  462. loc=0., scale=0.01)),
  463. bias_attr=ParamAttr(
  464. name=cls_share_name + '_b',
  465. initializer=bias_init,
  466. learning_rate=2.,
  467. regularizer=L2Decay(0.)))
  468. self.rpn_bbox_pred = fluid.layers.conv2d(
  469. input=conv_rpn_fpn,
  470. num_filters=num_anchors * 4,
  471. filter_size=1,
  472. act=None,
  473. name=bbox_name,
  474. param_attr=ParamAttr(
  475. name=bbox_share_name + '_w',
  476. initializer=Normal(
  477. loc=0., scale=0.01)),
  478. bias_attr=ParamAttr(
  479. name=bbox_share_name + '_b',
  480. learning_rate=2.,
  481. regularizer=L2Decay(0.)))
  482. return self.rpn_cls_score, self.rpn_bbox_pred
  483. def _get_single_proposals(self, body_feat, im_info, feat_lvl,
  484. mode='train'):
  485. """
  486. Get proposals in one level according to the output of fpn rpn head
  487. Args:
  488. body_feat(Variable): the feature map from backone.
  489. im_info(Variable): The information of image with shape [N, 3] with
  490. format (height, width, scale).
  491. feat_lvl(int): Indicate the level of proposals corresponding to
  492. the feature maps.
  493. Returns:
  494. rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4).
  495. rpn_roi_probs_fpn(Variable): Scores of proposals with
  496. shape of (rois_num, 1).
  497. """
  498. rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat,
  499. feat_lvl)
  500. if self.num_classes == 1:
  501. rpn_cls_prob_fpn = fluid.layers.sigmoid(
  502. rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
  503. else:
  504. rpn_cls_score_fpn = fluid.layers.transpose(
  505. rpn_cls_score_fpn, perm=[0, 2, 3, 1])
  506. rpn_cls_score_fpn = fluid.layers.reshape(
  507. rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
  508. rpn_cls_prob_fpn = fluid.layers.softmax(
  509. rpn_cls_score_fpn,
  510. use_cudnn=False,
  511. name='rpn_cls_prob_fpn' + str(feat_lvl))
  512. rpn_cls_prob_fpn = fluid.layers.slice(
  513. rpn_cls_prob_fpn,
  514. axes=[4],
  515. starts=[1],
  516. ends=[self.num_classes])
  517. rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
  518. rpn_cls_prob_fpn = fluid.layers.reshape(
  519. rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
  520. rpn_cls_prob_fpn = fluid.layers.transpose(
  521. rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
  522. if mode == 'train':
  523. rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
  524. scores=rpn_cls_prob_fpn,
  525. bbox_deltas=rpn_bbox_pred_fpn,
  526. im_info=im_info,
  527. anchors=self.anchors,
  528. variances=self.anchor_var,
  529. pre_nms_top_n=self.train_pre_nms_top_n,
  530. post_nms_top_n=self.train_post_nms_top_n,
  531. nms_thresh=self.train_nms_thresh,
  532. min_size=self.train_min_size,
  533. eta=self.train_eta)
  534. else:
  535. rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
  536. scores=rpn_cls_prob_fpn,
  537. bbox_deltas=rpn_bbox_pred_fpn,
  538. im_info=im_info,
  539. anchors=self.anchors,
  540. variances=self.anchor_var,
  541. pre_nms_top_n=self.test_pre_nms_top_n,
  542. post_nms_top_n=self.test_post_nms_top_n,
  543. nms_thresh=self.test_nms_thresh,
  544. min_size=self.test_min_size,
  545. eta=self.test_eta)
  546. return rpn_rois_fpn, rpn_roi_prob_fpn
  547. def get_proposals(self, fpn_feats, im_info, mode='train'):
  548. """
  549. Get proposals in multiple levels according to the output of fpn
  550. rpn head
  551. Args:
  552. fpn_feats(dict): A dictionary represents the output feature map
  553. of FPN with their name.
  554. im_info(Variable): The information of image with shape [N, 3] with
  555. format (height, width, scale).
  556. Return:
  557. rois_list(Variable): Output proposals in shape of [rois_num, 4]
  558. """
  559. rois_list = []
  560. roi_probs_list = []
  561. fpn_feat_names = list(fpn_feats.keys())
  562. for lvl in range(self.min_level, self.max_level + 1):
  563. fpn_feat_name = fpn_feat_names[self.max_level - lvl]
  564. fpn_feat = fpn_feats[fpn_feat_name]
  565. rois_fpn, roi_probs_fpn = self._get_single_proposals(
  566. fpn_feat, im_info, lvl, mode)
  567. self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred))
  568. rois_list.append(rois_fpn)
  569. roi_probs_list.append(roi_probs_fpn)
  570. self.anchors_list.append(self.anchors)
  571. self.anchor_var_list.append(self.anchor_var)
  572. post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else \
  573. self.test_post_nms_top_n
  574. rois_collect = fluid.layers.collect_fpn_proposals(
  575. rois_list,
  576. roi_probs_list,
  577. self.min_level,
  578. self.max_level,
  579. post_nms_top_n,
  580. name='collect')
  581. return rois_collect
  582. def _get_loss_input(self):
  583. rpn_clses = []
  584. rpn_bboxes = []
  585. anchors = []
  586. anchor_vars = []
  587. for i in range(len(self.fpn_rpn_list)):
  588. single_input = self._transform_input(
  589. self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1],
  590. self.anchors_list[i], self.anchor_var_list[i])
  591. rpn_clses.append(single_input[0])
  592. rpn_bboxes.append(single_input[1])
  593. anchors.append(single_input[2])
  594. anchor_vars.append(single_input[3])
  595. rpn_cls = fluid.layers.concat(rpn_clses, axis=1)
  596. rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1)
  597. anchors = fluid.layers.concat(anchors)
  598. anchor_var = fluid.layers.concat(anchor_vars)
  599. return rpn_cls, rpn_bbox, anchors, anchor_var