rpn_head.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay

__all__ = ['RPNHead', 'FPNRPNHead']
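
# RPN heads for two-stage detectors: RPNHead consumes a single backbone
# feature map (e.g. the last level of a C4-style backbone), while FPNRPNHead
# runs one weight-shared head per FPN level and merges the per-level proposals.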


class RPNHead(object):
    def __init__(
            self,
            # anchor_generator
            stride=[16.0, 16.0],
            anchor_sizes=[32, 64, 128, 256, 512],
            aspect_ratios=[0.5, 1., 2.],
            variance=[1., 1., 1., 1.],
            # rpn_target_assign
            rpn_batch_size_per_im=256,
            rpn_straddle_thresh=0.,
            rpn_fg_fraction=0.5,
            rpn_positive_overlap=0.7,
            rpn_negative_overlap=0.3,
            use_random=True,
            # train_proposal
            train_pre_nms_top_n=12000,
            train_post_nms_top_n=2000,
            train_nms_thresh=.7,
            train_min_size=.0,
            train_eta=1.,
            # test_proposal
            test_pre_nms_top_n=6000,
            test_post_nms_top_n=1000,
            test_nms_thresh=.7,
            test_min_size=.0,
            test_eta=1.,
            # num_classes
            num_classes=1):
        super(RPNHead, self).__init__()
        self.stride = stride
        self.anchor_sizes = anchor_sizes
        self.aspect_ratios = aspect_ratios
        self.variance = variance
        self.rpn_batch_size_per_im = rpn_batch_size_per_im
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_fg_fraction = rpn_fg_fraction
        self.rpn_positive_overlap = rpn_positive_overlap
        self.rpn_negative_overlap = rpn_negative_overlap
        self.use_random = use_random
        self.train_pre_nms_top_n = train_pre_nms_top_n
        self.train_post_nms_top_n = train_post_nms_top_n
        self.train_nms_thresh = train_nms_thresh
        self.train_min_size = train_min_size
        self.train_eta = train_eta
        self.test_pre_nms_top_n = test_pre_nms_top_n
        self.test_post_nms_top_n = test_post_nms_top_n
        self.test_nms_thresh = test_nms_thresh
        self.test_min_size = test_min_size
        self.test_eta = test_eta
        self.num_classes = num_classes

    def _get_output(self, input):
        """
        Get anchor and RPN head output.

        Args:
            input(Variable): feature map from backbone with shape of [N, C, H, W]

        Returns:
            rpn_cls_score(Variable): Output of rpn head with shape of
                [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of rpn head with shape of
                [N, num_anchors * 4, H, W].
        """
        dim_out = input.shape[1]
        rpn_conv = fluid.layers.conv2d(
            input=input,
            num_filters=dim_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            name='conv_rpn',
            param_attr=ParamAttr(
                name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        # Generate anchors
        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
            input=rpn_conv,
            stride=self.stride,
            anchor_sizes=self.anchor_sizes,
            aspect_ratios=self.aspect_ratios,
            variance=self.variance)
        num_anchor = self.anchor.shape[2]
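        # self.anchor has layout [H, W, num_anchors, 4], so num_anchor equals
        # len(anchor_sizes) * len(aspect_ratios) (15 with the defaults) and
        # matches the per-location channel counts of the two 1x1 convs below.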
        # Proposal classification scores
        self.rpn_cls_score = fluid.layers.conv2d(
            rpn_conv,
            num_filters=num_anchor * self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_cls_score',
            param_attr=ParamAttr(
                name="rpn_cls_logits_w",
                initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_cls_logits_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        # Proposal bbox regression deltas
        self.rpn_bbox_pred = fluid.layers.conv2d(
            rpn_conv,
            num_filters=4 * num_anchor,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_bbox_pred',
            param_attr=ParamAttr(
                name="rpn_bbox_pred_w",
                initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_bbox_pred_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def get_proposals(self, body_feats, im_info, mode='train'):
        """
        Get proposals according to the output of backbone.

        Args:
            body_feats (dict): The dictionary of feature maps from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).

        Returns:
            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
        """
        # In RPN heads, only the last feature map of the backbone is used,
        # i.e. the last entry of body_feats.
        body_feat = list(body_feats.values())[-1]
        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)

        if self.num_classes == 1:
            rpn_cls_prob = fluid.layers.sigmoid(
                rpn_cls_score, name='rpn_cls_prob')
        else:
            rpn_cls_score = fluid.layers.transpose(
                rpn_cls_score, perm=[0, 2, 3, 1])
            rpn_cls_score = fluid.layers.reshape(
                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_tmp = fluid.layers.softmax(
                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
            rpn_cls_prob_slice = fluid.layers.slice(
                rpn_cls_prob_tmp,
                axes=[4],
                starts=[1],
                ends=[self.num_classes])
            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
            rpn_cls_prob = fluid.layers.reshape(
                rpn_cls_prob, shape=(0, 0, 0, -1))
            rpn_cls_prob = fluid.layers.transpose(
                rpn_cls_prob, perm=[0, 3, 1, 2])
        if mode == 'train':
            rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
                scores=rpn_cls_prob,
                bbox_deltas=rpn_bbox_pred,
                im_info=im_info,
                anchors=self.anchor,
                variances=self.anchor_var,
                pre_nms_top_n=self.train_pre_nms_top_n,
                post_nms_top_n=self.train_post_nms_top_n,
                nms_thresh=self.train_nms_thresh,
                min_size=self.train_min_size,
                eta=self.train_eta)
        else:
            rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
                scores=rpn_cls_prob,
                bbox_deltas=rpn_bbox_pred,
                im_info=im_info,
                anchors=self.anchor,
                variances=self.anchor_var,
                pre_nms_top_n=self.test_pre_nms_top_n,
                post_nms_top_n=self.test_post_nms_top_n,
                nms_thresh=self.test_nms_thresh,
                min_size=self.test_min_size,
                eta=self.test_eta)
        return rpn_rois

    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
                         anchor_var):
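        # Flatten the per-location predictions so that classification scores,
        # bbox deltas and anchors all line up along one per-anchor axis, as
        # expected by fluid.layers.rpn_target_assign in get_loss.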
        rpn_cls_score = fluid.layers.transpose(
            rpn_cls_score, perm=[0, 2, 3, 1])
        rpn_bbox_pred = fluid.layers.transpose(
            rpn_bbox_pred, perm=[0, 2, 3, 1])
        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
        rpn_cls_score = fluid.layers.reshape(
            x=rpn_cls_score, shape=(0, -1, self.num_classes))
        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var

    def _get_loss_input(self):
        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
            if not getattr(self, attr, None):
                raise ValueError("self.{} should not be None; call "
                                 "RPNHead.get_proposals first".format(attr))
        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
                                     self.anchor, self.anchor_var)

    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
        """
        Sample proposals and calculate RPN loss.

        Args:
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
                M is the number of groundtruth.
            is_crowd(Variable): Indicates whether a ground-truth box is crowd,
                with shape [M, 1]. M is the number of groundtruth.
            gt_label(Variable): The ground-truth class labels, only required
                when num_classes > 1.

        Returns:
            Type: dict
                rpn_cls_loss(Variable): RPN classification loss.
                rpn_bbox_loss(Variable): RPN bounding box regression loss.
        """
        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
        if self.num_classes == 1:
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    is_crowd=is_crowd,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_negative_overlap,
                    use_random=self.use_random)
            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
            score_tgt.stop_gradient = True
            rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                x=score_pred, label=score_tgt)
        else:
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    gt_labels=gt_label,
                    is_crowd=is_crowd,
                    num_classes=self.num_classes,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_negative_overlap,
                    use_random=self.use_random)
            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
            labels_int64.stop_gradient = True
            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
                logits=score_pred,
                label=labels_int64,
                numeric_stable_mode=True)

        rpn_cls_loss = fluid.layers.reduce_mean(
            rpn_cls_loss, name='loss_rpn_cls')

        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
        loc_tgt.stop_gradient = True
        rpn_reg_loss = fluid.layers.smooth_l1(
            x=loc_pred,
            y=loc_tgt,
            sigma=3.0,
            inside_weight=bbox_weight,
            outside_weight=bbox_weight)
        rpn_reg_loss = fluid.layers.reduce_sum(
            rpn_reg_loss, name='loss_rpn_bbox')
        # Normalize the regression loss by the total number of sampled anchors
        # (foreground + background).
        score_shape = fluid.layers.shape(score_tgt)
        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
        norm = fluid.layers.reduce_prod(score_shape)
        norm.stop_gradient = True
        rpn_reg_loss = rpn_reg_loss / norm
        return {'loss_rpn_cls': rpn_cls_loss, 'loss_rpn_bbox': rpn_reg_loss}
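

# Illustrative only (not part of the original module): a minimal sketch of how
# RPNHead is typically wired into a static-graph training program. The
# argument names below are hypothetical placeholders for variables produced by
# the data reader and the backbone.
def _example_rpn_head_usage(body_feats, im_info, gt_box, is_crowd):
    rpn_head = RPNHead()
    # get_proposals must run first: it builds the head and caches
    # rpn_cls_score, rpn_bbox_pred and the generated anchors on the head,
    # which get_loss then consumes via _get_loss_input.
    rois = rpn_head.get_proposals(body_feats, im_info, mode='train')
    rpn_loss = rpn_head.get_loss(im_info, gt_box, is_crowd)
    return rois, rpn_loss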


class FPNRPNHead(RPNHead):
    def __init__(
            self,
            anchor_start_size=32,
            aspect_ratios=[0.5, 1., 2.],
            variance=[1., 1., 1., 1.],
            num_chan=256,
            min_level=2,
            max_level=6,
            # rpn_target_assign
            rpn_batch_size_per_im=256,
            rpn_straddle_thresh=0.,
            rpn_fg_fraction=0.5,
            rpn_positive_overlap=0.7,
            rpn_negative_overlap=0.3,
            use_random=True,
            # train_proposal
            train_pre_nms_top_n=2000,
            train_post_nms_top_n=2000,
            train_nms_thresh=.7,
            train_min_size=.0,
            train_eta=1.,
            # test_proposal
            test_pre_nms_top_n=1000,
            test_post_nms_top_n=1000,
            test_nms_thresh=.7,
            test_min_size=.0,
            test_eta=1.,
            # num_classes
            num_classes=1):
        super(FPNRPNHead, self).__init__(
            aspect_ratios=aspect_ratios,
            variance=variance,
            rpn_batch_size_per_im=rpn_batch_size_per_im,
            rpn_straddle_thresh=rpn_straddle_thresh,
            rpn_fg_fraction=rpn_fg_fraction,
            rpn_positive_overlap=rpn_positive_overlap,
            rpn_negative_overlap=rpn_negative_overlap,
            use_random=use_random,
            train_pre_nms_top_n=train_pre_nms_top_n,
            train_post_nms_top_n=train_post_nms_top_n,
            train_nms_thresh=train_nms_thresh,
            train_min_size=train_min_size,
            train_eta=train_eta,
            test_pre_nms_top_n=test_pre_nms_top_n,
            test_post_nms_top_n=test_post_nms_top_n,
            test_nms_thresh=test_nms_thresh,
            test_min_size=test_min_size,
            test_eta=test_eta,
            num_classes=num_classes)
        self.anchor_start_size = anchor_start_size
        self.num_chan = num_chan
        self.min_level = min_level
        self.max_level = max_level
        self.num_classes = num_classes
        self.fpn_rpn_list = []
        self.anchors_list = []
        self.anchor_var_list = []

    def _get_output(self, input, feat_lvl):
        """
        Get anchor and FPN RPN head output at one level.

        Args:
            input(Variable): Body feature from backbone.
            feat_lvl(int): Indicate the level of rpn output corresponding
                to the level of feature map.

        Returns:
            rpn_cls_score(Variable): Output of one level of fpn rpn head with
                shape of [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of one level of fpn rpn head with
                shape of [N, num_anchors * 4, H, W].
        """
        slvl = str(feat_lvl)
        conv_name = 'conv_rpn_fpn' + slvl
        cls_name = 'rpn_cls_logits_fpn' + slvl
        bbox_name = 'rpn_bbox_pred_fpn' + slvl
        conv_share_name = 'conv_rpn_fpn' + str(self.min_level)
        cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level)
        bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level)
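        # The *_share_name attributes always refer to the min_level parameter
        # names, so every FPN level reuses (shares) the same conv weights and
        # biases; only the per-level layer output names differ.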
        num_anchors = len(self.aspect_ratios)
        conv_rpn_fpn = fluid.layers.conv2d(
            input=input,
            num_filters=self.num_chan,
            filter_size=3,
            padding=1,
            act='relu',
            name=conv_name,
            param_attr=ParamAttr(
                name=conv_share_name + '_w',
                initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=conv_share_name + '_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        self.anchors, self.anchor_var = fluid.layers.anchor_generator(
            input=conv_rpn_fpn,
            anchor_sizes=(self.anchor_start_size * 2.**
                          (feat_lvl - self.min_level), ),
            stride=(2.**feat_lvl, 2.**feat_lvl),
            aspect_ratios=self.aspect_ratios,
            variance=self.variance)
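        # One anchor size per level: anchor_start_size (32 by default) at
        # min_level, doubled at every level above it, with the anchor stride
        # matching the level's feature stride of 2**feat_lvl pixels.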
        cls_num_filters = num_anchors * self.num_classes
        self.rpn_cls_score = fluid.layers.conv2d(
            input=conv_rpn_fpn,
            num_filters=cls_num_filters,
            filter_size=1,
            act=None,
            name=cls_name,
            param_attr=ParamAttr(
                name=cls_share_name + '_w',
                initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=cls_share_name + '_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        self.rpn_bbox_pred = fluid.layers.conv2d(
            input=conv_rpn_fpn,
            num_filters=num_anchors * 4,
            filter_size=1,
            act=None,
            name=bbox_name,
            param_attr=ParamAttr(
                name=bbox_share_name + '_w',
                initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=bbox_share_name + '_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def _get_single_proposals(self, body_feat, im_info, feat_lvl,
                              mode='train'):
        """
        Get proposals in one level according to the output of fpn rpn head.

        Args:
            body_feat(Variable): the feature map from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
            feat_lvl(int): Indicate the level of proposals corresponding to
                the feature maps.

        Returns:
            rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4).
            rpn_roi_probs_fpn(Variable): Scores of proposals with
                shape of (rois_num, 1).
        """
        rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(
            body_feat, feat_lvl)

        if self.num_classes == 1:
            rpn_cls_prob_fpn = fluid.layers.sigmoid(
                rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
        else:
            rpn_cls_score_fpn = fluid.layers.transpose(
                rpn_cls_score_fpn, perm=[0, 2, 3, 1])
            rpn_cls_score_fpn = fluid.layers.reshape(
                rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_fpn = fluid.layers.softmax(
                rpn_cls_score_fpn,
                use_cudnn=False,
                name='rpn_cls_prob_fpn' + str(feat_lvl))
            rpn_cls_prob_fpn = fluid.layers.slice(
                rpn_cls_prob_fpn,
                axes=[4],
                starts=[1],
                ends=[self.num_classes])
            rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
            rpn_cls_prob_fpn = fluid.layers.reshape(
                rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
            rpn_cls_prob_fpn = fluid.layers.transpose(
                rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
        if mode == 'train':
            rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
                scores=rpn_cls_prob_fpn,
                bbox_deltas=rpn_bbox_pred_fpn,
                im_info=im_info,
                anchors=self.anchors,
                variances=self.anchor_var,
                pre_nms_top_n=self.train_pre_nms_top_n,
                post_nms_top_n=self.train_post_nms_top_n,
                nms_thresh=self.train_nms_thresh,
                min_size=self.train_min_size,
                eta=self.train_eta)
        else:
            rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
                scores=rpn_cls_prob_fpn,
                bbox_deltas=rpn_bbox_pred_fpn,
                im_info=im_info,
                anchors=self.anchors,
                variances=self.anchor_var,
                pre_nms_top_n=self.test_pre_nms_top_n,
                post_nms_top_n=self.test_post_nms_top_n,
                nms_thresh=self.test_nms_thresh,
                min_size=self.test_min_size,
                eta=self.test_eta)
        return rpn_rois_fpn, rpn_roi_prob_fpn

    def get_proposals(self, fpn_feats, im_info, mode='train'):
        """
        Get proposals in multiple levels according to the output of fpn
        rpn head.

        Args:
            fpn_feats(dict): A dictionary of the FPN feature maps keyed by
                their names.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).

        Returns:
            rois_collect(Variable): Output proposals with shape of
                [rois_num, 4], collected from all FPN levels.
        """
        rois_list = []
        roi_probs_list = []
        fpn_feat_names = list(fpn_feats.keys())
        for lvl in range(self.min_level, self.max_level + 1):
            fpn_feat_name = fpn_feat_names[self.max_level - lvl]
            fpn_feat = fpn_feats[fpn_feat_name]
            rois_fpn, roi_probs_fpn = self._get_single_proposals(
                fpn_feat, im_info, lvl, mode)
            self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred))
            rois_list.append(rois_fpn)
            roi_probs_list.append(roi_probs_fpn)
            self.anchors_list.append(self.anchors)
            self.anchor_var_list.append(self.anchor_var)
        post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else \
            self.test_post_nms_top_n
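        # collect_fpn_proposals concatenates the per-level proposals and keeps
        # the post_nms_top_n highest-scoring boxes across all levels.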
        rois_collect = fluid.layers.collect_fpn_proposals(
            rois_list,
            roi_probs_list,
            self.min_level,
            self.max_level,
            post_nms_top_n,
            name='collect')
        return rois_collect

    def _get_loss_input(self):
        rpn_clses = []
        rpn_bboxes = []
        anchors = []
        anchor_vars = []
        for i in range(len(self.fpn_rpn_list)):
            single_input = self._transform_input(
                self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1],
                self.anchors_list[i], self.anchor_var_list[i])
            rpn_clses.append(single_input[0])
            rpn_bboxes.append(single_input[1])
            anchors.append(single_input[2])
            anchor_vars.append(single_input[3])

        rpn_cls = fluid.layers.concat(rpn_clses, axis=1)
        rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1)
        anchors = fluid.layers.concat(anchors)
        anchor_var = fluid.layers.concat(anchor_vars)
        return rpn_cls, rpn_bbox, anchors, anchor_var
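

# Illustrative only (not part of the original module): a minimal sketch of how
# FPNRPNHead is used. fpn_feats is the dict of FPN feature maps ordered from
# the highest pyramid level to the lowest (get_proposals indexes it as
# fpn_feat_names[self.max_level - lvl]); the other argument names are
# hypothetical placeholders.
def _example_fpn_rpn_head_usage(fpn_feats, im_info, gt_box, is_crowd):
    fpn_rpn_head = FPNRPNHead()
    rois = fpn_rpn_head.get_proposals(fpn_feats, im_info, mode='train')
    # get_loss is inherited from RPNHead; FPNRPNHead only overrides
    # _get_loss_input to concatenate the per-level scores, deltas and anchors.
    rpn_loss = fpn_rpn_head.get_loss(im_info, gt_box, is_crowd)
    return rois, rpn_loss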