rpn_head.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import Constant

__all__ = ['RPNHead', 'FPNRPNHead']


class RPNHead(object):
    def __init__(
            self,
            #anchor_generator
            stride=[16.0, 16.0],
            anchor_sizes=[32, 64, 128, 256, 512],
            aspect_ratios=[0.5, 1., 2.],
            variance=[1., 1., 1., 1.],
            #rpn_target_assign
            rpn_batch_size_per_im=256,
            rpn_straddle_thresh=0.,
            rpn_fg_fraction=0.5,
            rpn_positive_overlap=0.7,
            rpn_negative_overlap=0.3,
            use_random=True,
            rpn_cls_loss='SigmoidCrossEntropy',
            rpn_focal_loss_gamma=2,
            rpn_focal_loss_alpha=0.25,
            #train_proposal
            train_pre_nms_top_n=12000,
            train_post_nms_top_n=2000,
            train_nms_thresh=.7,
            train_min_size=.0,
            train_eta=1.,
            #test_proposal
            test_pre_nms_top_n=6000,
            test_post_nms_top_n=1000,
            test_nms_thresh=.7,
            test_min_size=.0,
            test_eta=1.,
            #num_classes
            num_classes=1):
        super(RPNHead, self).__init__()
        self.stride = stride
        self.anchor_sizes = anchor_sizes
        self.aspect_ratios = aspect_ratios
        self.variance = variance
        self.rpn_batch_size_per_im = rpn_batch_size_per_im
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_fg_fraction = rpn_fg_fraction
        self.rpn_positive_overlap = rpn_positive_overlap
        self.rpn_negative_overlap = rpn_negative_overlap
        self.use_random = use_random
        self.train_pre_nms_top_n = train_pre_nms_top_n
        self.train_post_nms_top_n = train_post_nms_top_n
        self.train_nms_thresh = train_nms_thresh
        self.train_min_size = train_min_size
        self.train_eta = train_eta
        self.test_pre_nms_top_n = test_pre_nms_top_n
        self.test_post_nms_top_n = test_post_nms_top_n
        self.test_nms_thresh = test_nms_thresh
        self.test_min_size = test_min_size
        self.test_eta = test_eta
        self.num_classes = num_classes
        self.rpn_cls_loss = rpn_cls_loss
        self.rpn_focal_loss_gamma = rpn_focal_loss_gamma
        self.rpn_focal_loss_alpha = rpn_focal_loss_alpha

    def _get_output(self, input):
        """
        Get anchor and RPN head output.
        Args:
            input(Variable): feature map from backbone with shape of
                [N, C, H, W].
        Returns:
            rpn_cls_score(Variable): Output of rpn head with shape of
                [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of rpn head with shape of
                [N, num_anchors * 4, H, W].
        """
        dim_out = input.shape[1]
        rpn_conv = fluid.layers.conv2d(
            input=input,
            num_filters=dim_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            name='conv_rpn',
            param_attr=ParamAttr(
                name="conv_rpn_w", initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        # Generate anchors
        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
            input=rpn_conv,
            stride=self.stride,
            anchor_sizes=self.anchor_sizes,
            aspect_ratios=self.aspect_ratios,
            variance=self.variance)
        num_anchor = self.anchor.shape[2]
        # Proposal classification scores
        if self.rpn_cls_loss == 'SigmoidCrossEntropy':
            bias_init = None
        elif self.rpn_cls_loss == 'SigmoidFocalLoss':
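            # Focal-loss style bias init: start the classifier with a
            # foreground prior of ~0.01, i.e. bias = -log((1 - pi) / pi) with
            # pi = 0.01, so that sigmoid(bias) ~= 0.01 (as in RetinaNet).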
            value = float(-np.log((1 - 0.01) / 0.01))
            bias_init = Constant(value=value)
        self.rpn_cls_score = fluid.layers.conv2d(
            rpn_conv,
            num_filters=num_anchor * self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_cls_score',
            param_attr=ParamAttr(
                name="rpn_cls_logits_w",
                initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_cls_logits_b",
                initializer=bias_init,
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        # Proposal bbox regression deltas
        self.rpn_bbox_pred = fluid.layers.conv2d(
            rpn_conv,
            num_filters=4 * num_anchor,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_bbox_pred',
            param_attr=ParamAttr(
                name="rpn_bbox_pred_w", initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_bbox_pred_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def get_proposals(self, body_feats, im_info, mode='train'):
        """
        Get proposals according to the output of backbone.
        Args:
            body_feats(dict): The dictionary of feature maps from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
        Returns:
            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
        """
        # In the RPN head, only the last feature map of the backbone is used,
        # i.e. the last entry of body_feats.
        body_feat = list(body_feats.values())[-1]
        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)

        if self.num_classes == 1:
            rpn_cls_prob = fluid.layers.sigmoid(
                rpn_cls_score, name='rpn_cls_prob')
        else:
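            # Multi-class RPN: softmax over classes, drop the background
            # column, and keep the highest foreground probability per anchor
            # as the objectness score used for proposal generation.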
            rpn_cls_score = fluid.layers.transpose(
                rpn_cls_score, perm=[0, 2, 3, 1])
            rpn_cls_score = fluid.layers.reshape(
                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_tmp = fluid.layers.softmax(
                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
            rpn_cls_prob_slice = fluid.layers.slice(
                rpn_cls_prob_tmp,
                axes=[4],
                starts=[1],
                ends=[self.num_classes])
            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
            rpn_cls_prob = fluid.layers.reshape(
                rpn_cls_prob, shape=(0, 0, 0, -1))
            rpn_cls_prob = fluid.layers.transpose(
                rpn_cls_prob, perm=[0, 3, 1, 2])

        if mode == 'train':
            rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
                scores=rpn_cls_prob,
                bbox_deltas=rpn_bbox_pred,
                im_info=im_info,
                anchors=self.anchor,
                variances=self.anchor_var,
                pre_nms_top_n=self.train_pre_nms_top_n,
                post_nms_top_n=self.train_post_nms_top_n,
                nms_thresh=self.train_nms_thresh,
                min_size=self.train_min_size,
                eta=self.train_eta)
        else:
            rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
                scores=rpn_cls_prob,
                bbox_deltas=rpn_bbox_pred,
                im_info=im_info,
                anchors=self.anchor,
                variances=self.anchor_var,
                pre_nms_top_n=self.test_pre_nms_top_n,
                post_nms_top_n=self.test_post_nms_top_n,
                nms_thresh=self.test_nms_thresh,
                min_size=self.test_min_size,
                eta=self.test_eta)
        return rpn_rois

    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
                         anchor_var):
        rpn_cls_score = fluid.layers.transpose(
            rpn_cls_score, perm=[0, 2, 3, 1])
        rpn_bbox_pred = fluid.layers.transpose(
            rpn_bbox_pred, perm=[0, 2, 3, 1])
        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
        rpn_cls_score = fluid.layers.reshape(
            x=rpn_cls_score, shape=(0, -1, self.num_classes))
        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var

    def _get_loss_input(self):
        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
            if not getattr(self, attr, None):
                raise ValueError(
                    "self.{} should not be None, call "
                    "RPNHead.get_proposals first".format(attr))
        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
                                     self.anchor, self.anchor_var)

    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
        """
        Sample proposals and calculate RPN loss.
        Args:
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
            gt_box(Variable): The ground-truth bounding boxes with shape
                [M, 4]. M is the number of ground-truth boxes.
            is_crowd(Variable): Indicates whether a ground-truth box is crowd
                or not, with shape [M, 1]. M is the number of ground-truth
                boxes.
            gt_label(Variable): Class labels of the ground-truth boxes.
                Only used when num_classes > 1.
        Returns:
            Type: dict
                rpn_cls_loss(Variable): RPN classification loss.
                rpn_bbox_loss(Variable): RPN bounding box regression loss.
        """
        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
        if self.num_classes == 1:
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    is_crowd=is_crowd,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_negative_overlap,
                    use_random=self.use_random)
            if self.rpn_cls_loss == 'SigmoidCrossEntropy':
                score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
                score_tgt.stop_gradient = True
                rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                    x=score_pred, label=score_tgt)
            elif self.rpn_cls_loss == 'SigmoidFocalLoss':
                data = fluid.layers.fill_constant(
                    shape=[1], value=1, dtype='int32')
                fg_label = fluid.layers.greater_equal(score_tgt, data)
                fg_label = fluid.layers.cast(fg_label, dtype='int32')
                fg_num = fluid.layers.reduce_sum(fg_label)
                fg_num.stop_gradient = True
                score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
                score_tgt.stop_gradient = True
                loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                    x=score_pred, label=score_tgt)
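                # Sigmoid focal loss: FL(p_t) = -alpha_t * (1 - p_t)^gamma *
                # log(p_t). `loss` above is the plain cross-entropy term
                # (-log(p_t)); the lines below apply the alpha_t and
                # (1 - p_t)^gamma modulating factors and normalize by the
                # number of foreground anchors.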
                pred = fluid.layers.sigmoid(score_pred)
                p_t = pred * score_tgt + (1 - pred) * (1 - score_tgt)
                if self.rpn_focal_loss_alpha is not None:
                    alpha_t = self.rpn_focal_loss_alpha * score_tgt + (
                        1 - self.rpn_focal_loss_alpha) * (1 - score_tgt)
                    loss = alpha_t * loss
                gamma_t = fluid.layers.pow((1 - p_t),
                                           self.rpn_focal_loss_gamma)
                loss = gamma_t * loss
                rpn_cls_loss = loss / fg_num
        else:
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    gt_labels=gt_label,
                    is_crowd=is_crowd,
                    num_classes=self.num_classes,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_negative_overlap,
                    use_random=self.use_random)
            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
            labels_int64.stop_gradient = True
            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
                logits=score_pred,
                label=labels_int64,
                numeric_stable_mode=True)

        if self.rpn_cls_loss == 'SigmoidCrossEntropy':
            rpn_cls_loss = fluid.layers.reduce_mean(
                rpn_cls_loss, name='loss_rpn_cls')
        elif self.rpn_cls_loss == 'SigmoidFocalLoss':
            rpn_cls_loss = fluid.layers.reduce_sum(
                rpn_cls_loss, name='loss_rpn_cls')

        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
        loc_tgt.stop_gradient = True
        rpn_reg_loss = fluid.layers.smooth_l1(
            x=loc_pred,
            y=loc_tgt,
            sigma=3.0,
            inside_weight=bbox_weight,
            outside_weight=bbox_weight)
        rpn_reg_loss = fluid.layers.reduce_sum(
            rpn_reg_loss, name='loss_rpn_bbox')
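        # Normalize the summed box regression loss: with SigmoidCrossEntropy
        # it is divided by the total number of sampled anchors (the number of
        # elements in score_tgt), with SigmoidFocalLoss by the number of
        # foreground anchors.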
        if self.rpn_cls_loss == 'SigmoidCrossEntropy':
            score_shape = fluid.layers.shape(score_tgt)
            score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
            norm = fluid.layers.reduce_prod(score_shape)
            norm.stop_gradient = True
            rpn_reg_loss = rpn_reg_loss / norm
        elif self.rpn_cls_loss == 'SigmoidFocalLoss':
            rpn_reg_loss = rpn_reg_loss / fluid.layers.cast(fg_num,
                                                            rpn_reg_loss.dtype)

        return {'loss_rpn_cls': rpn_cls_loss, 'loss_rpn_bbox': rpn_reg_loss}


class FPNRPNHead(RPNHead):
    def __init__(
            self,
            anchor_start_size=32,
            aspect_ratios=[0.5, 1., 2.],
            variance=[1., 1., 1., 1.],
            num_chan=256,
            min_level=2,
            max_level=6,
            #rpn_target_assign
            rpn_batch_size_per_im=256,
            rpn_straddle_thresh=0.,
            rpn_fg_fraction=0.5,
            rpn_positive_overlap=0.7,
            rpn_negative_overlap=0.3,
            use_random=True,
            rpn_cls_loss='SigmoidCrossEntropy',
            rpn_focal_loss_gamma=2,
            rpn_focal_loss_alpha=0.25,
            #train_proposal
            train_pre_nms_top_n=2000,
            train_post_nms_top_n=2000,
            train_nms_thresh=.7,
            train_min_size=.0,
            train_eta=1.,
            #test_proposal
            test_pre_nms_top_n=1000,
            test_post_nms_top_n=1000,
            test_nms_thresh=.7,
            test_min_size=.0,
            test_eta=1.,
            #num_classes
            num_classes=1):
        super(FPNRPNHead, self).__init__(
            aspect_ratios=aspect_ratios,
            variance=variance,
            rpn_batch_size_per_im=rpn_batch_size_per_im,
            rpn_straddle_thresh=rpn_straddle_thresh,
            rpn_fg_fraction=rpn_fg_fraction,
            rpn_positive_overlap=rpn_positive_overlap,
            rpn_negative_overlap=rpn_negative_overlap,
            use_random=use_random,
            train_pre_nms_top_n=train_pre_nms_top_n,
            train_post_nms_top_n=train_post_nms_top_n,
            train_nms_thresh=train_nms_thresh,
            train_min_size=train_min_size,
            train_eta=train_eta,
            test_pre_nms_top_n=test_pre_nms_top_n,
            test_post_nms_top_n=test_post_nms_top_n,
            test_nms_thresh=test_nms_thresh,
            test_min_size=test_min_size,
            test_eta=test_eta,
            num_classes=num_classes,
            rpn_cls_loss=rpn_cls_loss,
            rpn_focal_loss_gamma=rpn_focal_loss_gamma,
            rpn_focal_loss_alpha=rpn_focal_loss_alpha)
        self.anchor_start_size = anchor_start_size
        self.num_chan = num_chan
        self.min_level = min_level
        self.max_level = max_level
        self.num_classes = num_classes
        self.fpn_rpn_list = []
        self.anchors_list = []
        self.anchor_var_list = []

    def _get_output(self, input, feat_lvl):
        """
        Get anchor and FPN RPN head output at one level.
        Args:
            input(Variable): Body feature from backbone.
            feat_lvl(int): Indicate the level of rpn output corresponding
                to the level of feature map.
        Returns:
            rpn_cls_score(Variable): Output of one level of fpn rpn head with
                shape of [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of one level of fpn rpn head with
                shape of [N, num_anchors * 4, H, W].
        """
        slvl = str(feat_lvl)
        conv_name = 'conv_rpn_fpn' + slvl
        cls_name = 'rpn_cls_logits_fpn' + slvl
        bbox_name = 'rpn_bbox_pred_fpn' + slvl
        conv_share_name = 'conv_rpn_fpn' + str(self.min_level)
        cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level)
        bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level)
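        # The *_share_name attributes are keyed to min_level, so the conv,
        # cls, and bbox layers reuse the same weights and biases across all
        # FPN levels; only the per-level op names (conv_name, cls_name,
        # bbox_name) differ.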
        num_anchors = len(self.aspect_ratios)
        conv_rpn_fpn = fluid.layers.conv2d(
            input=input,
            num_filters=self.num_chan,
            filter_size=3,
            padding=1,
            act='relu',
            name=conv_name,
            param_attr=ParamAttr(
                name=conv_share_name + '_w',
                initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=conv_share_name + '_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
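        # One anchor size per level: anchor_start_size * 2^(feat_lvl -
        # min_level), placed on a grid with stride 2^feat_lvl.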
        self.anchors, self.anchor_var = fluid.layers.anchor_generator(
            input=conv_rpn_fpn,
            anchor_sizes=(
                self.anchor_start_size * 2.**(feat_lvl - self.min_level), ),
            stride=(2.**feat_lvl, 2.**feat_lvl),
            aspect_ratios=self.aspect_ratios,
            variance=self.variance)
        cls_num_filters = num_anchors * self.num_classes
        if self.rpn_cls_loss == 'SigmoidCrossEntropy':
            bias_init = None
        elif self.rpn_cls_loss == 'SigmoidFocalLoss':
            value = float(-np.log((1 - 0.01) / 0.01))
            bias_init = Constant(value=value)
        self.rpn_cls_score = fluid.layers.conv2d(
            input=conv_rpn_fpn,
            num_filters=cls_num_filters,
            filter_size=1,
            act=None,
            name=cls_name,
            param_attr=ParamAttr(
                name=cls_share_name + '_w',
                initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=cls_share_name + '_b',
                initializer=bias_init,
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        self.rpn_bbox_pred = fluid.layers.conv2d(
            input=conv_rpn_fpn,
            num_filters=num_anchors * 4,
            filter_size=1,
            act=None,
            name=bbox_name,
            param_attr=ParamAttr(
                name=bbox_share_name + '_w',
                initializer=Normal(
                    loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name=bbox_share_name + '_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def _get_single_proposals(self, body_feat, im_info, feat_lvl,
                              mode='train'):
        """
        Get proposals in one level according to the output of fpn rpn head.
        Args:
            body_feat(Variable): the feature map from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
            feat_lvl(int): Indicate the level of proposals corresponding to
                the feature maps.
        Returns:
            rpn_rois_fpn(Variable): Output proposals with shape of
                (rois_num, 4).
            rpn_roi_probs_fpn(Variable): Scores of proposals with
                shape of (rois_num, 1).
        """
        rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat,
                                                                feat_lvl)
        if self.num_classes == 1:
            rpn_cls_prob_fpn = fluid.layers.sigmoid(
                rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
        else:
            rpn_cls_score_fpn = fluid.layers.transpose(
                rpn_cls_score_fpn, perm=[0, 2, 3, 1])
            rpn_cls_score_fpn = fluid.layers.reshape(
                rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_fpn = fluid.layers.softmax(
                rpn_cls_score_fpn,
                use_cudnn=False,
                name='rpn_cls_prob_fpn' + str(feat_lvl))
            rpn_cls_prob_fpn = fluid.layers.slice(
                rpn_cls_prob_fpn,
                axes=[4],
                starts=[1],
                ends=[self.num_classes])
            rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
            rpn_cls_prob_fpn = fluid.layers.reshape(
                rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
            rpn_cls_prob_fpn = fluid.layers.transpose(
                rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
        if mode == 'train':
            rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
                scores=rpn_cls_prob_fpn,
                bbox_deltas=rpn_bbox_pred_fpn,
                im_info=im_info,
                anchors=self.anchors,
                variances=self.anchor_var,
                pre_nms_top_n=self.train_pre_nms_top_n,
                post_nms_top_n=self.train_post_nms_top_n,
                nms_thresh=self.train_nms_thresh,
                min_size=self.train_min_size,
                eta=self.train_eta)
        else:
            rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals(
                scores=rpn_cls_prob_fpn,
                bbox_deltas=rpn_bbox_pred_fpn,
                im_info=im_info,
                anchors=self.anchors,
                variances=self.anchor_var,
                pre_nms_top_n=self.test_pre_nms_top_n,
                post_nms_top_n=self.test_post_nms_top_n,
                nms_thresh=self.test_nms_thresh,
                min_size=self.test_min_size,
                eta=self.test_eta)
        return rpn_rois_fpn, rpn_roi_prob_fpn

    def get_proposals(self, fpn_feats, im_info, mode='train'):
        """
        Get proposals in multiple levels according to the output of fpn
        rpn head.
        Args:
            fpn_feats(dict): A dictionary of FPN output feature maps keyed
                by their names.
            im_info(Variable): The information of image with shape [N, 3] with
                format (height, width, scale).
        Returns:
            rois_collect(Variable): Output proposals with shape of
                [rois_num, 4], collected across all FPN levels.
        """
        rois_list = []
        roi_probs_list = []
        fpn_feat_names = list(fpn_feats.keys())
        for lvl in range(self.min_level, self.max_level + 1):
            fpn_feat_name = fpn_feat_names[self.max_level - lvl]
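            # fpn_feats is assumed to be ordered from the highest pyramid
            # level to the lowest, so index (max_level - lvl) selects the
            # feature map for level `lvl`.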
            fpn_feat = fpn_feats[fpn_feat_name]
            rois_fpn, roi_probs_fpn = self._get_single_proposals(
                fpn_feat, im_info, lvl, mode)
            self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred))
            rois_list.append(rois_fpn)
            roi_probs_list.append(roi_probs_fpn)
            self.anchors_list.append(self.anchors)
            self.anchor_var_list.append(self.anchor_var)
        post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else \
            self.test_post_nms_top_n
        rois_collect = fluid.layers.collect_fpn_proposals(
            rois_list,
            roi_probs_list,
            self.min_level,
            self.max_level,
            post_nms_top_n,
            name='collect')
        return rois_collect

    def _get_loss_input(self):
        rpn_clses = []
        rpn_bboxes = []
        anchors = []
        anchor_vars = []
        for i in range(len(self.fpn_rpn_list)):
            single_input = self._transform_input(
                self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1],
                self.anchors_list[i], self.anchor_var_list[i])
            rpn_clses.append(single_input[0])
            rpn_bboxes.append(single_input[1])
            anchors.append(single_input[2])
            anchor_vars.append(single_input[3])
        rpn_cls = fluid.layers.concat(rpn_clses, axis=1)
        rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1)
        anchors = fluid.layers.concat(anchors)
        anchor_var = fluid.layers.concat(anchor_vars)
        return rpn_cls, rpn_bbox, anchors, anchor_var
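

# ----------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module). It
# assumes a PaddlePaddle 1.x static-graph environment; the feed names, shapes,
# and the single-level stand-in backbone feature below are made up for
# demonstration.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    from collections import OrderedDict

    # Placeholders for the input image and the per-image (height, width,
    # scale) info expected by generate_proposals.
    image = fluid.layers.data(
        name='image', shape=[3, 512, 512], dtype='float32')
    im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32')

    # Stand-in for a backbone: a single stride-16 feature map. The key
    # 'backbone_feat' is a hypothetical name; RPNHead only uses the last
    # entry of the dict.
    backbone_feat = fluid.layers.conv2d(
        input=image,
        num_filters=256,
        filter_size=3,
        stride=16,
        padding=1,
        act='relu')
    body_feats = OrderedDict([('backbone_feat', backbone_feat)])

    rpn_head = RPNHead()
    rois = rpn_head.get_proposals(body_feats, im_info, mode='test')
    # `rois` is a graph Variable with shape [rois_num, 4]; evaluating it
    # requires a fluid.Executor with real image/im_info feeds.
    print(rois)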