cascade_head.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. import paddle.nn.functional as F
  17. from paddle.nn.initializer import Normal, XavierUniform
  18. from paddle.regularizer import L2Decay
  19. from paddlex.ppdet.core.workspace import register, create
  20. from paddlex.ppdet.modeling import ops
  21. from .bbox_head import BBoxHead, TwoFCHead, XConvNormHead
  22. from .roi_extractor import RoIAlign
  23. from ..shape_spec import ShapeSpec
  24. from ..bbox_utils import bbox2delta, delta2bbox, clip_bbox, nonempty_bbox
  25. __all__ = ['CascadeTwoFCHead', 'CascadeXConvNormHead', 'CascadeHead']
  26. @register
  27. class CascadeTwoFCHead(nn.Layer):
  28. __shared__ = ['num_cascade_stage']
  29. """
  30. Cascade RCNN bbox head with Two fc layers to extract feature
  31. Args:
  32. in_channel (int): Input channel which can be derived by from_config
  33. out_channel (int): Output channel
  34. resolution (int): Resolution of input feature map, default 7
  35. num_cascade_stage (int): The number of cascade stage, default 3
  36. """
  37. def __init__(self,
  38. in_channel=256,
  39. out_channel=1024,
  40. resolution=7,
  41. num_cascade_stage=3):
  42. super(CascadeTwoFCHead, self).__init__()
  43. self.in_channel = in_channel
  44. self.out_channel = out_channel
  45. self.head_list = []
  46. for stage in range(num_cascade_stage):
  47. head_per_stage = self.add_sublayer(
  48. str(stage), TwoFCHead(in_channel, out_channel, resolution))
  49. self.head_list.append(head_per_stage)
  50. @classmethod
  51. def from_config(cls, cfg, input_shape):
  52. s = input_shape
  53. s = s[0] if isinstance(s, (list, tuple)) else s
  54. return {'in_channel': s.channels}
  55. @property
  56. def out_shape(self):
  57. return [ShapeSpec(channels=self.out_channel, )]
  58. def forward(self, rois_feat, stage=0):
  59. out = self.head_list[stage](rois_feat)
  60. return out
  61. @register
  62. class CascadeXConvNormHead(nn.Layer):
  63. __shared__ = ['norm_type', 'freeze_norm', 'num_cascade_stage']
  64. """
  65. Cascade RCNN bbox head with serveral convolution layers
  66. Args:
  67. in_channel (int): Input channels which can be derived by from_config
  68. num_convs (int): The number of conv layers
  69. conv_dim (int): The number of channels for the conv layers
  70. out_channel (int): Output channels
  71. resolution (int): Resolution of input feature map
  72. norm_type (string): Norm type, bn, gn, sync_bn are available,
  73. default `gn`
  74. freeze_norm (bool): Whether to freeze the norm
  75. num_cascade_stage (int): The number of cascade stage, default 3
  76. """
  77. def __init__(self,
  78. in_channel=256,
  79. num_convs=4,
  80. conv_dim=256,
  81. out_channel=1024,
  82. resolution=7,
  83. norm_type='gn',
  84. freeze_norm=False,
  85. num_cascade_stage=3):
  86. super(CascadeXConvNormHead, self).__init__()
  87. self.in_channel = in_channel
  88. self.out_channel = out_channel
  89. self.head_list = []
  90. for stage in range(num_cascade_stage):
  91. head_per_stage = self.add_sublayer(
  92. str(stage),
  93. XConvNormHead(
  94. in_channel,
  95. num_convs,
  96. conv_dim,
  97. out_channel,
  98. resolution,
  99. norm_type,
  100. freeze_norm,
  101. stage_name='stage{}_'.format(stage)))
  102. self.head_list.append(head_per_stage)
  103. @classmethod
  104. def from_config(cls, cfg, input_shape):
  105. s = input_shape
  106. s = s[0] if isinstance(s, (list, tuple)) else s
  107. return {'in_channel': s.channels}
  108. @property
  109. def out_shape(self):
  110. return [ShapeSpec(channels=self.out_channel, )]
  111. def forward(self, rois_feat, stage=0):
  112. out = self.head_list[stage](rois_feat)
  113. return out
  114. @register
  115. class CascadeHead(BBoxHead):
  116. __shared__ = ['num_classes', 'num_cascade_stages']
  117. __inject__ = ['bbox_assigner', 'bbox_loss']
  118. """
  119. Cascade RCNN bbox head
  120. Args:
  121. head (nn.Layer): Extract feature in bbox head
  122. in_channel (int): Input channel after RoI extractor
  123. roi_extractor (object): The module of RoI Extractor
  124. bbox_assigner (object): The module of Box Assigner, label and sample the
  125. box.
  126. num_classes (int): The number of classes
  127. bbox_weight (List[List[float]]): The weight to get the decode box and the
  128. length of weight is the number of cascade stage
  129. num_cascade_stages (int): THe number of stage to refine the box
  130. """
  131. def __init__(self,
  132. head,
  133. in_channel,
  134. roi_extractor=RoIAlign().__dict__,
  135. bbox_assigner='BboxAssigner',
  136. num_classes=80,
  137. bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
  138. [30.0, 30.0, 15.0, 15.0]],
  139. num_cascade_stages=3,
  140. bbox_loss=None):
  141. nn.Layer.__init__(self, )
  142. self.head = head
  143. self.roi_extractor = roi_extractor
  144. if isinstance(roi_extractor, dict):
  145. self.roi_extractor = RoIAlign(**roi_extractor)
  146. self.bbox_assigner = bbox_assigner
  147. self.num_classes = num_classes
  148. self.bbox_weight = bbox_weight
  149. self.num_cascade_stages = num_cascade_stages
  150. self.bbox_loss = bbox_loss
  151. self.bbox_score_list = []
  152. self.bbox_delta_list = []
  153. for i in range(num_cascade_stages):
  154. score_name = 'bbox_score_stage{}'.format(i)
  155. delta_name = 'bbox_delta_stage{}'.format(i)
  156. bbox_score = self.add_sublayer(
  157. score_name,
  158. nn.Linear(
  159. in_channel,
  160. self.num_classes + 1,
  161. weight_attr=paddle.ParamAttr(initializer=Normal(
  162. mean=0.0, std=0.01))))
  163. bbox_delta = self.add_sublayer(
  164. delta_name,
  165. nn.Linear(
  166. in_channel,
  167. 4,
  168. weight_attr=paddle.ParamAttr(initializer=Normal(
  169. mean=0.0, std=0.001))))
  170. self.bbox_score_list.append(bbox_score)
  171. self.bbox_delta_list.append(bbox_delta)
  172. self.assigned_label = None
  173. self.assigned_rois = None
  174. def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
  175. """
  176. body_feats (list[Tensor]): Feature maps from backbone
  177. rois (Tensor): RoIs generated from RPN module
  178. rois_num (Tensor): The number of RoIs in each image
  179. inputs (dict{Tensor}): The ground-truth of image
  180. """
  181. targets = []
  182. if self.training:
  183. rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
  184. targets_list = [targets]
  185. self.assigned_rois = (rois, rois_num)
  186. self.assigned_targets = targets
  187. pred_bbox = None
  188. head_out_list = []
  189. for i in range(self.num_cascade_stages):
  190. if i > 0:
  191. rois, rois_num = self._get_rois_from_boxes(pred_bbox,
  192. inputs['im_shape'])
  193. if self.training:
  194. rois, rois_num, targets = self.bbox_assigner(
  195. rois, rois_num, inputs, i, is_cascade=True)
  196. targets_list.append(targets)
  197. rois_feat = self.roi_extractor(body_feats, rois, rois_num)
  198. bbox_feat = self.head(rois_feat, i)
  199. scores = self.bbox_score_list[i](bbox_feat)
  200. deltas = self.bbox_delta_list[i](bbox_feat)
  201. head_out_list.append([scores, deltas, rois])
  202. pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])
  203. if self.training:
  204. loss = {}
  205. for stage, value in enumerate(zip(head_out_list, targets_list)):
  206. (scores, deltas, rois), targets = value
  207. loss_stage = self.get_loss(scores, deltas, targets, rois,
  208. self.bbox_weight[stage])
  209. for k, v in loss_stage.items():
  210. loss[k + "_stage{}".format(
  211. stage)] = v / self.num_cascade_stages
  212. return loss, bbox_feat
  213. else:
  214. scores, deltas, self.refined_rois = self.get_prediction(
  215. head_out_list)
  216. return (deltas, scores), self.head
  217. def _get_rois_from_boxes(self, boxes, im_shape):
  218. rois = []
  219. for i, boxes_per_image in enumerate(boxes):
  220. clip_box = clip_bbox(boxes_per_image, im_shape[i])
  221. if self.training:
  222. keep = nonempty_bbox(clip_box)
  223. if keep.shape[0] == 0:
  224. keep = paddle.zeros([1], dtype='int32')
  225. clip_box = paddle.gather(clip_box, keep)
  226. rois.append(clip_box)
  227. rois_num = paddle.concat([paddle.shape(r)[0] for r in rois])
  228. return rois, rois_num
  229. def _get_pred_bbox(self, deltas, proposals, weights):
  230. pred_proposals = paddle.concat(proposals) if len(
  231. proposals) > 1 else proposals[0]
  232. pred_bbox = delta2bbox(deltas, pred_proposals, weights)
  233. pred_bbox = paddle.reshape(pred_bbox, [-1, deltas.shape[-1]])
  234. num_prop = [p.shape[0] for p in proposals]
  235. return pred_bbox.split(num_prop)
  236. def get_prediction(self, head_out_list):
  237. """
  238. head_out_list(List[Tensor]): scores, deltas, rois
  239. """
  240. pred_list = []
  241. scores_list = [F.softmax(head[0]) for head in head_out_list]
  242. scores = paddle.add_n(scores_list) / self.num_cascade_stages
  243. # Get deltas and rois from the last stage
  244. _, deltas, rois = head_out_list[-1]
  245. return scores, deltas, rois
  246. def get_refined_rois(self, ):
  247. return self.refined_rois