# pico_head.py
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numpy as np

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Normal, Constant

from paddlex.ppdet.core.workspace import register
from paddlex.ppdet.modeling.layers import ConvNormLayer
from paddlex.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
from paddlex.ppdet.data.transform.atss_assigner import bbox_overlaps

from .gfl_head import GFLHead
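
# This module implements the PicoDet detection head: PicoFeat builds a
# depthwise-separable conv tower per FPN level, and PicoHead (a GFLHead
# subclass) adds the 1x1 prediction convs for classification and DFL-style
# box regression.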

@register
class PicoFeat(nn.Layer):
    """
    PicoFeat of PicoDet

    Args:
        feat_in (int): The channel number of input Tensor.
        feat_out (int): The channel number of output Tensor.
        num_fpn_stride (int): The number of FPN levels; one conv tower is
            built per level.
        num_convs (int): The number of depthwise-separable conv blocks in
            each tower.
        norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
        share_cls_reg (bool): Whether the classification and regression
            branches share the same convolutions.
    """
    def __init__(self,
                 feat_in=256,
                 feat_out=96,
                 num_fpn_stride=3,
                 num_convs=2,
                 norm_type='bn',
                 share_cls_reg=False):
        super(PicoFeat, self).__init__()
        self.num_convs = num_convs
        self.norm_type = norm_type
        self.share_cls_reg = share_cls_reg
        self.cls_convs = []
        self.reg_convs = []
        for stage_idx in range(num_fpn_stride):
            cls_subnet_convs = []
            reg_subnet_convs = []
            for i in range(self.num_convs):
                in_c = feat_in if i == 0 else feat_out
                cls_conv_dw = self.add_sublayer(
                    'cls_conv_dw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=5,
                        stride=1,
                        groups=feat_out,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                cls_subnet_convs.append(cls_conv_dw)
                cls_conv_pw = self.add_sublayer(
                    'cls_conv_pw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                cls_subnet_convs.append(cls_conv_pw)
                if not self.share_cls_reg:
                    reg_conv_dw = self.add_sublayer(
                        'reg_conv_dw{}.{}'.format(stage_idx, i),
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=feat_out,
                            filter_size=5,
                            stride=1,
                            groups=feat_out,
                            norm_type=norm_type,
                            bias_on=False,
                            lr_scale=2.))
                    reg_subnet_convs.append(reg_conv_dw)
                    reg_conv_pw = self.add_sublayer(
                        'reg_conv_pw{}.{}'.format(stage_idx, i),
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=feat_out,
                            filter_size=1,
                            stride=1,
                            norm_type=norm_type,
                            bias_on=False,
                            lr_scale=2.))
                    reg_subnet_convs.append(reg_conv_pw)
            self.cls_convs.append(cls_subnet_convs)
            self.reg_convs.append(reg_subnet_convs)

    def forward(self, fpn_feat, stage_idx):
        assert stage_idx < len(self.cls_convs)
        cls_feat = fpn_feat
        reg_feat = fpn_feat
        for i in range(len(self.cls_convs[stage_idx])):
            cls_feat = F.leaky_relu(self.cls_convs[stage_idx][i](cls_feat),
                                    0.1)
            if not self.share_cls_reg:
                reg_feat = F.leaky_relu(
                    self.reg_convs[stage_idx][i](reg_feat), 0.1)
        return cls_feat, reg_feat
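

# Usage sketch for PicoFeat (illustrative, not part of the original file),
# assuming three 96-channel FPN feature maps:
#
#     feat = PicoFeat(feat_in=96, feat_out=96, num_fpn_stride=3)
#     outs = [feat(fpn_feat, i) for i, fpn_feat in enumerate(fpn_feats)]
#
# When share_cls_reg=True the regression tower is skipped and reg_feat is
# returned as the untouched input; PicoHead then derives both predictions
# from cls_feat.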

@register
class PicoHead(GFLHead):
    """
    PicoHead

    Args:
        conv_feat (object): Instance of 'PicoFeat'
        num_classes (int): Number of classes
        fpn_stride (list): The stride of each FPN Layer
        prior_prob (float): Used to set the bias init for the class
            prediction layer
        loss_qfl (object): Instance of QualityFocalLoss.
        loss_dfl (object): Instance of DistributionFocalLoss.
        loss_bbox (object): Instance of bbox loss, e.g. GIoULoss.
        reg_max (int): Max value of the integral set :math:`{0, ..., reg_max}`
            in QFL setting. Default: 16.
    """
    __inject__ = [
        'conv_feat', 'dgqp_module', 'loss_qfl', 'loss_dfl', 'loss_bbox', 'nms'
    ]
    __shared__ = ['num_classes']

    def __init__(self,
                 conv_feat='PicoFeat',
                 dgqp_module=None,
                 num_classes=80,
                 fpn_stride=[8, 16, 32],
                 prior_prob=0.01,
                 loss_qfl='QualityFocalLoss',
                 loss_dfl='DistributionFocalLoss',
                 loss_bbox='GIoULoss',
                 reg_max=16,
                 feat_in_chan=96,
                 nms=None,
                 nms_pre=1000,
                 cell_offset=0):
        super(PicoHead, self).__init__(
            conv_feat=conv_feat,
            dgqp_module=dgqp_module,
            num_classes=num_classes,
            fpn_stride=fpn_stride,
            prior_prob=prior_prob,
            loss_qfl=loss_qfl,
            loss_dfl=loss_dfl,
            loss_bbox=loss_bbox,
            reg_max=reg_max,
            feat_in_chan=feat_in_chan,
            nms=nms,
            nms_pre=nms_pre,
            cell_offset=cell_offset)
        self.conv_feat = conv_feat
        self.num_classes = num_classes
        self.fpn_stride = fpn_stride
        self.prior_prob = prior_prob
        self.loss_qfl = loss_qfl
        self.loss_dfl = loss_dfl
        self.loss_bbox = loss_bbox
        self.reg_max = reg_max
        self.feat_in_chan = feat_in_chan
        self.nms = nms
        self.nms_pre = nms_pre
        self.cell_offset = cell_offset
        self.use_sigmoid = self.loss_qfl.use_sigmoid
        if self.use_sigmoid:
            self.cls_out_channels = self.num_classes
        else:
            self.cls_out_channels = self.num_classes + 1
        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
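        # The bias above gives sigmoid(bias_init_value) == prior_prob, the
        # focal-loss prior initialization: every class initially scores
        # about prior_prob (0.01 by default), so early training is not
        # swamped by the many easy negatives.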

        # Clear the super class initialization
        self.gfl_head_cls = None
        self.gfl_head_reg = None
        self.scales_regs = None

        self.head_cls_list = []
        self.head_reg_list = []
        for i in range(len(fpn_stride)):
            head_cls = self.add_sublayer(
                "head_cls" + str(i),
                nn.Conv2D(
                    in_channels=self.feat_in_chan,
                    out_channels=self.cls_out_channels + 4 * (self.reg_max + 1)
                    if self.conv_feat.share_cls_reg else self.cls_out_channels,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    weight_attr=ParamAttr(initializer=Normal(
                        mean=0., std=0.01)),
                    bias_attr=ParamAttr(
                        initializer=Constant(value=bias_init_value))))
            self.head_cls_list.append(head_cls)
            if not self.conv_feat.share_cls_reg:
                head_reg = self.add_sublayer(
                    "head_reg" + str(i),
                    nn.Conv2D(
                        in_channels=self.feat_in_chan,
                        out_channels=4 * (self.reg_max + 1),
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        weight_attr=ParamAttr(initializer=Normal(
                            mean=0., std=0.01)),
                        bias_attr=ParamAttr(initializer=Constant(value=0))))
                self.head_reg_list.append(head_reg)
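
    # The regression output carries, for each of the 4 box sides, a discrete
    # distribution over the reg_max + 1 integral bins {0, ..., reg_max}
    # (GFL's Distribution Focal Loss formulation), hence 4 * (reg_max + 1)
    # channels; with share_cls_reg=True a single 1x1 conv emits class scores
    # and distributions together and forward() splits them apart.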
    def forward(self, fpn_feats):
        assert len(fpn_feats) == len(
            self.fpn_stride
        ), "The size of fpn_feats is not equal to size of fpn_stride"
        cls_logits_list = []
        bboxes_reg_list = []
        for i, fpn_feat in enumerate(fpn_feats):
            conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat, i)
            if self.conv_feat.share_cls_reg:
                cls_logits = self.head_cls_list[i](conv_cls_feat)
                cls_score, bbox_pred = paddle.split(
                    cls_logits,
                    [self.cls_out_channels, 4 * (self.reg_max + 1)],
                    axis=1)
            else:
                cls_score = self.head_cls_list[i](conv_cls_feat)
                bbox_pred = self.head_reg_list[i](conv_reg_feat)

            if self.dgqp_module:
                quality_score = self.dgqp_module(bbox_pred)
                cls_score = F.sigmoid(cls_score) * quality_score

            if not self.training:
                cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
                bbox_pred = bbox_pred.transpose([0, 2, 3, 1])

            cls_logits_list.append(cls_score)
            bboxes_reg_list.append(bbox_pred)

        return (cls_logits_list, bboxes_reg_list)
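
# Shape sketch (illustrative assumption, not from the original file): with a
# 320x320 input, fpn_stride=[8, 16, 32], num_classes=80 and reg_max=16,
# forward() returns, per level,
#     cls_score: [N, 80, 40, 40], [N, 80, 20, 20], [N, 80, 10, 10]
#     bbox_pred: [N, 68, 40, 40], ...  (4 * (16 + 1) = 68 channels)
# in training mode; at inference both are transposed to NHWC and cls_score
# is passed through sigmoid before the decode/NMS post-processing inherited
# from GFLHead.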