ttf_head.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. import paddle.nn.functional as F
  17. from paddle import ParamAttr
  18. from paddle.nn.initializer import Constant, Uniform, Normal
  19. from paddle.regularizer import L2Decay
  20. from paddlex.ppdet.core.workspace import register
  21. from paddlex.ppdet.modeling.layers import DeformableConvV2, LiteConv
  22. import numpy as np
  23. @register
  24. class HMHead(nn.Layer):
  25. """
  26. Args:
  27. ch_in (int): The channel number of input Tensor.
  28. ch_out (int): The channel number of output Tensor.
  29. num_classes (int): Number of classes.
  30. conv_num (int): The convolution number of hm_feat.
  31. dcn_head(bool): whether use dcn in head. False by default.
  32. lite_head(bool): whether use lite version. False by default.
  33. norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
  34. bn by default
  35. Return:
  36. Heatmap head output
  37. """
  38. __shared__ = ['num_classes', 'norm_type']
  39. def __init__(
  40. self,
  41. ch_in,
  42. ch_out=128,
  43. num_classes=80,
  44. conv_num=2,
  45. dcn_head=False,
  46. lite_head=False,
  47. norm_type='bn', ):
  48. super(HMHead, self).__init__()
  49. head_conv = nn.Sequential()
  50. for i in range(conv_num):
  51. name = 'conv.{}'.format(i)
  52. if lite_head:
  53. lite_name = 'hm.' + name
  54. head_conv.add_sublayer(
  55. lite_name,
  56. LiteConv(
  57. in_channels=ch_in if i == 0 else ch_out,
  58. out_channels=ch_out,
  59. norm_type=norm_type))
  60. head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
  61. else:
  62. if dcn_head:
  63. head_conv.add_sublayer(
  64. name,
  65. DeformableConvV2(
  66. in_channels=ch_in if i == 0 else ch_out,
  67. out_channels=ch_out,
  68. kernel_size=3,
  69. weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
  70. else:
  71. head_conv.add_sublayer(
  72. name,
  73. nn.Conv2D(
  74. in_channels=ch_in if i == 0 else ch_out,
  75. out_channels=ch_out,
  76. kernel_size=3,
  77. padding=1,
  78. weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
  79. bias_attr=ParamAttr(
  80. learning_rate=2., regularizer=L2Decay(0.))))
  81. head_conv.add_sublayer(name + '.act', nn.ReLU())
  82. self.feat = head_conv
  83. bias_init = float(-np.log((1 - 0.01) / 0.01))
  84. self.head = nn.Conv2D(
  85. in_channels=ch_out,
  86. out_channels=num_classes,
  87. kernel_size=1,
  88. weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
  89. bias_attr=ParamAttr(
  90. learning_rate=2.,
  91. regularizer=L2Decay(0.),
  92. initializer=Constant(bias_init)))
  93. def forward(self, feat):
  94. out = self.feat(feat)
  95. out = self.head(out)
  96. return out
  97. @register
  98. class WHHead(nn.Layer):
  99. """
  100. Args:
  101. ch_in (int): The channel number of input Tensor.
  102. ch_out (int): The channel number of output Tensor.
  103. conv_num (int): The convolution number of wh_feat.
  104. dcn_head(bool): whether use dcn in head. False by default.
  105. lite_head(bool): whether use lite version. False by default.
  106. norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
  107. bn by default
  108. Return:
  109. Width & Height head output
  110. """
  111. __shared__ = ['norm_type']
  112. def __init__(self,
  113. ch_in,
  114. ch_out=64,
  115. conv_num=2,
  116. dcn_head=False,
  117. lite_head=False,
  118. norm_type='bn'):
  119. super(WHHead, self).__init__()
  120. head_conv = nn.Sequential()
  121. for i in range(conv_num):
  122. name = 'conv.{}'.format(i)
  123. if lite_head:
  124. lite_name = 'wh.' + name
  125. head_conv.add_sublayer(
  126. lite_name,
  127. LiteConv(
  128. in_channels=ch_in if i == 0 else ch_out,
  129. out_channels=ch_out,
  130. norm_type=norm_type))
  131. head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
  132. else:
  133. if dcn_head:
  134. head_conv.add_sublayer(
  135. name,
  136. DeformableConvV2(
  137. in_channels=ch_in if i == 0 else ch_out,
  138. out_channels=ch_out,
  139. kernel_size=3,
  140. weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
  141. else:
  142. head_conv.add_sublayer(
  143. name,
  144. nn.Conv2D(
  145. in_channels=ch_in if i == 0 else ch_out,
  146. out_channels=ch_out,
  147. kernel_size=3,
  148. padding=1,
  149. weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
  150. bias_attr=ParamAttr(
  151. learning_rate=2., regularizer=L2Decay(0.))))
  152. head_conv.add_sublayer(name + '.act', nn.ReLU())
  153. self.feat = head_conv
  154. self.head = nn.Conv2D(
  155. in_channels=ch_out,
  156. out_channels=4,
  157. kernel_size=1,
  158. weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
  159. bias_attr=ParamAttr(
  160. learning_rate=2., regularizer=L2Decay(0.)))
  161. def forward(self, feat):
  162. out = self.feat(feat)
  163. out = self.head(out)
  164. out = F.relu(out)
  165. return out
  166. @register
  167. class TTFHead(nn.Layer):
  168. """
  169. TTFHead
  170. Args:
  171. in_channels (int): the channel number of input to TTFHead.
  172. num_classes (int): the number of classes, 80 by default.
  173. hm_head_planes (int): the channel number in heatmap head,
  174. 128 by default.
  175. wh_head_planes (int): the channel number in width & height head,
  176. 64 by default.
  177. hm_head_conv_num (int): the number of convolution in heatmap head,
  178. 2 by default.
  179. wh_head_conv_num (int): the number of convolution in width & height
  180. head, 2 by default.
  181. hm_loss (object): Instance of 'CTFocalLoss'.
  182. wh_loss (object): Instance of 'GIoULoss'.
  183. wh_offset_base (float): the base offset of width and height,
  184. 16.0 by default.
  185. down_ratio (int): the actual down_ratio is calculated by base_down_ratio
  186. (default 16) and the number of upsample layers.
  187. lite_head(bool): whether use lite version. False by default.
  188. norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
  189. bn by default
  190. ags_module(bool): whether use AGS module to reweight location feature.
  191. false by default.
  192. """
  193. __shared__ = ['num_classes', 'down_ratio', 'norm_type']
  194. __inject__ = ['hm_loss', 'wh_loss']
  195. def __init__(self,
  196. in_channels,
  197. num_classes=80,
  198. hm_head_planes=128,
  199. wh_head_planes=64,
  200. hm_head_conv_num=2,
  201. wh_head_conv_num=2,
  202. hm_loss='CTFocalLoss',
  203. wh_loss='GIoULoss',
  204. wh_offset_base=16.,
  205. down_ratio=4,
  206. dcn_head=False,
  207. lite_head=False,
  208. norm_type='bn',
  209. ags_module=False):
  210. super(TTFHead, self).__init__()
  211. self.in_channels = in_channels
  212. self.hm_head = HMHead(in_channels, hm_head_planes, num_classes,
  213. hm_head_conv_num, dcn_head, lite_head, norm_type)
  214. self.wh_head = WHHead(in_channels, wh_head_planes, wh_head_conv_num,
  215. dcn_head, lite_head, norm_type)
  216. self.hm_loss = hm_loss
  217. self.wh_loss = wh_loss
  218. self.wh_offset_base = wh_offset_base
  219. self.down_ratio = down_ratio
  220. self.ags_module = ags_module
  221. @classmethod
  222. def from_config(cls, cfg, input_shape):
  223. if isinstance(input_shape, (list, tuple)):
  224. input_shape = input_shape[0]
  225. return {'in_channels': input_shape.channels, }
  226. def forward(self, feats):
  227. hm = self.hm_head(feats)
  228. wh = self.wh_head(feats) * self.wh_offset_base
  229. return hm, wh
  230. def filter_box_by_weight(self, pred, target, weight):
  231. """
  232. Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
  233. """
  234. index = paddle.nonzero(weight > 0)
  235. index.stop_gradient = True
  236. weight = paddle.gather_nd(weight, index)
  237. pred = paddle.gather_nd(pred, index)
  238. target = paddle.gather_nd(target, index)
  239. return pred, target, weight
  240. def filter_loc_by_weight(self, score, weight):
  241. index = paddle.nonzero(weight > 0)
  242. index.stop_gradient = True
  243. score = paddle.gather_nd(score, index)
  244. return score
  245. def get_loss(self, pred_hm, pred_wh, target_hm, box_target, target_weight):
  246. pred_hm = paddle.clip(F.sigmoid(pred_hm), 1e-4, 1 - 1e-4)
  247. hm_loss = self.hm_loss(pred_hm, target_hm)
  248. H, W = target_hm.shape[2:]
  249. mask = paddle.reshape(target_weight, [-1, H, W])
  250. avg_factor = paddle.sum(mask) + 1e-4
  251. base_step = self.down_ratio
  252. shifts_x = paddle.arange(0, W * base_step, base_step, dtype='int32')
  253. shifts_y = paddle.arange(0, H * base_step, base_step, dtype='int32')
  254. shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
  255. base_loc = paddle.stack([shift_x, shift_y], axis=0)
  256. base_loc.stop_gradient = True
  257. pred_boxes = paddle.concat(
  258. [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
  259. axis=1)
  260. pred_boxes = paddle.transpose(pred_boxes, [0, 2, 3, 1])
  261. boxes = paddle.transpose(box_target, [0, 2, 3, 1])
  262. boxes.stop_gradient = True
  263. if self.ags_module:
  264. pred_hm_max = paddle.max(pred_hm, axis=1, keepdim=True)
  265. pred_hm_max_softmax = F.softmax(pred_hm_max, axis=1)
  266. pred_hm_max_softmax = paddle.transpose(pred_hm_max_softmax,
  267. [0, 2, 3, 1])
  268. pred_hm_max_softmax = self.filter_loc_by_weight(pred_hm_max_softmax,
  269. mask)
  270. else:
  271. pred_hm_max_softmax = None
  272. pred_boxes, boxes, mask = self.filter_box_by_weight(pred_boxes, boxes,
  273. mask)
  274. mask.stop_gradient = True
  275. wh_loss = self.wh_loss(
  276. pred_boxes,
  277. boxes,
  278. iou_weight=mask.unsqueeze(1),
  279. loc_reweight=pred_hm_max_softmax)
  280. wh_loss = wh_loss / avg_factor
  281. ttf_loss = {'hm_loss': hm_loss, 'wh_loss': wh_loss}
  282. return ttf_loss