darknet.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlex.ppdet.core.workspace import register, serializable
from paddlex.ppdet.modeling.ops import batch_norm, mish

from ..shape_spec import ShapeSpec

__all__ = ['DarkNet', 'ConvBNLayer']


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 norm_type='bn',
                 norm_decay=0.,
                 act="leaky",
                 freeze_norm=False,
                 data_format='NCHW',
                 name=''):
        """
        conv + bn + activation layer

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 1
            groups (int): number of groups of conv layer, default 1
            padding (int): padding size, default 0
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            act (str): activation function type, default 'leaky', which means leaky_relu
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
            name (str): layer name
        """
        super(ConvBNLayer, self).__init__()

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            data_format=data_format,
            bias_attr=False)
        self.batch_norm = batch_norm(
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.act = act

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.batch_norm(out)
        if self.act == 'leaky':
            out = F.leaky_relu(out, 0.1)
        elif self.act == 'mish':
            out = mish(out)
        return out


class DownSample(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=2,
                 padding=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        downsample layer

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            filter_size (int): filter size, default 3
            stride (int): stride, default 2
            padding (int): padding size, default 1
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(DownSample, self).__init__()

        self.conv_bn_layer = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.ch_out = ch_out

    def forward(self, inputs):
        out = self.conv_bn_layer(inputs)
        return out


class BasicBlock(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        BasicBlock layer of DarkNet

        Args:
            ch_in (int): input channel
            ch_out (int): output channel of the 1x1 conv; the block output has ch_out * 2 channels
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(BasicBlock, self).__init__()

        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

    def forward(self, inputs):
        conv1 = self.conv1(inputs)
        conv2 = self.conv2(conv1)
        out = paddle.add(x=inputs, y=conv2)
        return out


class Blocks(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 name=None,
                 data_format='NCHW'):
        """
        Blocks layer, which consists of some BasicBlock layers

        Args:
            ch_in (int): input channel
            ch_out (int): output channel
            count (int): number of BasicBlock layers
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            name (str): layer name
            data_format (str): data format, NCHW or NHWC
        """
        super(Blocks, self).__init__()

        self.basicblock0 = BasicBlock(
            ch_in,
            ch_out,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)
        self.res_out_list = []
        for i in range(1, count):
            block_name = '{}.{}'.format(name, i)
            res_out = self.add_sublayer(
                block_name,
                BasicBlock(
                    ch_out * 2,
                    ch_out,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.res_out_list.append(res_out)
        self.ch_out = ch_out

    def forward(self, inputs):
        y = self.basicblock0(inputs)
        for basic_block_i in self.res_out_list:
            y = basic_block_i(y)
        return y
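

# Residual block counts per stage; depth 53 corresponds to DarkNet-53, whose
# five stages contain 1, 2, 8, 8 and 4 BasicBlocks respectively.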
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}


@register
@serializable
class DarkNet(nn.Layer):
    __shared__ = ['norm_type', 'data_format']

    def __init__(self,
                 depth=53,
                 freeze_at=-1,
                 return_idx=[2, 3, 4],
                 num_stages=5,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=False,
                 data_format='NCHW'):
        """
        Darknet, see https://pjreddie.com/darknet/yolo/

        Args:
            depth (int): depth of network
            freeze_at (int): freeze the backbone at which stage
            return_idx (list): index of stages whose feature maps are returned
            num_stages (int): number of stages, default 5
            norm_type (str): batch norm type, default bn
            norm_decay (float): decay for weight and bias of batch norm layer, default 0.
            freeze_norm (bool): whether to freeze norm, default False
            data_format (str): data format, NCHW or NHWC
        """
        super(DarkNet, self).__init__()
        self.depth = depth
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.num_stages = num_stages
        self.stages = DarkNet_cfg[self.depth][0:num_stages]

        self.conv0 = ConvBNLayer(
            ch_in=3,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

        self.downsample0 = DownSample(
            ch_in=32,
            ch_out=32 * 2,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            data_format=data_format)

        self._out_channels = []
        self.darknet_conv_block_list = []
        self.downsample_list = []
        ch_in = [64, 128, 256, 512, 1024]
        for i, stage in enumerate(self.stages):
            name = 'stage.{}'.format(i)
            conv_block = self.add_sublayer(
                name,
                Blocks(
                    int(ch_in[i]),
                    32 * (2**i),
                    stage,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format,
                    name=name))
            self.darknet_conv_block_list.append(conv_block)
            if i in return_idx:
                self._out_channels.append(64 * (2**i))
        for i in range(num_stages - 1):
            down_name = 'stage.{}.downsample'.format(i)
            downsample = self.add_sublayer(
                down_name,
                DownSample(
                    ch_in=32 * (2**(i + 1)),
                    ch_out=32 * (2**(i + 2)),
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    data_format=data_format))
            self.downsample_list.append(downsample)

    def forward(self, inputs):
        x = inputs['image']

        out = self.conv0(x)
        out = self.downsample0(out)
        blocks = []
        for i, conv_block_i in enumerate(self.darknet_conv_block_list):
            out = conv_block_i(out)
            if i == self.freeze_at:
                out.stop_gradient = True
            if i in self.return_idx:
                blocks.append(out)
            if i < self.num_stages - 1:
                out = self.downsample_list[i](out)
        return blocks

    @property
    def out_shape(self):
        return [ShapeSpec(channels=c) for c in self._out_channels]
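

# Minimal usage sketch, assuming the paddlex.ppdet imports above resolve in the
# current environment: build the default DarkNet-53 backbone, feed it a dummy
# 416x416 batch, and inspect the three returned feature maps. The input size
# and the smoke test itself are illustrative choices, not part of the API.
if __name__ == '__main__':
    model = DarkNet(depth=53, return_idx=[2, 3, 4])
    model.eval()

    # The backbone expects a dict with an 'image' tensor in NCHW layout.
    dummy = {'image': paddle.randn([1, 3, 416, 416])}
    feats = model(dummy)

    # With a 416x416 input, the returned stages have strides 8, 16 and 32:
    # [1, 256, 52, 52], [1, 512, 26, 26] and [1, 1024, 13, 13].
    for feat, spec in zip(feats, model.out_shape):
        print(feat.shape, spec.channels)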