vgg.py 6.7 KB


  1. from __future__ import division
  2. import paddle
  3. import paddle.nn as nn
  4. import paddle.nn.functional as F
  5. from paddle import ParamAttr
  6. from paddle.regularizer import L2Decay
  7. from paddle.nn import Conv2D, MaxPool2D
  8. from paddlex.ppdet.core.workspace import register, serializable
  9. from ..shape_spec import ShapeSpec
# Public names exported by this module.
__all__ = ['VGG']
# Maps a supported VGG depth to the per-stage ``groups`` value handed to the
# five ConvBlock stages (one list entry per stage, in order).
VGG_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
  12. class ConvBlock(nn.Layer):
  13. def __init__(self,
  14. in_channels,
  15. out_channels,
  16. groups,
  17. pool_size=2,
  18. pool_stride=2,
  19. pool_padding=0,
  20. name=None):
  21. super(ConvBlock, self).__init__()
  22. self.groups = groups
  23. self.conv0 = nn.Conv2D(
  24. in_channels=in_channels,
  25. out_channels=out_channels,
  26. kernel_size=3,
  27. stride=1,
  28. padding=1,
  29. weight_attr=ParamAttr(name=name + "1_weights"),
  30. bias_attr=ParamAttr(name=name + "1_bias"))
  31. self.conv_out_list = []
  32. for i in range(1, groups):
  33. conv_out = self.add_sublayer(
  34. 'conv{}'.format(i),
  35. Conv2D(
  36. in_channels=out_channels,
  37. out_channels=out_channels,
  38. kernel_size=3,
  39. stride=1,
  40. padding=1,
  41. weight_attr=ParamAttr(
  42. name=name + "{}_weights".format(i + 1)),
  43. bias_attr=ParamAttr(name=name + "{}_bias".format(i + 1))))
  44. self.conv_out_list.append(conv_out)
  45. self.pool = MaxPool2D(
  46. kernel_size=pool_size,
  47. stride=pool_stride,
  48. padding=pool_padding,
  49. ceil_mode=True)
  50. def forward(self, inputs):
  51. out = self.conv0(inputs)
  52. out = F.relu(out)
  53. for conv_i in self.conv_out_list:
  54. out = conv_i(out)
  55. out = F.relu(out)
  56. pool = self.pool(out)
  57. return out, pool
  58. class ExtraBlock(nn.Layer):
  59. def __init__(self,
  60. in_channels,
  61. mid_channels,
  62. out_channels,
  63. padding,
  64. stride,
  65. kernel_size,
  66. name=None):
  67. super(ExtraBlock, self).__init__()
  68. self.conv0 = Conv2D(
  69. in_channels=in_channels,
  70. out_channels=mid_channels,
  71. kernel_size=1,
  72. stride=1,
  73. padding=0)
  74. self.conv1 = Conv2D(
  75. in_channels=mid_channels,
  76. out_channels=out_channels,
  77. kernel_size=kernel_size,
  78. stride=stride,
  79. padding=padding)
  80. def forward(self, inputs):
  81. out = self.conv0(inputs)
  82. out = F.relu(out)
  83. out = self.conv1(out)
  84. out = F.relu(out)
  85. return out
  86. class L2NormScale(nn.Layer):
  87. def __init__(self, num_channels, scale=1.0):
  88. super(L2NormScale, self).__init__()
  89. self.scale = self.create_parameter(
  90. attr=ParamAttr(initializer=paddle.nn.initializer.Constant(scale)),
  91. shape=[num_channels])
  92. def forward(self, inputs):
  93. out = F.normalize(inputs, axis=1, epsilon=1e-10)
  94. # out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
  95. # out) * out
  96. out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3) * out
  97. return out
  98. @register
  99. @serializable
  100. class VGG(nn.Layer):
  101. def __init__(self,
  102. depth=16,
  103. normalizations=[20., -1, -1, -1, -1, -1],
  104. extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
  105. [128, 256, 0, 1, 3],
  106. [128, 256, 0, 1, 3]]):
  107. super(VGG, self).__init__()
  108. assert depth in [16, 19], \
  109. "depth as 16/19 supported currently, but got {}".format(depth)
  110. self.depth = depth
  111. self.groups = VGG_cfg[depth]
  112. self.normalizations = normalizations
  113. self.extra_block_filters = extra_block_filters
  114. self._out_channels = []
  115. self.conv_block_0 = ConvBlock(
  116. 3, 64, self.groups[0], 2, 2, 0, name="conv1_")
  117. self.conv_block_1 = ConvBlock(
  118. 64, 128, self.groups[1], 2, 2, 0, name="conv2_")
  119. self.conv_block_2 = ConvBlock(
  120. 128, 256, self.groups[2], 2, 2, 0, name="conv3_")
  121. self.conv_block_3 = ConvBlock(
  122. 256, 512, self.groups[3], 2, 2, 0, name="conv4_")
  123. self.conv_block_4 = ConvBlock(
  124. 512, 512, self.groups[4], 3, 1, 1, name="conv5_")
  125. self._out_channels.append(512)
  126. self.fc6 = Conv2D(
  127. in_channels=512,
  128. out_channels=1024,
  129. kernel_size=3,
  130. stride=1,
  131. padding=6,
  132. dilation=6)
  133. self.fc7 = Conv2D(
  134. in_channels=1024,
  135. out_channels=1024,
  136. kernel_size=1,
  137. stride=1,
  138. padding=0)
  139. self._out_channels.append(1024)
  140. # extra block
  141. self.extra_convs = []
  142. last_channels = 1024
  143. for i, v in enumerate(self.extra_block_filters):
  144. assert len(v) == 5, "extra_block_filters size not fix"
  145. extra_conv = self.add_sublayer("conv{}".format(6 + i),
  146. ExtraBlock(last_channels, v[0], v[1],
  147. v[2], v[3], v[4]))
  148. last_channels = v[1]
  149. self.extra_convs.append(extra_conv)
  150. self._out_channels.append(last_channels)
  151. self.norms = []
  152. for i, n in enumerate(self.normalizations):
  153. if n != -1:
  154. norm = self.add_sublayer("norm{}".format(i),
  155. L2NormScale(
  156. self.extra_block_filters[i][1], n))
  157. else:
  158. norm = None
  159. self.norms.append(norm)
  160. def forward(self, inputs):
  161. outputs = []
  162. conv, pool = self.conv_block_0(inputs['image'])
  163. conv, pool = self.conv_block_1(pool)
  164. conv, pool = self.conv_block_2(pool)
  165. conv, pool = self.conv_block_3(pool)
  166. outputs.append(conv)
  167. conv, pool = self.conv_block_4(pool)
  168. out = self.fc6(pool)
  169. out = F.relu(out)
  170. out = self.fc7(out)
  171. out = F.relu(out)
  172. outputs.append(out)
  173. if not self.extra_block_filters:
  174. return outputs
  175. # extra block
  176. for extra_conv in self.extra_convs:
  177. out = extra_conv(out)
  178. outputs.append(out)
  179. for i, n in enumerate(self.normalizations):
  180. if n != -1:
  181. outputs[i] = self.norms[i](outputs[i])
  182. return outputs
  183. @property
  184. def out_shape(self):
  185. return [ShapeSpec(channels=c) for c in self._out_channels]