# mobilenet_v3.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.functional import hardswish, hardsigmoid
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.regularizer import L2Decay

__all__ = ["MobileNetV3_small", "MobileNetV3_large"]


def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
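
# Illustrative examples (added for clarity, not in the original file):
#   make_divisible(37)       -> 40  (rounds to the nearest multiple of 8)
#   make_divisible(16 * 0.1) -> 8   (clamped up to min_value = divisor)
# The `if new_v < 0.9 * v` guard keeps the rounded width within 10% of v.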


class MobileNetV3(nn.Layer):
    def __init__(self,
                 scale=1.0,
                 model_name="small",
                 dropout_prob=0.2,
                 class_dim=1000):
        super(MobileNetV3, self).__init__()
        inplanes = 16
        if model_name == "large":
            self.cfg = [
                # kernel_size, expand_size, out_channels, use_se, activation, stride
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hardswish", 2],
                [3, 200, 80, False, "hardswish", 1],
                [3, 184, 80, False, "hardswish", 1],
                [3, 184, 80, False, "hardswish", 1],
                [3, 480, 112, True, "hardswish", 1],
                [3, 672, 112, True, "hardswish", 1],
                [5, 672, 160, True, "hardswish", 2],
                [5, 960, 160, True, "hardswish", 1],
                [5, 960, 160, True, "hardswish", 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # kernel_size, expand_size, out_channels, use_se, activation, stride
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hardswish", 2],
                [5, 240, 40, True, "hardswish", 1],
                [5, 240, 40, True, "hardswish", 1],
                [5, 120, 48, True, "hardswish", 1],
                [5, 144, 48, True, "hardswish", 1],
                [5, 288, 96, True, "hardswish", 2],
                [5, 576, 96, True, "hardswish", 1],
                [5, 576, 96, True, "hardswish", 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError(
                "model_name [{}] is not implemented!".format(model_name))

        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act="hardswish",
            name="conv1")

        self.block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in self.cfg:
            block = self.add_sublayer(
                "conv" + str(i + 2),
                ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name="conv" + str(i + 2)))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
            i += 1

        self.last_second_conv = ConvBNLayer(
            in_c=inplanes,
            out_c=make_divisible(scale * self.cls_ch_squeeze),
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act="hardswish",
            name="conv_last")

        self.pool = AdaptiveAvgPool2D(1)

        self.last_conv = Conv2D(
            in_channels=make_divisible(scale * self.cls_ch_squeeze),
            out_channels=self.cls_ch_expand,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name="last_1x1_conv_weights"),
            bias_attr=False)

        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")

        self.out = Linear(
            self.cls_ch_expand,
            class_dim,
            weight_attr=ParamAttr(name="fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))

    def forward(self, inputs, label=None):
        x = self.conv1(inputs)
        for block in self.block_list:
            x = block(x)
        x = self.last_second_conv(x)
        x = self.pool(x)
        x = self.last_conv(x)
        x = hardswish(x)
        x = self.dropout(x)
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.out(x)
        return x


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 if_act=True,
                 act=None,
                 use_cudnn=True,
                 name=""):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        self.bn = BatchNorm(
            num_channels=out_c,
            act=None,
            param_attr=ParamAttr(
                name=name + "_bn_scale", regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(
                name=name + "_bn_offset", regularizer=L2Decay(0.0)),
            moving_mean_name=name + "_bn_mean",
            moving_variance_name=name + "_bn_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = hardswish(x)
            else:
                raise NotImplementedError(
                    "The activation function [{}] is not supported.".format(
                        self.act))
        return x
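
# ResidualUnit (below) is the MobileNetV3 inverted bottleneck block: a 1x1
# expansion conv, a k x k depthwise conv (num_groups == channels), an
# optional squeeze-and-excitation module, and a linear 1x1 projection; an
# identity shortcut is added when stride == 1 and in_c == out_c.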
class ResidualUnit(nn.Layer):
    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 act=None,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_c == out_c
        self.if_se = use_se
        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            num_groups=mid_c,
            if_act=True,
            act=act,
            name=name + "_depthwise")
        if self.if_se:
            self.mid_se = SEModule(mid_c, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        x = self.expand_conv(inputs)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        return x
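
# SEModule (below) is the squeeze-and-excitation gate used above: global
# average pooling, a 1x1 conv that reduces channels by `reduction`, ReLU,
# a 1x1 conv back to `channel`, and a hard sigmoid whose output rescales
# the input feature map channel-wise.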
class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = hardsigmoid(outputs, slope=0.2, offset=0.5)
        return paddle.multiply(x=inputs, y=outputs)


def MobileNetV3_small(scale=1.0, **args):
    model = MobileNetV3(model_name="small", scale=scale, **args)
    return model


def MobileNetV3_large(scale=1.0, **args):
    model = MobileNetV3(model_name="large", scale=scale, **args)
    return model
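

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the original
    # file): build each variant and run a dummy forward pass.
    for builder in (MobileNetV3_small, MobileNetV3_large):
        model = builder(scale=1.0, class_dim=1000)
        x = paddle.rand([1, 3, 224, 224])
        y = model(x)
        print(builder.__name__, y.shape)  # expect [1, 1000]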