# mobilenet_v3.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay

import math
  19. class MobileNetV3():
  20. """
  21. MobileNet v3, see https://arxiv.org/abs/1905.02244
  22. Args:
  23. scale (float): scaling factor for convolution groups proportion of mobilenet_v3.
  24. model_name (str): There are two modes, small and large.
  25. norm_type (str): normalization type, 'bn' and 'sync_bn' are supported.
  26. norm_decay (float): weight decay for normalization layer weights.
  27. conv_decay (float): weight decay for convolution layer weights.
  28. with_extra_blocks (bool): if extra blocks should be added.
  29. extra_block_filters (list): number of filter for each extra block.
  30. """
  31. def __init__(self,
  32. scale=1.0,
  33. model_name='small',
  34. with_extra_blocks=False,
  35. conv_decay=0.0,
  36. norm_type='bn',
  37. norm_decay=0.0,
  38. extra_block_filters=[[256, 512], [128, 256], [128, 256],
  39. [64, 128]],
  40. num_classes=None,
  41. lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
  42. assert len(lr_mult_list) == 5, \
  43. "lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format(
  44. len(lr_mult_list))
  45. self.scale = scale
  46. self.with_extra_blocks = with_extra_blocks
  47. self.extra_block_filters = extra_block_filters
  48. self.conv_decay = conv_decay
  49. self.norm_decay = norm_decay
  50. self.inplanes = 16
  51. self.end_points = []
  52. self.block_stride = 1
  53. self.num_classes = num_classes
  54. self.lr_mult_list = lr_mult_list
  55. self.curr_stage = 0
  56. if model_name == "large":
  57. self.cfg = [
  58. # kernel_size, expand, channel, se_block, act_mode, stride
  59. [3, 16, 16, False, 'relu', 1],
  60. [3, 64, 24, False, 'relu', 2],
  61. [3, 72, 24, False, 'relu', 1],
  62. [5, 72, 40, True, 'relu', 2],
  63. [5, 120, 40, True, 'relu', 1],
  64. [5, 120, 40, True, 'relu', 1],
  65. [3, 240, 80, False, 'hard_swish', 2],
  66. [3, 200, 80, False, 'hard_swish', 1],
  67. [3, 184, 80, False, 'hard_swish', 1],
  68. [3, 184, 80, False, 'hard_swish', 1],
  69. [3, 480, 112, True, 'hard_swish', 1],
  70. [3, 672, 112, True, 'hard_swish', 1],
  71. [5, 672, 160, True, 'hard_swish', 2],
  72. [5, 960, 160, True, 'hard_swish', 1],
  73. [5, 960, 160, True, 'hard_swish', 1],
  74. ]
  75. self.cls_ch_squeeze = 960
  76. self.cls_ch_expand = 1280
  77. self.lr_interval = 3
  78. elif model_name == "small":
  79. self.cfg = [
  80. # kernel_size, expand, channel, se_block, act_mode, stride
  81. [3, 16, 16, True, 'relu', 2],
  82. [3, 72, 24, False, 'relu', 2],
  83. [3, 88, 24, False, 'relu', 1],
  84. [5, 96, 40, True, 'hard_swish', 2],
  85. [5, 240, 40, True, 'hard_swish', 1],
  86. [5, 240, 40, True, 'hard_swish', 1],
  87. [5, 120, 48, True, 'hard_swish', 1],
  88. [5, 144, 48, True, 'hard_swish', 1],
  89. [5, 288, 96, True, 'hard_swish', 2],
  90. [5, 576, 96, True, 'hard_swish', 1],
  91. [5, 576, 96, True, 'hard_swish', 1],
  92. ]
  93. self.cls_ch_squeeze = 576
  94. self.cls_ch_expand = 1280
  95. self.lr_interval = 2
  96. else:
  97. raise NotImplementedError
  98. def _conv_bn_layer(self,
  99. input,
  100. filter_size,
  101. num_filters,
  102. stride,
  103. padding,
  104. num_groups=1,
  105. if_act=True,
  106. act=None,
  107. name=None,
  108. use_cudnn=True):
  109. lr_idx = self.curr_stage // self.lr_interval
  110. lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
  111. lr_mult = self.lr_mult_list[lr_idx]
  112. conv_param_attr = ParamAttr(name=name + '_weights',
  113. learning_rate=lr_mult,
  114. regularizer=L2Decay(self.conv_decay))
  115. conv = fluid.layers.conv2d(input=input,
  116. num_filters=num_filters,
  117. filter_size=filter_size,
  118. stride=stride,
  119. padding=padding,
  120. groups=num_groups,
  121. act=None,
  122. use_cudnn=use_cudnn,
  123. param_attr=conv_param_attr,
  124. bias_attr=False)
  125. bn_name = name + '_bn'
  126. bn_param_attr = ParamAttr(name=bn_name + "_scale",
  127. regularizer=L2Decay(self.norm_decay))
  128. bn_bias_attr = ParamAttr(name=bn_name + "_offset",
  129. regularizer=L2Decay(self.norm_decay))
  130. bn = fluid.layers.batch_norm(input=conv,
  131. param_attr=bn_param_attr,
  132. bias_attr=bn_bias_attr,
  133. moving_mean_name=bn_name + '_mean',
  134. moving_variance_name=bn_name + '_variance')
  135. if if_act:
  136. if act == 'relu':
  137. bn = fluid.layers.relu(bn)
  138. elif act == 'hard_swish':
  139. bn = self._hard_swish(bn)
  140. elif act == 'relu6':
  141. bn = fluid.layers.relu6(bn)
  142. return bn
  143. def _hard_swish(self, x):
  144. return x * fluid.layers.relu6(x + 3) / 6.
  145. def _se_block(self, input, num_out_filter, ratio=4, name=None):
  146. lr_idx = self.curr_stage // self.lr_interval
  147. lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
  148. lr_mult = self.lr_mult_list[lr_idx]
  149. num_mid_filter = int(num_out_filter // ratio)
  150. pool = fluid.layers.pool2d(input=input,
  151. pool_type='avg',
  152. global_pooling=True,
  153. use_cudnn=False)
  154. conv1 = fluid.layers.conv2d(
  155. input=pool,
  156. filter_size=1,
  157. num_filters=num_mid_filter,
  158. act='relu',
  159. param_attr=ParamAttr(
  160. name=name + '_1_weights', learning_rate=lr_mult),
  161. bias_attr=ParamAttr(
  162. name=name + '_1_offset', learning_rate=lr_mult))
  163. conv2 = fluid.layers.conv2d(
  164. input=conv1,
  165. filter_size=1,
  166. num_filters=num_out_filter,
  167. act='hard_sigmoid',
  168. param_attr=ParamAttr(
  169. name=name + '_2_weights', learning_rate=lr_mult),
  170. bias_attr=ParamAttr(
  171. name=name + '_2_offset', learning_rate=lr_mult))
  172. scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
  173. return scale
  174. def _residual_unit(self,
  175. input,
  176. num_in_filter,
  177. num_mid_filter,
  178. num_out_filter,
  179. stride,
  180. filter_size,
  181. act=None,
  182. use_se=False,
  183. name=None):
  184. input_data = input
  185. conv0 = self._conv_bn_layer(input=input,
  186. filter_size=1,
  187. num_filters=num_mid_filter,
  188. stride=1,
  189. padding=0,
  190. if_act=True,
  191. act=act,
  192. name=name + '_expand')
  193. if self.block_stride == 16 and stride == 2:
  194. self.end_points.append(conv0)
  195. conv1 = self._conv_bn_layer(input=conv0,
  196. filter_size=filter_size,
  197. num_filters=num_mid_filter,
  198. stride=stride,
  199. padding=int((filter_size - 1) // 2),
  200. if_act=True,
  201. act=act,
  202. num_groups=num_mid_filter,
  203. use_cudnn=False,
  204. name=name + '_depthwise')
  205. if use_se:
  206. conv1 = self._se_block(input=conv1,
  207. num_out_filter=num_mid_filter,
  208. name=name + '_se')
  209. conv2 = self._conv_bn_layer(input=conv1,
  210. filter_size=1,
  211. num_filters=num_out_filter,
  212. stride=1,
  213. padding=0,
  214. if_act=False,
  215. name=name + '_linear')
  216. if num_in_filter != num_out_filter or stride != 1:
  217. return conv2
  218. else:
  219. return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
  220. def _extra_block_dw(self,
  221. input,
  222. num_filters1,
  223. num_filters2,
  224. stride,
  225. name=None):
  226. pointwise_conv = self._conv_bn_layer(input=input,
  227. filter_size=1,
  228. num_filters=int(num_filters1),
  229. stride=1,
  230. padding="SAME",
  231. act='relu6',
  232. name=name + "_extra1")
  233. depthwise_conv = self._conv_bn_layer(input=pointwise_conv,
  234. filter_size=3,
  235. num_filters=int(num_filters2),
  236. stride=stride,
  237. padding="SAME",
  238. num_groups=int(num_filters1),
  239. act='relu6',
  240. use_cudnn=False,
  241. name=name + "_extra2_dw")
  242. normal_conv = self._conv_bn_layer(input=depthwise_conv,
  243. filter_size=1,
  244. num_filters=int(num_filters2),
  245. stride=1,
  246. padding="SAME",
  247. act='relu6',
  248. name=name + "_extra2_sep")
  249. return normal_conv
  250. def __call__(self, input):
  251. scale = self.scale
  252. inplanes = self.inplanes
  253. cfg = self.cfg
  254. blocks = []
  255. #conv1
  256. conv = self._conv_bn_layer(
  257. input,
  258. filter_size=3,
  259. num_filters=inplanes if scale <= 1.0 else int(inplanes * scale),
  260. stride=2,
  261. padding=1,
  262. num_groups=1,
  263. if_act=True,
  264. act='hard_swish',
  265. name='conv1')
  266. i = 0
  267. for layer_cfg in cfg:
  268. self.block_stride *= layer_cfg[5]
  269. if layer_cfg[5] == 2:
  270. blocks.append(conv)
  271. conv = self._residual_unit(input=conv,
  272. num_in_filter=inplanes,
  273. num_mid_filter=int(scale * layer_cfg[1]),
  274. num_out_filter=int(scale * layer_cfg[2]),
  275. act=layer_cfg[4],
  276. stride=layer_cfg[5],
  277. filter_size=layer_cfg[0],
  278. use_se=layer_cfg[3],
  279. name='conv' + str(i + 2))
  280. inplanes = int(scale * layer_cfg[2])
  281. i += 1
  282. self.curr_stage = i
  283. blocks.append(conv)
  284. if self.num_classes:
  285. conv = self._conv_bn_layer(input=conv,
  286. filter_size=1,
  287. num_filters=int(scale * self.cls_ch_squeeze),
  288. stride=1,
  289. padding=0,
  290. num_groups=1,
  291. if_act=True,
  292. act='hard_swish',
  293. name='conv_last')
  294. conv = fluid.layers.pool2d(input=conv,
  295. pool_type='avg',
  296. global_pooling=True,
  297. use_cudnn=False)
  298. conv = fluid.layers.conv2d(
  299. input=conv,
  300. num_filters=self.cls_ch_expand,
  301. filter_size=1,
  302. stride=1,
  303. padding=0,
  304. act=None,
  305. param_attr=ParamAttr(name='last_1x1_conv_weights'),
  306. bias_attr=False)
  307. conv = self._hard_swish(conv)
  308. drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
  309. out = fluid.layers.fc(input=drop,
  310. size=self.num_classes,
  311. param_attr=ParamAttr(name='fc_weights'),
  312. bias_attr=ParamAttr(name='fc_offset'))
  313. return OrderedDict([('logits', out)])
  314. if not self.with_extra_blocks:
  315. return blocks
  316. # extra block
  317. conv_extra = self._conv_bn_layer(conv,
  318. filter_size=1,
  319. num_filters=int(scale * cfg[-1][1]),
  320. stride=1,
  321. padding="SAME",
  322. num_groups=1,
  323. if_act=True,
  324. act='hard_swish',
  325. name='conv' + str(i + 2))
  326. self.end_points.append(conv_extra)
  327. i += 1
  328. for block_filter in self.extra_block_filters:
  329. conv_extra = self._extra_block_dw(conv_extra, block_filter[0],
  330. block_filter[1], 2,
  331. 'conv' + str(i + 2))
  332. self.end_points.append(conv_extra)
  333. i += 1
  334. return self.end_points