|
@@ -31,7 +31,6 @@ class MobileNetV3():
|
|
|
with_extra_blocks (bool): if extra blocks should be added.
|
|
with_extra_blocks (bool): if extra blocks should be added.
|
|
|
extra_block_filters (list): number of filter for each extra block.
|
|
extra_block_filters (list): number of filter for each extra block.
|
|
|
"""
|
|
"""
|
|
|
-
|
|
|
|
|
def __init__(self,
|
|
def __init__(self,
|
|
|
scale=1.0,
|
|
scale=1.0,
|
|
|
model_name='small',
|
|
model_name='small',
|
|
@@ -41,7 +40,11 @@ class MobileNetV3():
|
|
|
norm_decay=0.0,
|
|
norm_decay=0.0,
|
|
|
extra_block_filters=[[256, 512], [128, 256], [128, 256],
|
|
extra_block_filters=[[256, 512], [128, 256], [128, 256],
|
|
|
[64, 128]],
|
|
[64, 128]],
|
|
|
- num_classes=None):
|
|
|
|
|
|
|
+ num_classes=None,
|
|
|
|
|
+ lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
|
|
|
|
|
+ assert len(lr_mult_list) == 5, \
|
|
|
|
|
+ "lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format(
|
|
|
|
|
+ len(lr_mult_list))
|
|
|
self.scale = scale
|
|
self.scale = scale
|
|
|
self.with_extra_blocks = with_extra_blocks
|
|
self.with_extra_blocks = with_extra_blocks
|
|
|
self.extra_block_filters = extra_block_filters
|
|
self.extra_block_filters = extra_block_filters
|
|
@@ -51,6 +54,8 @@ class MobileNetV3():
|
|
|
self.end_points = []
|
|
self.end_points = []
|
|
|
self.block_stride = 1
|
|
self.block_stride = 1
|
|
|
self.num_classes = num_classes
|
|
self.num_classes = num_classes
|
|
|
|
|
+ self.lr_mult_list = lr_mult_list
|
|
|
|
|
+ self.curr_stage = 0
|
|
|
if model_name == "large":
|
|
if model_name == "large":
|
|
|
self.cfg = [
|
|
self.cfg = [
|
|
|
# kernel_size, expand, channel, se_block, act_mode, stride
|
|
# kernel_size, expand, channel, se_block, act_mode, stride
|
|
@@ -72,6 +77,7 @@ class MobileNetV3():
|
|
|
]
|
|
]
|
|
|
self.cls_ch_squeeze = 960
|
|
self.cls_ch_squeeze = 960
|
|
|
self.cls_ch_expand = 1280
|
|
self.cls_ch_expand = 1280
|
|
|
|
|
+ self.lr_interval = 3
|
|
|
elif model_name == "small":
|
|
elif model_name == "small":
|
|
|
self.cfg = [
|
|
self.cfg = [
|
|
|
# kernel_size, expand, channel, se_block, act_mode, stride
|
|
# kernel_size, expand, channel, se_block, act_mode, stride
|
|
@@ -89,6 +95,7 @@ class MobileNetV3():
|
|
|
]
|
|
]
|
|
|
self.cls_ch_squeeze = 576
|
|
self.cls_ch_squeeze = 576
|
|
|
self.cls_ch_expand = 1280
|
|
self.cls_ch_expand = 1280
|
|
|
|
|
+ self.lr_interval = 2
|
|
|
else:
|
|
else:
|
|
|
raise NotImplementedError
|
|
raise NotImplementedError
|
|
|
|
|
|
|
@@ -103,30 +110,32 @@ class MobileNetV3():
|
|
|
act=None,
|
|
act=None,
|
|
|
name=None,
|
|
name=None,
|
|
|
use_cudnn=True):
|
|
use_cudnn=True):
|
|
|
- conv_param_attr = ParamAttr(
|
|
|
|
|
- name=name + '_weights', regularizer=L2Decay(self.conv_decay))
|
|
|
|
|
- conv = fluid.layers.conv2d(
|
|
|
|
|
- input=input,
|
|
|
|
|
- num_filters=num_filters,
|
|
|
|
|
- filter_size=filter_size,
|
|
|
|
|
- stride=stride,
|
|
|
|
|
- padding=padding,
|
|
|
|
|
- groups=num_groups,
|
|
|
|
|
- act=None,
|
|
|
|
|
- use_cudnn=use_cudnn,
|
|
|
|
|
- param_attr=conv_param_attr,
|
|
|
|
|
- bias_attr=False)
|
|
|
|
|
|
|
+ lr_idx = self.curr_stage // self.lr_interval
|
|
|
|
|
+ lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
|
|
|
|
|
+ lr_mult = self.lr_mult_list[lr_idx]
|
|
|
|
|
+ conv_param_attr = ParamAttr(name=name + '_weights',
|
|
|
|
|
+ learning_rate=lr_mult,
|
|
|
|
|
+ regularizer=L2Decay(self.conv_decay))
|
|
|
|
|
+ conv = fluid.layers.conv2d(input=input,
|
|
|
|
|
+ num_filters=num_filters,
|
|
|
|
|
+ filter_size=filter_size,
|
|
|
|
|
+ stride=stride,
|
|
|
|
|
+ padding=padding,
|
|
|
|
|
+ groups=num_groups,
|
|
|
|
|
+ act=None,
|
|
|
|
|
+ use_cudnn=use_cudnn,
|
|
|
|
|
+ param_attr=conv_param_attr,
|
|
|
|
|
+ bias_attr=False)
|
|
|
bn_name = name + '_bn'
|
|
bn_name = name + '_bn'
|
|
|
- bn_param_attr = ParamAttr(
|
|
|
|
|
- name=bn_name + "_scale", regularizer=L2Decay(self.norm_decay))
|
|
|
|
|
- bn_bias_attr = ParamAttr(
|
|
|
|
|
- name=bn_name + "_offset", regularizer=L2Decay(self.norm_decay))
|
|
|
|
|
- bn = fluid.layers.batch_norm(
|
|
|
|
|
- input=conv,
|
|
|
|
|
- param_attr=bn_param_attr,
|
|
|
|
|
- bias_attr=bn_bias_attr,
|
|
|
|
|
- moving_mean_name=bn_name + '_mean',
|
|
|
|
|
- moving_variance_name=bn_name + '_variance')
|
|
|
|
|
|
|
+ bn_param_attr = ParamAttr(name=bn_name + "_scale",
|
|
|
|
|
+ regularizer=L2Decay(self.norm_decay))
|
|
|
|
|
+ bn_bias_attr = ParamAttr(name=bn_name + "_offset",
|
|
|
|
|
+ regularizer=L2Decay(self.norm_decay))
|
|
|
|
|
+ bn = fluid.layers.batch_norm(input=conv,
|
|
|
|
|
+ param_attr=bn_param_attr,
|
|
|
|
|
+ bias_attr=bn_bias_attr,
|
|
|
|
|
+ moving_mean_name=bn_name + '_mean',
|
|
|
|
|
+ moving_variance_name=bn_name + '_variance')
|
|
|
if if_act:
|
|
if if_act:
|
|
|
if act == 'relu':
|
|
if act == 'relu':
|
|
|
bn = fluid.layers.relu(bn)
|
|
bn = fluid.layers.relu(bn)
|
|
@@ -140,23 +149,33 @@ class MobileNetV3():
|
|
|
return x * fluid.layers.relu6(x + 3) / 6.
|
|
return x * fluid.layers.relu6(x + 3) / 6.
|
|
|
|
|
|
|
|
def _se_block(self, input, num_out_filter, ratio=4, name=None):
|
|
def _se_block(self, input, num_out_filter, ratio=4, name=None):
|
|
|
|
|
+ lr_idx = self.curr_stage // self.lr_interval
|
|
|
|
|
+ lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
|
|
|
|
|
+ lr_mult = self.lr_mult_list[lr_idx]
|
|
|
|
|
+
|
|
|
num_mid_filter = int(num_out_filter // ratio)
|
|
num_mid_filter = int(num_out_filter // ratio)
|
|
|
- pool = fluid.layers.pool2d(
|
|
|
|
|
- input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
|
|
|
|
|
|
|
+ pool = fluid.layers.pool2d(input=input,
|
|
|
|
|
+ pool_type='avg',
|
|
|
|
|
+ global_pooling=True,
|
|
|
|
|
+ use_cudnn=False)
|
|
|
conv1 = fluid.layers.conv2d(
|
|
conv1 = fluid.layers.conv2d(
|
|
|
input=pool,
|
|
input=pool,
|
|
|
filter_size=1,
|
|
filter_size=1,
|
|
|
num_filters=num_mid_filter,
|
|
num_filters=num_mid_filter,
|
|
|
act='relu',
|
|
act='relu',
|
|
|
- param_attr=ParamAttr(name=name + '_1_weights'),
|
|
|
|
|
- bias_attr=ParamAttr(name=name + '_1_offset'))
|
|
|
|
|
|
|
+ param_attr=ParamAttr(
|
|
|
|
|
+ name=name + '_1_weights', learning_rate=lr_mult),
|
|
|
|
|
+ bias_attr=ParamAttr(
|
|
|
|
|
+ name=name + '_1_offset', learning_rate=lr_mult))
|
|
|
conv2 = fluid.layers.conv2d(
|
|
conv2 = fluid.layers.conv2d(
|
|
|
input=conv1,
|
|
input=conv1,
|
|
|
filter_size=1,
|
|
filter_size=1,
|
|
|
num_filters=num_out_filter,
|
|
num_filters=num_out_filter,
|
|
|
act='hard_sigmoid',
|
|
act='hard_sigmoid',
|
|
|
- param_attr=ParamAttr(name=name + '_2_weights'),
|
|
|
|
|
- bias_attr=ParamAttr(name=name + '_2_offset'))
|
|
|
|
|
|
|
+ param_attr=ParamAttr(
|
|
|
|
|
+ name=name + '_2_weights', learning_rate=lr_mult),
|
|
|
|
|
+ bias_attr=ParamAttr(
|
|
|
|
|
+ name=name + '_2_offset', learning_rate=lr_mult))
|
|
|
|
|
|
|
|
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
|
|
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
|
|
|
return scale
|
|
return scale
|
|
@@ -172,46 +191,43 @@ class MobileNetV3():
|
|
|
use_se=False,
|
|
use_se=False,
|
|
|
name=None):
|
|
name=None):
|
|
|
input_data = input
|
|
input_data = input
|
|
|
- conv0 = self._conv_bn_layer(
|
|
|
|
|
- input=input,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=num_mid_filter,
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding=0,
|
|
|
|
|
- if_act=True,
|
|
|
|
|
- act=act,
|
|
|
|
|
- name=name + '_expand')
|
|
|
|
|
|
|
+ conv0 = self._conv_bn_layer(input=input,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=num_mid_filter,
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding=0,
|
|
|
|
|
+ if_act=True,
|
|
|
|
|
+ act=act,
|
|
|
|
|
+ name=name + '_expand')
|
|
|
if self.block_stride == 16 and stride == 2:
|
|
if self.block_stride == 16 and stride == 2:
|
|
|
self.end_points.append(conv0)
|
|
self.end_points.append(conv0)
|
|
|
- conv1 = self._conv_bn_layer(
|
|
|
|
|
- input=conv0,
|
|
|
|
|
- filter_size=filter_size,
|
|
|
|
|
- num_filters=num_mid_filter,
|
|
|
|
|
- stride=stride,
|
|
|
|
|
- padding=int((filter_size - 1) // 2),
|
|
|
|
|
- if_act=True,
|
|
|
|
|
- act=act,
|
|
|
|
|
- num_groups=num_mid_filter,
|
|
|
|
|
- use_cudnn=False,
|
|
|
|
|
- name=name + '_depthwise')
|
|
|
|
|
|
|
+ conv1 = self._conv_bn_layer(input=conv0,
|
|
|
|
|
+ filter_size=filter_size,
|
|
|
|
|
+ num_filters=num_mid_filter,
|
|
|
|
|
+ stride=stride,
|
|
|
|
|
+ padding=int((filter_size - 1) // 2),
|
|
|
|
|
+ if_act=True,
|
|
|
|
|
+ act=act,
|
|
|
|
|
+ num_groups=num_mid_filter,
|
|
|
|
|
+ use_cudnn=False,
|
|
|
|
|
+ name=name + '_depthwise')
|
|
|
|
|
|
|
|
if use_se:
|
|
if use_se:
|
|
|
- conv1 = self._se_block(
|
|
|
|
|
- input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
|
|
|
|
|
|
|
+ conv1 = self._se_block(input=conv1,
|
|
|
|
|
+ num_out_filter=num_mid_filter,
|
|
|
|
|
+ name=name + '_se')
|
|
|
|
|
|
|
|
- conv2 = self._conv_bn_layer(
|
|
|
|
|
- input=conv1,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=num_out_filter,
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding=0,
|
|
|
|
|
- if_act=False,
|
|
|
|
|
- name=name + '_linear')
|
|
|
|
|
|
|
+ conv2 = self._conv_bn_layer(input=conv1,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=num_out_filter,
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding=0,
|
|
|
|
|
+ if_act=False,
|
|
|
|
|
+ name=name + '_linear')
|
|
|
if num_in_filter != num_out_filter or stride != 1:
|
|
if num_in_filter != num_out_filter or stride != 1:
|
|
|
return conv2
|
|
return conv2
|
|
|
else:
|
|
else:
|
|
|
- return fluid.layers.elementwise_add(
|
|
|
|
|
- x=input_data, y=conv2, act=None)
|
|
|
|
|
|
|
+ return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
|
|
|
|
|
|
|
|
def _extra_block_dw(self,
|
|
def _extra_block_dw(self,
|
|
|
input,
|
|
input,
|
|
@@ -219,32 +235,29 @@ class MobileNetV3():
|
|
|
num_filters2,
|
|
num_filters2,
|
|
|
stride,
|
|
stride,
|
|
|
name=None):
|
|
name=None):
|
|
|
- pointwise_conv = self._conv_bn_layer(
|
|
|
|
|
- input=input,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=int(num_filters1),
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding="SAME",
|
|
|
|
|
- act='relu6',
|
|
|
|
|
- name=name + "_extra1")
|
|
|
|
|
- depthwise_conv = self._conv_bn_layer(
|
|
|
|
|
- input=pointwise_conv,
|
|
|
|
|
- filter_size=3,
|
|
|
|
|
- num_filters=int(num_filters2),
|
|
|
|
|
- stride=stride,
|
|
|
|
|
- padding="SAME",
|
|
|
|
|
- num_groups=int(num_filters1),
|
|
|
|
|
- act='relu6',
|
|
|
|
|
- use_cudnn=False,
|
|
|
|
|
- name=name + "_extra2_dw")
|
|
|
|
|
- normal_conv = self._conv_bn_layer(
|
|
|
|
|
- input=depthwise_conv,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=int(num_filters2),
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding="SAME",
|
|
|
|
|
- act='relu6',
|
|
|
|
|
- name=name + "_extra2_sep")
|
|
|
|
|
|
|
+ pointwise_conv = self._conv_bn_layer(input=input,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=int(num_filters1),
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding="SAME",
|
|
|
|
|
+ act='relu6',
|
|
|
|
|
+ name=name + "_extra1")
|
|
|
|
|
+ depthwise_conv = self._conv_bn_layer(input=pointwise_conv,
|
|
|
|
|
+ filter_size=3,
|
|
|
|
|
+ num_filters=int(num_filters2),
|
|
|
|
|
+ stride=stride,
|
|
|
|
|
+ padding="SAME",
|
|
|
|
|
+ num_groups=int(num_filters1),
|
|
|
|
|
+ act='relu6',
|
|
|
|
|
+ use_cudnn=False,
|
|
|
|
|
+ name=name + "_extra2_dw")
|
|
|
|
|
+ normal_conv = self._conv_bn_layer(input=depthwise_conv,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=int(num_filters2),
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding="SAME",
|
|
|
|
|
+ act='relu6',
|
|
|
|
|
+ name=name + "_extra2_sep")
|
|
|
return normal_conv
|
|
return normal_conv
|
|
|
|
|
|
|
|
def __call__(self, input):
|
|
def __call__(self, input):
|
|
@@ -269,38 +282,36 @@ class MobileNetV3():
|
|
|
self.block_stride *= layer_cfg[5]
|
|
self.block_stride *= layer_cfg[5]
|
|
|
if layer_cfg[5] == 2:
|
|
if layer_cfg[5] == 2:
|
|
|
blocks.append(conv)
|
|
blocks.append(conv)
|
|
|
- conv = self._residual_unit(
|
|
|
|
|
- input=conv,
|
|
|
|
|
- num_in_filter=inplanes,
|
|
|
|
|
- num_mid_filter=int(scale * layer_cfg[1]),
|
|
|
|
|
- num_out_filter=int(scale * layer_cfg[2]),
|
|
|
|
|
- act=layer_cfg[4],
|
|
|
|
|
- stride=layer_cfg[5],
|
|
|
|
|
- filter_size=layer_cfg[0],
|
|
|
|
|
- use_se=layer_cfg[3],
|
|
|
|
|
- name='conv' + str(i + 2))
|
|
|
|
|
-
|
|
|
|
|
|
|
+ conv = self._residual_unit(input=conv,
|
|
|
|
|
+ num_in_filter=inplanes,
|
|
|
|
|
+ num_mid_filter=int(scale * layer_cfg[1]),
|
|
|
|
|
+ num_out_filter=int(scale * layer_cfg[2]),
|
|
|
|
|
+ act=layer_cfg[4],
|
|
|
|
|
+ stride=layer_cfg[5],
|
|
|
|
|
+ filter_size=layer_cfg[0],
|
|
|
|
|
+ use_se=layer_cfg[3],
|
|
|
|
|
+ name='conv' + str(i + 2))
|
|
|
|
|
+
|
|
|
inplanes = int(scale * layer_cfg[2])
|
|
inplanes = int(scale * layer_cfg[2])
|
|
|
i += 1
|
|
i += 1
|
|
|
|
|
+ self.curr_stage = i
|
|
|
blocks.append(conv)
|
|
blocks.append(conv)
|
|
|
|
|
|
|
|
if self.num_classes:
|
|
if self.num_classes:
|
|
|
- conv = self._conv_bn_layer(
|
|
|
|
|
- input=conv,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=int(scale * self.cls_ch_squeeze),
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding=0,
|
|
|
|
|
- num_groups=1,
|
|
|
|
|
- if_act=True,
|
|
|
|
|
- act='hard_swish',
|
|
|
|
|
- name='conv_last')
|
|
|
|
|
-
|
|
|
|
|
- conv = fluid.layers.pool2d(
|
|
|
|
|
- input=conv,
|
|
|
|
|
- pool_type='avg',
|
|
|
|
|
- global_pooling=True,
|
|
|
|
|
- use_cudnn=False)
|
|
|
|
|
|
|
+ conv = self._conv_bn_layer(input=conv,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=int(scale * self.cls_ch_squeeze),
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding=0,
|
|
|
|
|
+ num_groups=1,
|
|
|
|
|
+ if_act=True,
|
|
|
|
|
+ act='hard_swish',
|
|
|
|
|
+ name='conv_last')
|
|
|
|
|
+
|
|
|
|
|
+ conv = fluid.layers.pool2d(input=conv,
|
|
|
|
|
+ pool_type='avg',
|
|
|
|
|
+ global_pooling=True,
|
|
|
|
|
+ use_cudnn=False)
|
|
|
conv = fluid.layers.conv2d(
|
|
conv = fluid.layers.conv2d(
|
|
|
input=conv,
|
|
input=conv,
|
|
|
num_filters=self.cls_ch_expand,
|
|
num_filters=self.cls_ch_expand,
|
|
@@ -312,27 +323,25 @@ class MobileNetV3():
|
|
|
bias_attr=False)
|
|
bias_attr=False)
|
|
|
conv = self._hard_swish(conv)
|
|
conv = self._hard_swish(conv)
|
|
|
drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
|
|
drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
|
|
|
- out = fluid.layers.fc(
|
|
|
|
|
- input=drop,
|
|
|
|
|
- size=self.num_classes,
|
|
|
|
|
- param_attr=ParamAttr(name='fc_weights'),
|
|
|
|
|
- bias_attr=ParamAttr(name='fc_offset'))
|
|
|
|
|
|
|
+ out = fluid.layers.fc(input=drop,
|
|
|
|
|
+ size=self.num_classes,
|
|
|
|
|
+ param_attr=ParamAttr(name='fc_weights'),
|
|
|
|
|
+ bias_attr=ParamAttr(name='fc_offset'))
|
|
|
return out
|
|
return out
|
|
|
|
|
|
|
|
if not self.with_extra_blocks:
|
|
if not self.with_extra_blocks:
|
|
|
return blocks
|
|
return blocks
|
|
|
|
|
|
|
|
# extra block
|
|
# extra block
|
|
|
- conv_extra = self._conv_bn_layer(
|
|
|
|
|
- conv,
|
|
|
|
|
- filter_size=1,
|
|
|
|
|
- num_filters=int(scale * cfg[-1][1]),
|
|
|
|
|
- stride=1,
|
|
|
|
|
- padding="SAME",
|
|
|
|
|
- num_groups=1,
|
|
|
|
|
- if_act=True,
|
|
|
|
|
- act='hard_swish',
|
|
|
|
|
- name='conv' + str(i + 2))
|
|
|
|
|
|
|
+ conv_extra = self._conv_bn_layer(conv,
|
|
|
|
|
+ filter_size=1,
|
|
|
|
|
+ num_filters=int(scale * cfg[-1][1]),
|
|
|
|
|
+ stride=1,
|
|
|
|
|
+ padding="SAME",
|
|
|
|
|
+ num_groups=1,
|
|
|
|
|
+ if_act=True,
|
|
|
|
|
+ act='hard_swish',
|
|
|
|
|
+ name='conv' + str(i + 2))
|
|
|
self.end_points.append(conv_extra)
|
|
self.end_points.append(conv_extra)
|
|
|
i += 1
|
|
i += 1
|
|
|
for block_filter in self.extra_block_filters:
|
|
for block_filter in self.extra_block_filters:
|