Bläddra i källkod

Merge pull request #13 from SunAhong1993/develop

add ssld
Jason 5 år sedan
förälder
incheckning
a29d49dcdb

+ 4 - 0
paddlex/cls.py

@@ -21,11 +21,15 @@ ResNet50 = cv.models.ResNet50
 ResNet101 = cv.models.ResNet101
 ResNet50_vd = cv.models.ResNet50_vd
 ResNet101_vd = cv.models.ResNet101_vd
+ResNet50_vd_ssld = cv.models.ResNet50_vd_ssld
+ResNet101_vd_ssld = cv.models.ResNet101_vd_ssld
 DarkNet53 = cv.models.DarkNet53
 MobileNetV1 = cv.models.MobileNetV1
 MobileNetV2 = cv.models.MobileNetV2
 MobileNetV3_small = cv.models.MobileNetV3_small
 MobileNetV3_large = cv.models.MobileNetV3_large
+MobileNetV3_small_ssld = cv.models.MobileNetV3_small_ssld
+MobileNetV3_large_ssld = cv.models.MobileNetV3_large_ssld
 Xception41 = cv.models.Xception41
 Xception65 = cv.models.Xception65
 DenseNet121 = cv.models.DenseNet121

+ 4 - 0
paddlex/cv/models/__init__.py

@@ -19,11 +19,15 @@ from .classifier import ResNet50
 from .classifier import ResNet101
 from .classifier import ResNet50_vd
 from .classifier import ResNet101_vd
+from .classifier import ResNet50_vd_ssld
+from .classifier import ResNet101_vd_ssld
 from .classifier import DarkNet53
 from .classifier import MobileNetV1
 from .classifier import MobileNetV2
 from .classifier import MobileNetV3_small
 from .classifier import MobileNetV3_large
+from .classifier import MobileNetV3_small_ssld
+from .classifier import MobileNetV3_large_ssld
 from .classifier import Xception41
 from .classifier import Xception65
 from .classifier import DenseNet121

+ 24 - 0
paddlex/cv/models/classifier.py

@@ -300,6 +300,17 @@ class ResNet101_vd(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(ResNet101_vd, self).__init__(
             model_name='ResNet101_vd', num_classes=num_classes)
+        
+        
+class ResNet50_vd_ssld(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(ResNet50_vd_ssld, self).__init__(model_name='ResNet50_vd_ssld',
+                                               num_classes=num_classes)
+        
+class ResNet101_vd_ssld(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(ResNet101_vd_ssld, self).__init__(model_name='ResNet101_vd_ssld',
+                                               num_classes=num_classes)
 
 
 class DarkNet53(BaseClassifier):
@@ -330,6 +341,19 @@ class MobileNetV3_large(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(MobileNetV3_large, self).__init__(
             model_name='MobileNetV3_large', num_classes=num_classes)
+        
+        
+        
+class MobileNetV3_small_ssld(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(MobileNetV3_small_ssld, self).__init__(model_name='MobileNetV3_small_ssld',
+                                                num_classes=num_classes)
+
+
+class MobileNetV3_large_ssld(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(MobileNetV3_large_ssld, self).__init__(model_name='MobileNetV3_large_ssld',
+                                                num_classes=num_classes)
 
 
 class Xception65(BaseClassifier):

+ 12 - 0
paddlex/cv/models/utils/pretrain_weights.py

@@ -16,6 +16,10 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar',
     'ResNet101_vd':
     'https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar',
+    'ResNet50_vd_ssld':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar',
+    'ResNet101_vd_ssld':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar',
     'MobileNetV1':
     'http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar',
     'MobileNetV2_x1.0':
@@ -32,6 +36,10 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar',
     'MobileNetV3_large':
     'https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar',
+    'MobileNetV3_small_x1_0_ssld':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar',
+    'MobileNetV3_large_x1_0_ssld':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar',
     'DarkNet53':
     'https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar',
     'DenseNet121':
@@ -68,6 +76,10 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
             backbone = 'Seg{}'.format(backbone)
         elif backbone == 'MobileNetV2':
             backbone = 'MobileNetV2_x1.0'
+        elif backbone == 'MobileNetV3_small_ssld':
+            backbone = 'MobileNetV3_small_x1_0_ssld'
+        elif backbone == 'MobileNetV3_large_ssld':
+            backbone = 'MobileNetV3_large_x1_0_ssld'
         if model_type == 'detector':
             if backbone == 'ResNet50':
                 backbone = 'DetResNet50'

+ 24 - 1
paddlex/cv/nets/__init__.py

@@ -50,6 +50,18 @@ def resnet50_vd(input, num_classes=1000):
     return model(input)
 
 
+def resnet50_vd_ssld(input, num_classes=1000):
+    model = ResNet(layers=50, num_classes=num_classes, 
+                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    return model(input)
+
+
+def resnet101_vd_ssld(input, num_classes=1000):
+    model = ResNet(layers=101, num_classes=num_classes, 
+                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    return model(input)
+
+
 def resnet101_vd(input, num_classes=1000):
     model = ResNet(layers=101, num_classes=num_classes, variant='d')
     return model(input)
@@ -80,6 +92,18 @@ def mobilenetv3_large(input, num_classes=1000):
     return model(input)
 
 
+def mobilenetv3_small_ssld(input, num_classes=1000):
+    model = MobileNetV3(num_classes=num_classes, model_name='small',
+                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    return model(input)
+
+
+def mobilenetv3_large_ssld(input, num_classes=1000):
+    model = MobileNetV3(num_classes=num_classes, model_name='large',
+                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    return model(input)
+
+
 def xception65(input, num_classes=1000):
     model = Xception(layers=65, num_classes=num_classes)
     return model(input)
@@ -109,7 +133,6 @@ def densenet201(input, num_classes=1000):
     model = DenseNet(layers=201, num_classes=num_classes)
     return model(input)
 
-
 def shufflenetv2(input, num_classes=1000):
     model = ShuffleNetV2(num_classes=num_classes)
     return model(input)

+ 140 - 131
paddlex/cv/nets/mobilenet_v3.py

@@ -31,7 +31,6 @@ class MobileNetV3():
         with_extra_blocks (bool): if extra blocks should be added.
         extra_block_filters (list): number of filter for each extra block.
     """
-
     def __init__(self,
                  scale=1.0,
                  model_name='small',
@@ -41,7 +40,11 @@ class MobileNetV3():
                  norm_decay=0.0,
                  extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                       [64, 128]],
-                 num_classes=None):
+                 num_classes=None,
+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
+        assert len(lr_mult_list) == 5, \
+            "lr_mult_list length in MobileNetV3 must be 5 but got {}!!".format(
+            len(lr_mult_list))
         self.scale = scale
         self.with_extra_blocks = with_extra_blocks
         self.extra_block_filters = extra_block_filters
@@ -51,6 +54,8 @@ class MobileNetV3():
         self.end_points = []
         self.block_stride = 1
         self.num_classes = num_classes
+        self.lr_mult_list = lr_mult_list
+        self.curr_stage = 0
         if model_name == "large":
             self.cfg = [
                 # kernel_size, expand, channel, se_block, act_mode, stride
@@ -72,6 +77,7 @@ class MobileNetV3():
             ]
             self.cls_ch_squeeze = 960
             self.cls_ch_expand = 1280
+            self.lr_interval = 3
         elif model_name == "small":
             self.cfg = [
                 # kernel_size, expand, channel, se_block, act_mode, stride
@@ -89,6 +95,7 @@ class MobileNetV3():
             ]
             self.cls_ch_squeeze = 576
             self.cls_ch_expand = 1280
+            self.lr_interval = 2
         else:
             raise NotImplementedError
 
@@ -103,30 +110,32 @@ class MobileNetV3():
                        act=None,
                        name=None,
                        use_cudnn=True):
-        conv_param_attr = ParamAttr(
-            name=name + '_weights', regularizer=L2Decay(self.conv_decay))
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=conv_param_attr,
-            bias_attr=False)
+        lr_idx = self.curr_stage // self.lr_interval
+        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
+        lr_mult = self.lr_mult_list[lr_idx]
+        conv_param_attr = ParamAttr(name=name + '_weights',
+                                    learning_rate=lr_mult,
+                                    regularizer=L2Decay(self.conv_decay))
+        conv = fluid.layers.conv2d(input=input,
+                                   num_filters=num_filters,
+                                   filter_size=filter_size,
+                                   stride=stride,
+                                   padding=padding,
+                                   groups=num_groups,
+                                   act=None,
+                                   use_cudnn=use_cudnn,
+                                   param_attr=conv_param_attr,
+                                   bias_attr=False)
         bn_name = name + '_bn'
-        bn_param_attr = ParamAttr(
-            name=bn_name + "_scale", regularizer=L2Decay(self.norm_decay))
-        bn_bias_attr = ParamAttr(
-            name=bn_name + "_offset", regularizer=L2Decay(self.norm_decay))
-        bn = fluid.layers.batch_norm(
-            input=conv,
-            param_attr=bn_param_attr,
-            bias_attr=bn_bias_attr,
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+        bn_param_attr = ParamAttr(name=bn_name + "_scale",
+                                  regularizer=L2Decay(self.norm_decay))
+        bn_bias_attr = ParamAttr(name=bn_name + "_offset",
+                                 regularizer=L2Decay(self.norm_decay))
+        bn = fluid.layers.batch_norm(input=conv,
+                                     param_attr=bn_param_attr,
+                                     bias_attr=bn_bias_attr,
+                                     moving_mean_name=bn_name + '_mean',
+                                     moving_variance_name=bn_name + '_variance')
         if if_act:
             if act == 'relu':
                 bn = fluid.layers.relu(bn)
@@ -140,23 +149,33 @@ class MobileNetV3():
         return x * fluid.layers.relu6(x + 3) / 6.
 
     def _se_block(self, input, num_out_filter, ratio=4, name=None):
+        lr_idx = self.curr_stage // self.lr_interval
+        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
+        lr_mult = self.lr_mult_list[lr_idx]
+        
         num_mid_filter = int(num_out_filter // ratio)
-        pool = fluid.layers.pool2d(
-            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
+        pool = fluid.layers.pool2d(input=input,
+                                   pool_type='avg',
+                                   global_pooling=True,
+                                   use_cudnn=False)
         conv1 = fluid.layers.conv2d(
             input=pool,
             filter_size=1,
             num_filters=num_mid_filter,
             act='relu',
-            param_attr=ParamAttr(name=name + '_1_weights'),
-            bias_attr=ParamAttr(name=name + '_1_offset'))
+            param_attr=ParamAttr(
+                name=name + '_1_weights', learning_rate=lr_mult),
+            bias_attr=ParamAttr(
+                name=name + '_1_offset', learning_rate=lr_mult))
         conv2 = fluid.layers.conv2d(
             input=conv1,
             filter_size=1,
             num_filters=num_out_filter,
             act='hard_sigmoid',
-            param_attr=ParamAttr(name=name + '_2_weights'),
-            bias_attr=ParamAttr(name=name + '_2_offset'))
+            param_attr=ParamAttr(
+                name=name + '_2_weights', learning_rate=lr_mult),
+            bias_attr=ParamAttr(
+                name=name + '_2_offset', learning_rate=lr_mult))
 
         scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
         return scale
@@ -172,46 +191,43 @@ class MobileNetV3():
                        use_se=False,
                        name=None):
         input_data = input
-        conv0 = self._conv_bn_layer(
-            input=input,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + '_expand')
+        conv0 = self._conv_bn_layer(input=input,
+                                    filter_size=1,
+                                    num_filters=num_mid_filter,
+                                    stride=1,
+                                    padding=0,
+                                    if_act=True,
+                                    act=act,
+                                    name=name + '_expand')
         if self.block_stride == 16 and stride == 2:
             self.end_points.append(conv0)
-        conv1 = self._conv_bn_layer(
-            input=conv0,
-            filter_size=filter_size,
-            num_filters=num_mid_filter,
-            stride=stride,
-            padding=int((filter_size - 1) // 2),
-            if_act=True,
-            act=act,
-            num_groups=num_mid_filter,
-            use_cudnn=False,
-            name=name + '_depthwise')
+        conv1 = self._conv_bn_layer(input=conv0,
+                                    filter_size=filter_size,
+                                    num_filters=num_mid_filter,
+                                    stride=stride,
+                                    padding=int((filter_size - 1) // 2),
+                                    if_act=True,
+                                    act=act,
+                                    num_groups=num_mid_filter,
+                                    use_cudnn=False,
+                                    name=name + '_depthwise')
 
         if use_se:
-            conv1 = self._se_block(
-                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
+            conv1 = self._se_block(input=conv1,
+                                   num_out_filter=num_mid_filter,
+                                   name=name + '_se')
 
-        conv2 = self._conv_bn_layer(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            stride=1,
-            padding=0,
-            if_act=False,
-            name=name + '_linear')
+        conv2 = self._conv_bn_layer(input=conv1,
+                                    filter_size=1,
+                                    num_filters=num_out_filter,
+                                    stride=1,
+                                    padding=0,
+                                    if_act=False,
+                                    name=name + '_linear')
         if num_in_filter != num_out_filter or stride != 1:
             return conv2
         else:
-            return fluid.layers.elementwise_add(
-                x=input_data, y=conv2, act=None)
+            return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
 
     def _extra_block_dw(self,
                         input,
@@ -219,32 +235,29 @@ class MobileNetV3():
                         num_filters2,
                         stride,
                         name=None):
-        pointwise_conv = self._conv_bn_layer(
-            input=input,
-            filter_size=1,
-            num_filters=int(num_filters1),
-            stride=1,
-            padding="SAME",
-            act='relu6',
-            name=name + "_extra1")
-        depthwise_conv = self._conv_bn_layer(
-            input=pointwise_conv,
-            filter_size=3,
-            num_filters=int(num_filters2),
-            stride=stride,
-            padding="SAME",
-            num_groups=int(num_filters1),
-            act='relu6',
-            use_cudnn=False,
-            name=name + "_extra2_dw")
-        normal_conv = self._conv_bn_layer(
-            input=depthwise_conv,
-            filter_size=1,
-            num_filters=int(num_filters2),
-            stride=1,
-            padding="SAME",
-            act='relu6',
-            name=name + "_extra2_sep")
+        pointwise_conv = self._conv_bn_layer(input=input,
+                                             filter_size=1,
+                                             num_filters=int(num_filters1),
+                                             stride=1,
+                                             padding="SAME",
+                                             act='relu6',
+                                             name=name + "_extra1")
+        depthwise_conv = self._conv_bn_layer(input=pointwise_conv,
+                                             filter_size=3,
+                                             num_filters=int(num_filters2),
+                                             stride=stride,
+                                             padding="SAME",
+                                             num_groups=int(num_filters1),
+                                             act='relu6',
+                                             use_cudnn=False,
+                                             name=name + "_extra2_dw")
+        normal_conv = self._conv_bn_layer(input=depthwise_conv,
+                                          filter_size=1,
+                                          num_filters=int(num_filters2),
+                                          stride=1,
+                                          padding="SAME",
+                                          act='relu6',
+                                          name=name + "_extra2_sep")
         return normal_conv
 
     def __call__(self, input):
@@ -269,38 +282,36 @@ class MobileNetV3():
             self.block_stride *= layer_cfg[5]
             if layer_cfg[5] == 2:
                 blocks.append(conv)
-            conv = self._residual_unit(
-                input=conv,
-                num_in_filter=inplanes,
-                num_mid_filter=int(scale * layer_cfg[1]),
-                num_out_filter=int(scale * layer_cfg[2]),
-                act=layer_cfg[4],
-                stride=layer_cfg[5],
-                filter_size=layer_cfg[0],
-                use_se=layer_cfg[3],
-                name='conv' + str(i + 2))
-
+            conv = self._residual_unit(input=conv,
+                                       num_in_filter=inplanes,
+                                       num_mid_filter=int(scale * layer_cfg[1]),
+                                       num_out_filter=int(scale * layer_cfg[2]),
+                                       act=layer_cfg[4],
+                                       stride=layer_cfg[5],
+                                       filter_size=layer_cfg[0],
+                                       use_se=layer_cfg[3],
+                                       name='conv' + str(i + 2))
+            
             inplanes = int(scale * layer_cfg[2])
             i += 1
+            self.curr_stage = i
         blocks.append(conv)
 
         if self.num_classes:
-            conv = self._conv_bn_layer(
-                input=conv,
-                filter_size=1,
-                num_filters=int(scale * self.cls_ch_squeeze),
-                stride=1,
-                padding=0,
-                num_groups=1,
-                if_act=True,
-                act='hard_swish',
-                name='conv_last')
-
-            conv = fluid.layers.pool2d(
-                input=conv,
-                pool_type='avg',
-                global_pooling=True,
-                use_cudnn=False)
+            conv = self._conv_bn_layer(input=conv,
+                                       filter_size=1,
+                                       num_filters=int(scale * self.cls_ch_squeeze),
+                                       stride=1,
+                                       padding=0,
+                                       num_groups=1,
+                                       if_act=True,
+                                       act='hard_swish',
+                                       name='conv_last')
+            
+            conv = fluid.layers.pool2d(input=conv,
+                                       pool_type='avg',
+                                       global_pooling=True,
+                                       use_cudnn=False)
             conv = fluid.layers.conv2d(
                 input=conv,
                 num_filters=self.cls_ch_expand,
@@ -312,27 +323,25 @@ class MobileNetV3():
                 bias_attr=False)
             conv = self._hard_swish(conv)
             drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
-            out = fluid.layers.fc(
-                input=drop,
-                size=self.num_classes,
-                param_attr=ParamAttr(name='fc_weights'),
-                bias_attr=ParamAttr(name='fc_offset'))
+            out = fluid.layers.fc(input=drop,
+                                  size=self.num_classes,
+                                  param_attr=ParamAttr(name='fc_weights'),
+                                  bias_attr=ParamAttr(name='fc_offset'))            
             return out
 
         if not self.with_extra_blocks:
             return blocks
 
         # extra block
-        conv_extra = self._conv_bn_layer(
-            conv,
-            filter_size=1,
-            num_filters=int(scale * cfg[-1][1]),
-            stride=1,
-            padding="SAME",
-            num_groups=1,
-            if_act=True,
-            act='hard_swish',
-            name='conv' + str(i + 2))
+        conv_extra = self._conv_bn_layer(conv,
+                                         filter_size=1,
+                                         num_filters=int(scale * cfg[-1][1]),
+                                         stride=1,
+                                         padding="SAME",
+                                         num_groups=1,
+                                         if_act=True,
+                                         act='hard_swish',
+                                         name='conv' + str(i + 2))
         self.end_points.append(conv_extra)
         i += 1
         for block_filter in self.extra_block_filters:

+ 18 - 7
paddlex/cv/nets/resnet.py

@@ -65,7 +65,8 @@ class ResNet(object):
                  nonlocal_stages=[],
                  gcb_stages=[],
                  gcb_params=dict(),
-                 num_classes=None):
+                 num_classes=None,
+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
         super(ResNet, self).__init__()
 
         if isinstance(feature_maps, Integral):
@@ -79,6 +80,10 @@ class ResNet(object):
         assert norm_type in ['bn', 'sync_bn', 'affine_channel']
         assert not (len(nonlocal_stages)>0 and layers<50), \
                     "non-local is not supported for resnet18 or resnet34"
+        assert len(
+            lr_mult_list
+        ) == 5, "lr_mult_list length in ResNet must be 5 but got {}!!".format(
+            len(lr_mult_list))
 
         self.layers = layers
         self.freeze_at = freeze_at
@@ -113,6 +118,8 @@ class ResNet(object):
         self.gcb_stages = gcb_stages
         self.gcb_params = gcb_params
         self.num_classes = num_classes
+        self.lr_mult_list = lr_mult_list
+        self.curr_stage = 0
 
     def _conv_offset(self,
                      input,
@@ -128,8 +135,7 @@ class ResNet(object):
             filter_size=filter_size,
             stride=stride,
             padding=padding,
-            param_attr=ParamAttr(
-                initializer=Constant(0.0), name=name + ".w_0"),
+            param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
             bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
             act=act,
             name=name)
@@ -143,7 +149,9 @@ class ResNet(object):
                    groups=1,
                    act=None,
                    name=None,
-                   dcn_v2=False):
+                   dcn_v2=False,
+                   use_lr_mult_list=False):
+        lr_mult = self.lr_mult_list[self.curr_stage] if use_lr_mult_list else 1.0
         _name = self.prefix_name + name if self.prefix_name != '' else name
         if not dcn_v2:
             conv = fluid.layers.conv2d(
@@ -154,7 +162,8 @@ class ResNet(object):
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 act=None,
-                param_attr=ParamAttr(name=_name + "_weights"),
+                param_attr=ParamAttr(name=_name + "_weights",
+                                     learning_rate=lr_mult),
                 bias_attr=False,
                 name=_name + '.conv2d.output.1')
         else:
@@ -191,7 +200,7 @@ class ResNet(object):
         bn_name = self.na.fix_conv_norm_name(name)
         bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name
 
-        norm_lr = 0. if self.freeze_norm else 1.
+        norm_lr = 0. if self.freeze_norm else lr_mult
         norm_decay = self.norm_decay
         pattr = ParamAttr(
             name=bn_name + '_scale',
@@ -253,7 +262,8 @@ class ResNet(object):
                     pool_padding=0,
                     ceil_mode=True,
                     pool_type='avg')
-                return self._conv_norm(input, ch_out, 1, 1, name=name)
+                return self._conv_norm(input, ch_out, 1, 1, name=name,
+                                      use_lr_mult_list=True)
             return self._conv_norm(input, ch_out, 1, stride, name=name)
         else:
             return input
@@ -448,6 +458,7 @@ class ResNet(object):
             feature_maps = range(2, max(self.feature_maps) + 1)
 
         for i in feature_maps:
+            self.curr_stage += 1
             res = self.layer_warp(res, i)
             if i in self.feature_maps:
                 res_endpoints.append(res)