- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- MixNet for ImageNet-1K, implemented in Paddle.
- Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- """
- __all__ = ['MixNet_S', 'MixNet_M', 'MixNet_L']
- import os
- from inspect import isfunction
- from functools import reduce
- import paddle
- import paddle.nn as nn
- class Identity(nn.Layer):
- """
- Identity block.
- """
- def __init__(self):
- super(Identity, self).__init__()
- def forward(self, x):
- return x
- def round_channels(channels, divisor=8):
- """
- Round a weighted channel number so that it is divisible by the divisor.
- Parameters:
- ----------
- channels : int or float
- Original number of channels.
- divisor : int, default 8
- Alignment value.
- Returns:
- -------
- int
- Rounded number of channels.
- """
- rounded_channels = max(
- int(channels + divisor / 2.0) // divisor * divisor, divisor)
- if float(rounded_channels) < 0.9 * channels:
- rounded_channels += divisor
- return rounded_channels
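- # Worked example (values follow from the formula above): round_channels(38.4)
- # -> 40 and round_channels(17) -> 16, i.e. widths are snapped to multiples of
- # the divisor and bumped up whenever rounding would shrink them by more than 10%.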
- def get_activation_layer(activation):
- """
- Create activation layer from string/function.
- Parameters:
- ----------
- activation : function, or str, or nn.Layer
- Activation function or name of activation function.
- Returns:
- -------
- nn.Layer
- Activation layer.
- """
- assert activation is not None
- if isfunction(activation):
- return activation()
- elif isinstance(activation, str):
- if activation == "relu":
- return nn.ReLU()
- elif activation == "relu6":
- return nn.ReLU6()
- elif activation == "swish":
- return nn.Swish()
- elif activation == "hswish":
- return nn.Hardswish()
- elif activation == "sigmoid":
- return nn.Sigmoid()
- elif activation == "hsigmoid":
- return nn.Hardsigmoid()
- elif activation == "identity":
- return Identity()
- else:
- raise NotImplementedError()
- else:
- assert isinstance(activation, nn.Layer)
- return activation
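- # Usage sketch: get_activation_layer("swish") returns nn.Swish(); a zero-argument
- # callable such as (lambda: nn.ReLU()) is invoked, and an nn.Layer instance is
- # passed through unchanged.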
- class ConvBlock(nn.Layer):
- """
- Standard convolution block with Batch normalization and activation.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- kernel_size : int or tuple/list of 2 int
- Convolution window size.
- stride : int or tuple/list of 2 int
- Strides of the convolution.
- padding : int, or tuple/list of 2 int, or tuple/list of 4 int
- Padding value for convolution layer.
- dilation : int or tuple/list of 2 int, default 1
- Dilation value for convolution layer.
- groups : int, default 1
- Number of groups.
- bias : bool, default False
- Whether the layer uses a bias vector.
- use_bn : bool, default True
- Whether to use BatchNorm layer.
- bn_eps : float, default 1e-5
- Small float added to variance in Batch norm.
- activation : function or str or None, default nn.ReLU()
- Activation function or name of activation function.
- """
- def __init__(self,
- in_channels,
- out_channels,
- kernel_size,
- stride,
- padding,
- dilation=1,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=nn.ReLU()):
- super(ConvBlock, self).__init__()
- self.activate = (activation is not None)
- self.use_bn = use_bn
- self.use_pad = (isinstance(padding, (list, tuple)) and
- (len(padding) == 4))
- if self.use_pad:
- self.pad = padding
- self.conv = nn.Conv2D(
- in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- groups=groups,
- bias_attr=bias,
- weight_attr=None)
- if self.use_bn:
- self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
- if self.activate:
- self.activ = get_activation_layer(activation)
- def forward(self, x):
- x = self.conv(x)
- if self.use_bn:
- x = self.bn(x)
- if self.activate:
- x = self.activ(x)
- return x
- class SEBlock(nn.Layer):
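- """
- Squeeze-and-Excitation block from 'Squeeze-and-Excitation Networks,'
- https://arxiv.org/abs/1709.01507.
- Parameters:
- ----------
- channels : int
- Number of channels.
- reduction : int, default 16
- Squeeze reduction value.
- mid_channels : int or None, default None
- Number of middle channels (computed from the reduction value when None).
- round_mid : bool, default False
- Whether to round the middle channel number (make divisible operation).
- use_conv : bool, default True
- Whether to use 1x1 convolutions instead of fully-connected layers.
- mid_activation : function or str, default nn.ReLU()
- Activation function after the first convolution.
- out_activation : function or str, default nn.Sigmoid()
- Activation function after the last convolution.
- """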
- def __init__(self,
- channels,
- reduction=16,
- mid_channels=None,
- round_mid=False,
- use_conv=True,
- mid_activation=nn.ReLU(),
- out_activation=nn.Sigmoid()):
- super(SEBlock, self).__init__()
- self.use_conv = use_conv
- if mid_channels is None:
- mid_channels = channels // reduction if not round_mid else round_channels(
- float(channels) / reduction)
- self.pool = nn.AdaptiveAvgPool2D(output_size=1)
- if use_conv:
- self.conv1 = nn.Conv2D(
- in_channels=channels,
- out_channels=mid_channels,
- kernel_size=1,
- stride=1,
- groups=1,
- bias_attr=True,
- weight_attr=None)
- else:
- self.fc1 = nn.Linear(
- in_features=channels, out_features=mid_channels)
- self.activ = get_activation_layer(mid_activation)
- if use_conv:
- self.conv2 = nn.Conv2D(
- in_channels=mid_channels,
- out_channels=channels,
- kernel_size=1,
- stride=1,
- groups=1,
- bias_attr=True,
- weight_attr=None)
- else:
- self.fc2 = nn.Linear(
- in_features=mid_channels, out_features=channels)
- self.sigmoid = get_activation_layer(out_activation)
- def forward(self, x):
- w = self.pool(x)
- if not self.use_conv:
- w = w.reshape(shape=[w.shape[0], -1])
- w = self.conv1(w) if self.use_conv else self.fc1(w)
- w = self.activ(w)
- w = self.conv2(w) if self.use_conv else self.fc2(w)
- w = self.sigmoid(w)
- if not self.use_conv:
- w = w.unsqueeze(2).unsqueeze(3)
- x = x * w
- return x
- class MixConv(nn.Layer):
- """
- Mixed convolution layer from 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
- Convolution window size.
- stride : int or tuple/list of 2 int
- Strides of the convolution.
- padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
- Padding value for convolution layer.
- dilation : int or tuple/list of 2 int, default 1
- Dilation value for convolution layer.
- groups : int, default 1
- Number of groups.
- bias : bool, default False
- Whether the layer uses a bias vector.
- axis : int, default 1
- The axis on which to concatenate the outputs.
- """
- def __init__(self,
- in_channels,
- out_channels,
- kernel_size,
- stride,
- padding,
- dilation=1,
- groups=1,
- bias=False,
- axis=1):
- super(MixConv, self).__init__()
- kernel_size = kernel_size if isinstance(kernel_size,
- list) else [kernel_size]
- padding = padding if isinstance(padding, list) else [padding]
- kernel_count = len(kernel_size)
- self.splitted_in_channels = self.split_channels(in_channels,
- kernel_count)
- splitted_out_channels = self.split_channels(out_channels, kernel_count)
- for i, kernel_size_i in enumerate(kernel_size):
- in_channels_i = self.splitted_in_channels[i]
- out_channels_i = splitted_out_channels[i]
- padding_i = padding[i]
- _ = self.add_sublayer(
- name=str(i),
- sublayer=nn.Conv2D(
- in_channels=in_channels_i,
- out_channels=out_channels_i,
- kernel_size=kernel_size_i,
- stride=stride,
- padding=padding_i,
- dilation=dilation,
- groups=(out_channels_i
- if out_channels == groups else groups),
- bias_attr=bias,
- weight_attr=None))
- self.axis = axis
- def forward(self, x):
- xx = paddle.split(x, self.splitted_in_channels, axis=self.axis)
- out = [
- conv_i(x_i) for x_i, conv_i in zip(xx, self._sub_layers.values())
- ]
- x = paddle.concat(tuple(out), axis=self.axis)
- return x
- @staticmethod
- def split_channels(channels, kernel_count):
- splitted_channels = [channels // kernel_count] * kernel_count
- splitted_channels[0] += channels - sum(splitted_channels)
- return splitted_channels
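- # Channel-split sketch: split_channels(26, 3) -> [10, 8, 8]; the remainder of
- # the integer division is assigned to the first branch so that the splits
- # always sum to the original channel count.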
- class MixConvBlock(nn.Layer):
- """
- Mixed convolution block with Batch normalization and activation.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
- Convolution window size.
- stride : int or tuple/list of 2 int
- Strides of the convolution.
- padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
- Padding value for convolution layer.
- dilation : int or tuple/list of 2 int, default 1
- Dilation value for convolution layer.
- groups : int, default 1
- Number of groups.
- bias : bool, default False
- Whether the layer uses a bias vector.
- use_bn : bool, default True
- Whether to use BatchNorm layer.
- bn_eps : float, default 1e-5
- Small float added to variance in Batch norm.
- activation : function or str or None, default nn.ReLU()
- Activation function or name of activation function.
- """
- def __init__(self,
- in_channels,
- out_channels,
- kernel_size,
- stride,
- padding,
- dilation=1,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=nn.ReLU()):
- super(MixConvBlock, self).__init__()
- self.activate = (activation is not None)
- self.use_bn = use_bn
- self.conv = MixConv(
- in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- groups=groups,
- bias=bias)
- if self.use_bn:
- self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
- if self.activate:
- self.activ = get_activation_layer(activation)
- def forward(self, x):
- x = self.conv(x)
- if self.use_bn:
- x = self.bn(x)
- if self.activate:
- x = self.activ(x)
- return x
- def mixconv1x1_block(in_channels,
- out_channels,
- kernel_count,
- stride=1,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=nn.ReLU()):
- """
- 1x1 version of the mixed convolution block.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- kernel_count : int
- Kernel count.
- stride : int or tuple/list of 2 int, default 1
- Strides of the convolution.
- groups : int, default 1
- Number of groups.
- bias : bool, default False
- Whether the layer uses a bias vector.
- use_bn : bool, default True
- Whether to use BatchNorm layer.
- bn_eps : float, default 1e-5
- Small float added to variance in Batch norm.
- activation : function or str, or None, default nn.ReLU()
- Activation function or name of activation function.
- """
- return MixConvBlock(
- in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=([1] * kernel_count),
- stride=stride,
- padding=([0] * kernel_count),
- groups=groups,
- bias=bias,
- use_bn=use_bn,
- bn_eps=bn_eps,
- activation=activation)
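- # Example: mixconv1x1_block(in_channels=64, out_channels=128, kernel_count=2)
- # builds two 1x1 convolutions, one per 32-channel split of the input, and
- # concatenates their 64-channel outputs back to 128 channels.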
- class MixUnit(nn.Layer):
- """
- MixNet unit.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- stride : int or tuple/list of 2 int
- Strides of the second convolution layer.
- exp_kernel_count : int
- Expansion convolution kernel count for each unit.
- conv1_kernel_count : int
- Conv1 kernel count for each unit.
- conv2_kernel_count : int
- Conv2 kernel count for each unit.
- exp_factor : int
- Expansion factor for each unit.
- se_factor : int
- SE reduction factor for each unit.
- activation : str
- Activation function or name of activation function.
- """
- def __init__(self, in_channels, out_channels, stride, exp_kernel_count,
- conv1_kernel_count, conv2_kernel_count, exp_factor, se_factor,
- activation):
- super(MixUnit, self).__init__()
- assert exp_factor >= 1
- assert se_factor >= 0
- self.residual = (in_channels == out_channels) and (stride == 1)
- self.use_se = se_factor > 0
- mid_channels = exp_factor * in_channels
- self.use_exp_conv = exp_factor > 1
- if self.use_exp_conv:
- if exp_kernel_count == 1:
- self.exp_conv = ConvBlock(
- in_channels=in_channels,
- out_channels=mid_channels,
- kernel_size=1,
- stride=1,
- padding=0,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=activation)
- else:
- self.exp_conv = mixconv1x1_block(
- in_channels=in_channels,
- out_channels=mid_channels,
- kernel_count=exp_kernel_count,
- activation=activation)
- if conv1_kernel_count == 1:
- self.conv1 = ConvBlock(
- in_channels=mid_channels,
- out_channels=mid_channels,
- kernel_size=3,
- stride=stride,
- padding=1,
- dilation=1,
- groups=mid_channels,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=activation)
- else:
- self.conv1 = MixConvBlock(
- in_channels=mid_channels,
- out_channels=mid_channels,
- kernel_size=[3 + 2 * i for i in range(conv1_kernel_count)],
- stride=stride,
- padding=[1 + i for i in range(conv1_kernel_count)],
- groups=mid_channels,
- activation=activation)
- if self.use_se:
- self.se = SEBlock(
- channels=mid_channels,
- reduction=(exp_factor * se_factor),
- round_mid=False,
- mid_activation=activation)
- if conv2_kernel_count == 1:
- self.conv2 = ConvBlock(
- in_channels=mid_channels,
- out_channels=out_channels,
- activation=None,
- kernel_size=1,
- stride=1,
- padding=0,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5)
- else:
- self.conv2 = mixconv1x1_block(
- in_channels=mid_channels,
- out_channels=out_channels,
- kernel_count=conv2_kernel_count,
- activation=None)
- def forward(self, x):
- if self.residual:
- identity = x
- if self.use_exp_conv:
- x = self.exp_conv(x)
- x = self.conv1(x)
- if self.use_se:
- x = self.se(x)
- x = self.conv2(x)
- if self.residual:
- x = x + identity
- return x
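- # Structure note: MixUnit follows the inverted-residual pattern (optional 1x1
- # expansion -> depthwise mixed convolution -> optional SE -> 1x1 projection),
- # with a skip connection only when stride is 1 and the channel counts match.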
- class MixInitBlock(nn.Layer):
- """
- MixNet-specific initial block.
- Parameters:
- ----------
- in_channels : int
- Number of input channels.
- out_channels : int
- Number of output channels.
- """
- def __init__(self, in_channels, out_channels):
- super(MixInitBlock, self).__init__()
- self.conv1 = ConvBlock(
- in_channels=in_channels,
- out_channels=out_channels,
- stride=2,
- kernel_size=3,
- padding=1)
- self.conv2 = MixUnit(
- in_channels=out_channels,
- out_channels=out_channels,
- stride=1,
- exp_kernel_count=1,
- conv1_kernel_count=1,
- conv2_kernel_count=1,
- exp_factor=1,
- se_factor=0,
- activation="relu")
- def forward(self, x):
- x = self.conv1(x)
- x = self.conv2(x)
- return x
- class MixNet(nn.Layer):
- """
- MixNet model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- Parameters:
- ----------
- channels : list of list of int
- Number of output channels for each unit.
- init_block_channels : int
- Number of output channels for the initial unit.
- final_block_channels : int
- Number of output channels for the final block of the feature extractor.
- exp_kernel_counts : list of list of int
- Expansion convolution kernel count for each unit.
- conv1_kernel_counts : list of list of int
- Conv1 kernel count for each unit.
- conv2_kernel_counts : list of list of int
- Conv2 kernel count for each unit.
- exp_factors : list of list of int
- Expansion factor for each unit.
- se_factors : list of list of int
- SE reduction factor for each unit.
- in_channels : int, default 3
- Number of input channels.
- in_size : tuple of two ints, default (224, 224)
- Spatial size of the expected input image.
- class_dim : int, default 1000
- Number of classification classes.
- """
- def __init__(self,
- channels,
- init_block_channels,
- final_block_channels,
- exp_kernel_counts,
- conv1_kernel_counts,
- conv2_kernel_counts,
- exp_factors,
- se_factors,
- in_channels=3,
- in_size=(224, 224),
- class_dim=1000):
- super(MixNet, self).__init__()
- self.in_size = in_size
- self.class_dim = class_dim
- self.features = nn.Sequential()
- self.features.add_sublayer(
- "init_block",
- MixInitBlock(
- in_channels=in_channels, out_channels=init_block_channels))
- in_channels = init_block_channels
- for i, channels_per_stage in enumerate(channels):
- stage = nn.Sequential()
- for j, out_channels in enumerate(channels_per_stage):
- stride = 2 if ((j == 0) and (i != 3)) or (
- (j == len(channels_per_stage) // 2) and (i == 3)) else 1
- exp_kernel_count = exp_kernel_counts[i][j]
- conv1_kernel_count = conv1_kernel_counts[i][j]
- conv2_kernel_count = conv2_kernel_counts[i][j]
- exp_factor = exp_factors[i][j]
- se_factor = se_factors[i][j]
- activation = "relu" if i == 0 else "swish"
- stage.add_sublayer(
- "unit{}".format(j + 1),
- MixUnit(
- in_channels=in_channels,
- out_channels=out_channels,
- stride=stride,
- exp_kernel_count=exp_kernel_count,
- conv1_kernel_count=conv1_kernel_count,
- conv2_kernel_count=conv2_kernel_count,
- exp_factor=exp_factor,
- se_factor=se_factor,
- activation=activation))
- in_channels = out_channels
- self.features.add_sublayer("stage{}".format(i + 1), stage)
- self.features.add_sublayer(
- "final_block",
- ConvBlock(
- in_channels=in_channels,
- out_channels=final_block_channels,
- kernel_size=1,
- stride=1,
- padding=0,
- groups=1,
- bias=False,
- use_bn=True,
- bn_eps=1e-5,
- activation=nn.ReLU()))
- in_channels = final_block_channels
- self.features.add_sublayer(
- "final_pool", nn.AvgPool2D(
- kernel_size=7, stride=1))
- self.output = nn.Linear(
- in_features=in_channels, out_features=class_dim)
- def forward(self, x):
- x = self.features(x)
- reshape_dim = reduce(lambda x, y: x * y, x.shape[1:])
- x = x.reshape(shape=[x.shape[0], reshape_dim])
- x = self.output(x)
- return x
- def get_mixnet(version, width_scale, model_name=None, **kwargs):
- """
- Create MixNet model with specific parameters.
- Parameters:
- ----------
- version : str
- Version of MixNet ('s' or 'm').
- width_scale : float
- Scale factor for width of layers.
- model_name : str or None, default None
- Model name.
- """
- if version == "s":
- init_block_channels = 16
- channels = [[24, 24], [40, 40, 40, 40], [80, 80, 80],
- [120, 120, 120, 200, 200, 200]]
- exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 1, 1],
- [2, 2, 2, 1, 1, 1]]
- conv1_kernel_counts = [[1, 1], [3, 2, 2, 2], [3, 2, 2],
- [3, 4, 4, 5, 4, 4]]
- conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [2, 2, 2],
- [2, 2, 2, 1, 2, 2]]
- exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6], [6, 3, 3, 6, 6, 6]]
- se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4], [2, 2, 2, 2, 2, 2]]
- elif version == "m":
- init_block_channels = 24
- channels = [[32, 32], [40, 40, 40, 40], [80, 80, 80, 80],
- [120, 120, 120, 120, 200, 200, 200, 200]]
- exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
- [1, 2, 2, 2, 1, 1, 1, 1]]
- conv1_kernel_counts = [[3, 1], [4, 2, 2, 2], [3, 4, 4, 4],
- [1, 4, 4, 4, 4, 4, 4, 4]]
- conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
- [1, 2, 2, 2, 1, 2, 2, 2]]
- exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6, 6],
- [6, 3, 3, 3, 6, 6, 6, 6]]
- se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4, 4],
- [2, 2, 2, 2, 2, 2, 2, 2]]
- else:
- raise ValueError("Unsupported MixNet version {}".format(version))
- final_block_channels = 1536
- if width_scale != 1.0:
- channels = [[round_channels(cij * width_scale) for cij in ci]
- for ci in channels]
- init_block_channels = round_channels(init_block_channels * width_scale)
- net = MixNet(
- channels=channels,
- init_block_channels=init_block_channels,
- final_block_channels=final_block_channels,
- exp_kernel_counts=exp_kernel_counts,
- conv1_kernel_counts=conv1_kernel_counts,
- conv2_kernel_counts=conv2_kernel_counts,
- exp_factors=exp_factors,
- se_factors=se_factors,
- **kwargs)
- return net
- def MixNet_S(**kwargs):
- """
- MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- """
- return get_mixnet(
- version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
- def MixNet_M(**kwargs):
- """
- MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- """
- return get_mixnet(
- version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
- def MixNet_L(**kwargs):
- """
- MixNet-L model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
- https://arxiv.org/abs/1907.09595.
- """
- return get_mixnet(
- version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)