pan.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. import paddle
  16. import paddle.nn as nn
  17. import paddle.nn.functional as F
  18. from paddle import ParamAttr
  19. from paddle.nn.initializer import XavierUniform
  20. from paddle.regularizer import L2Decay
  21. from paddlex.ppdet.core.workspace import register, serializable
  22. from paddlex.ppdet.modeling.layers import ConvNormLayer
  23. from ..shape_spec import ShapeSpec
  24. __all__ = ['PAN']
  25. @register
  26. @serializable
  27. class PAN(nn.Layer):
  28. """
  29. Path Aggregation Network, see https://arxiv.org/abs/1803.01534
  30. Args:
  31. in_channels (list[int]): input channels of each level which can be
  32. derived from the output shape of backbone by from_config
  33. out_channel (list[int]): output channel of each level
  34. spatial_scales (list[float]): the spatial scales between input feature
  35. maps and original input image which can be derived from the output
  36. shape of backbone by from_config
  37. start_level (int): Index of the start input backbone level used to
  38. build the feature pyramid. Default: 0.
  39. end_level (int): Index of the end input backbone level (exclusive) to
  40. build the feature pyramid. Default: -1, which means the last level.
  41. norm_type (string|None): The normalization type in FPN module. If
  42. norm_type is None, norm will not be used after conv and if
  43. norm_type is string, bn, gn, sync_bn are available. default None
  44. """
  45. def __init__(self,
  46. in_channels,
  47. out_channel,
  48. spatial_scales=[0.125, 0.0625, 0.03125],
  49. start_level=0,
  50. end_level=-1,
  51. norm_type=None):
  52. super(PAN, self).__init__()
  53. self.out_channel = out_channel
  54. self.num_ins = len(in_channels)
  55. self.spatial_scales = spatial_scales
  56. if end_level == -1:
  57. self.end_level = self.num_ins
  58. else:
  59. # if end_level < inputs, no extra level is allowed
  60. self.end_level = end_level
  61. assert end_level <= len(in_channels)
  62. self.start_level = start_level
  63. self.norm_type = norm_type
  64. self.lateral_convs = []
  65. for i in range(self.start_level, self.end_level):
  66. in_c = in_channels[i - self.start_level]
  67. if self.norm_type is not None:
  68. lateral = self.add_sublayer(
  69. 'pan_lateral' + str(i),
  70. ConvNormLayer(
  71. ch_in=in_c,
  72. ch_out=self.out_channel,
  73. filter_size=1,
  74. stride=1,
  75. norm_type=self.norm_type,
  76. norm_decay=self.norm_decay,
  77. freeze_norm=self.freeze_norm,
  78. initializer=XavierUniform(fan_out=in_c)))
  79. else:
  80. lateral = self.add_sublayer(
  81. 'pan_lateral' + str(i),
  82. nn.Conv2D(
  83. in_channels=in_c,
  84. out_channels=self.out_channel,
  85. kernel_size=1,
  86. weight_attr=ParamAttr(
  87. initializer=XavierUniform(fan_out=in_c))))
  88. self.lateral_convs.append(lateral)
  89. @classmethod
  90. def from_config(cls, cfg, input_shape):
  91. return {'in_channels': [i.channels for i in input_shape], }
  92. def forward(self, body_feats):
  93. laterals = []
  94. for i, lateral_conv in enumerate(self.lateral_convs):
  95. laterals.append(lateral_conv(body_feats[i + self.start_level]))
  96. num_levels = len(laterals)
  97. for i in range(1, num_levels):
  98. lvl = num_levels - i
  99. upsample = F.interpolate(
  100. laterals[lvl],
  101. scale_factor=2.,
  102. mode='bilinear', )
  103. laterals[lvl - 1] += upsample
  104. outs = [laterals[i] for i in range(num_levels)]
  105. for i in range(0, num_levels - 1):
  106. outs[i + 1] += F.interpolate(
  107. outs[i], scale_factor=0.5, mode='bilinear')
  108. return outs
  109. @property
  110. def out_shape(self):
  111. return [
  112. ShapeSpec(
  113. channels=self.out_channel, stride=1. / s)
  114. for s in self.spatial_scales
  115. ]