# hrnet.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. from paddle import fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from paddle.fluid.framework import Variable
  21. from paddle.fluid.regularizer import L2Decay
  22. from numbers import Integral
  23. from paddle.fluid.initializer import MSRA
  24. import math
  25. __all__ = ['HRNet']
  26. class HRNet(object):
  27. def __init__(self,
  28. width=40,
  29. has_se=False,
  30. freeze_at=0,
  31. norm_type='bn',
  32. freeze_norm=False,
  33. norm_decay=0.,
  34. feature_maps=[2, 3, 4, 5],
  35. num_classes=None):
  36. super(HRNet, self).__init__()
  37. if isinstance(feature_maps, Integral):
  38. feature_maps = [feature_maps]
  39. assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
  40. assert len(feature_maps) > 0, "need one or more feature maps"
  41. assert norm_type in ['bn', 'sync_bn']
  42. self.width = width
  43. self.has_se = has_se
  44. self.channels = {
  45. 18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
  46. 30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
  47. 32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
  48. 40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
  49. 44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
  50. 48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
  51. 60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
  52. 64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]],
  53. }
  54. self.freeze_at = freeze_at
  55. self.norm_type = norm_type
  56. self.norm_decay = norm_decay
  57. self.freeze_norm = freeze_norm
  58. self.feature_maps = feature_maps
  59. self.num_classes = num_classes
  60. self.end_points = []
  61. return
    def net(self, input):
        """Build the HRNet forward graph.

        Args:
            input (Variable): input image tensor, NCHW — assumed; TODO
                confirm against callers.

        Returns:
            * ``self.num_classes`` set: classification logits (fc output).
            * ``self.feature_maps == "stage4"``: the list of stage-4
              multi-resolution feature maps.
            * otherwise: the lowest-resolution stage-4 map; the full list
              is stashed in ``self.end_points`` for ``__call__``.
        """
        width = self.width
        channels_2, channels_3, channels_4 = self.channels[width]
        # Number of high-resolution modules per stage (stages 2/3/4).
        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
        # Stem: two stride-2 3x3 convs -> 1/4 resolution, 64 channels.
        x = self.conv_bn_layer(
            input=input,
            filter_size=3,
            num_filters=64,
            stride=2,
            if_act=True,
            name='layer1_1')
        x = self.conv_bn_layer(
            input=x,
            filter_size=3,
            num_filters=64,
            stride=2,
            if_act=True,
            name='layer1_2')
        la1 = self.layer1(x, name='layer2')
        # Alternate transitions (add one lower-resolution branch) and
        # stages (parallel convs + cross-resolution fusion).
        tr1 = self.transition_layer([la1], [256], channels_2, name='tr1')
        st2 = self.stage(tr1, num_modules_2, channels_2, name='st2')
        tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2')
        st3 = self.stage(tr2, num_modules_3, channels_3, name='st3')
        tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3')
        st4 = self.stage(tr3, num_modules_4, channels_4, name='st4')
        # classification
        if self.num_classes:
            last_cls = self.last_cls_out(x=st4, name='cls_head')
            y = last_cls[0]
            last_num_filters = [256, 512, 1024]
            # Fold the four resolutions together, high to low, with
            # stride-2 convs plus element-wise adds.
            for i in range(3):
                y = fluid.layers.elementwise_add(
                    last_cls[i + 1],
                    self.conv_bn_layer(
                        input=y,
                        filter_size=3,
                        num_filters=last_num_filters[i],
                        stride=2,
                        name='cls_head_add' + str(i + 1)))
            y = self.conv_bn_layer(
                input=y,
                filter_size=1,
                num_filters=2048,
                stride=1,
                name='cls_head_last_conv')
            pool = fluid.layers.pool2d(
                input=y, pool_type='avg', global_pooling=True)
            # Uniform init bound follows the classic 1/sqrt(fan_in) rule.
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=self.num_classes,
                param_attr=ParamAttr(
                    name='fc_weights',
                    initializer=fluid.initializer.Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(name='fc_offset'))
            return out
        # segmentation
        if self.feature_maps == "stage4":
            return st4
        self.end_points = st4
        return st4[-1]
  123. def layer1(self, input, name=None):
  124. conv = input
  125. for i in range(4):
  126. conv = self.bottleneck_block(
  127. conv,
  128. num_filters=64,
  129. downsample=True if i == 0 else False,
  130. name=name + '_' + str(i + 1))
  131. return conv
  132. def transition_layer(self, x, in_channels, out_channels, name=None):
  133. num_in = len(in_channels)
  134. num_out = len(out_channels)
  135. out = []
  136. for i in range(num_out):
  137. if i < num_in:
  138. if in_channels[i] != out_channels[i]:
  139. residual = self.conv_bn_layer(
  140. x[i],
  141. filter_size=3,
  142. num_filters=out_channels[i],
  143. name=name + '_layer_' + str(i + 1))
  144. out.append(residual)
  145. else:
  146. out.append(x[i])
  147. else:
  148. residual = self.conv_bn_layer(
  149. x[-1],
  150. filter_size=3,
  151. num_filters=out_channels[i],
  152. stride=2,
  153. name=name + '_layer_' + str(i + 1))
  154. out.append(residual)
  155. return out
  156. def branches(self, x, block_num, channels, name=None):
  157. out = []
  158. for i in range(len(channels)):
  159. residual = x[i]
  160. for j in range(block_num):
  161. residual = self.basic_block(
  162. residual,
  163. channels[i],
  164. name=name + '_branch_layer_' + str(i + 1) + '_' +
  165. str(j + 1))
  166. out.append(residual)
  167. return out
    def fuse_layers(self, x, channels, multi_scale_output=True, name=None):
        """Exchange information across the parallel resolution branches.

        For each output branch i, every other branch j is brought to
        branch i's resolution/width and summed in:
          * j > i (lower resolution): 1x1 conv to channels[i], then
            upsample — bilinear to a runtime-queried shape in "stage4"
            mode, nearest-neighbour by 2**(j-i) otherwise.
          * j < i (higher resolution): chain of stride-2 3x3 convs; only
            the last conv maps to channels[i] and has no activation.
        The accumulated sum passes through a final ReLU.

        Args:
            x (list[Variable]): per-branch feature maps.
            channels (list[int]): per-branch channel counts.
            multi_scale_output (bool): if False, only the highest
                resolution output (i == 0) is computed.
            name (str|None): layer-name prefix.

        Returns:
            list[Variable]: fused per-branch outputs.
        """
        out = []
        for i in range(len(channels) if multi_scale_output else 1):
            residual = x[i]
            if self.feature_maps == "stage4":
                # Dynamic-shape path so variable input sizes work with
                # bilinear resize.
                shape = fluid.layers.shape(residual)
                width = shape[-1]
                height = shape[-2]
            for j in range(len(channels)):
                if j > i:
                    y = self.conv_bn_layer(
                        x[j],
                        filter_size=1,
                        num_filters=channels[i],
                        if_act=False,
                        name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
                    if self.feature_maps == "stage4":
                        y = fluid.layers.resize_bilinear(
                            input=y, out_shape=[height, width])
                    else:
                        y = fluid.layers.resize_nearest(
                            input=y, scale=2**(j - i))
                    residual = fluid.layers.elementwise_add(
                        x=residual, y=y, act=None)
                elif j < i:
                    y = x[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Final downsample: project to the target
                            # width, no ReLU before the sum.
                            y = self.conv_bn_layer(
                                y,
                                filter_size=3,
                                num_filters=channels[i],
                                stride=2,
                                if_act=False,
                                name=name + '_layer_' + str(i + 1) + '_' +
                                str(j + 1) + '_' + str(k + 1))
                        else:
                            # Intermediate downsample keeps the source
                            # branch's width and uses ReLU.
                            y = self.conv_bn_layer(
                                y,
                                filter_size=3,
                                num_filters=channels[j],
                                stride=2,
                                name=name + '_layer_' + str(i + 1) + '_' +
                                str(j + 1) + '_' + str(k + 1))
                    residual = fluid.layers.elementwise_add(
                        x=residual, y=y, act=None)
            residual = fluid.layers.relu(residual)
            out.append(residual)
        return out
  217. def high_resolution_module(self,
  218. x,
  219. channels,
  220. multi_scale_output=True,
  221. name=None):
  222. residual = self.branches(x, 4, channels, name=name)
  223. out = self.fuse_layers(
  224. residual,
  225. channels,
  226. multi_scale_output=multi_scale_output,
  227. name=name)
  228. return out
  229. def stage(self,
  230. x,
  231. num_modules,
  232. channels,
  233. multi_scale_output=True,
  234. name=None):
  235. out = x
  236. for i in range(num_modules):
  237. if i == num_modules - 1 and multi_scale_output == False:
  238. out = self.high_resolution_module(
  239. out,
  240. channels,
  241. multi_scale_output=False,
  242. name=name + '_' + str(i + 1))
  243. else:
  244. out = self.high_resolution_module(
  245. out, channels, name=name + '_' + str(i + 1))
  246. return out
  247. def last_cls_out(self, x, name=None):
  248. out = []
  249. num_filters_list = [32, 64, 128, 256]
  250. for i in range(len(x)):
  251. out.append(
  252. self.bottleneck_block(
  253. input=x[i],
  254. num_filters=num_filters_list[i],
  255. name=name + 'conv_' + str(i + 1),
  256. downsample=True))
  257. return out
  258. def basic_block(self,
  259. input,
  260. num_filters,
  261. stride=1,
  262. downsample=False,
  263. name=None):
  264. residual = input
  265. conv = self.conv_bn_layer(
  266. input=input,
  267. filter_size=3,
  268. num_filters=num_filters,
  269. stride=stride,
  270. name=name + '_conv1')
  271. conv = self.conv_bn_layer(
  272. input=conv,
  273. filter_size=3,
  274. num_filters=num_filters,
  275. if_act=False,
  276. name=name + '_conv2')
  277. if downsample:
  278. residual = self.conv_bn_layer(
  279. input=input,
  280. filter_size=1,
  281. num_filters=num_filters,
  282. if_act=False,
  283. name=name + '_downsample')
  284. if self.has_se:
  285. conv = self.squeeze_excitation(
  286. input=conv,
  287. num_channels=num_filters,
  288. reduction_ratio=16,
  289. name=name + '_fc')
  290. return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
  291. def bottleneck_block(self,
  292. input,
  293. num_filters,
  294. stride=1,
  295. downsample=False,
  296. name=None):
  297. residual = input
  298. conv = self.conv_bn_layer(
  299. input=input,
  300. filter_size=1,
  301. num_filters=num_filters,
  302. name=name + '_conv1')
  303. conv = self.conv_bn_layer(
  304. input=conv,
  305. filter_size=3,
  306. num_filters=num_filters,
  307. stride=stride,
  308. name=name + '_conv2')
  309. conv = self.conv_bn_layer(
  310. input=conv,
  311. filter_size=1,
  312. num_filters=num_filters * 4,
  313. if_act=False,
  314. name=name + '_conv3')
  315. if downsample:
  316. residual = self.conv_bn_layer(
  317. input=input,
  318. filter_size=1,
  319. num_filters=num_filters * 4,
  320. if_act=False,
  321. name=name + '_downsample')
  322. if self.has_se:
  323. conv = self.squeeze_excitation(
  324. input=conv,
  325. num_channels=num_filters * 4,
  326. reduction_ratio=16,
  327. name=name + '_fc')
  328. return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
  329. def squeeze_excitation(self,
  330. input,
  331. num_channels,
  332. reduction_ratio,
  333. name=None):
  334. pool = fluid.layers.pool2d(
  335. input=input, pool_size=0, pool_type='avg', global_pooling=True)
  336. stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
  337. squeeze = fluid.layers.fc(
  338. input=pool,
  339. size=num_channels / reduction_ratio,
  340. act='relu',
  341. param_attr=fluid.param_attr.ParamAttr(
  342. initializer=fluid.initializer.Uniform(-stdv, stdv),
  343. name=name + '_sqz_weights'),
  344. bias_attr=ParamAttr(name=name + '_sqz_offset'))
  345. stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
  346. excitation = fluid.layers.fc(
  347. input=squeeze,
  348. size=num_channels,
  349. act='sigmoid',
  350. param_attr=fluid.param_attr.ParamAttr(
  351. initializer=fluid.initializer.Uniform(-stdv, stdv),
  352. name=name + '_exc_weights'),
  353. bias_attr=ParamAttr(name=name + '_exc_offset'))
  354. scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
  355. return scale
  356. def conv_bn_layer(self,
  357. input,
  358. filter_size,
  359. num_filters,
  360. stride=1,
  361. padding=1,
  362. num_groups=1,
  363. if_act=True,
  364. name=None):
  365. conv = fluid.layers.conv2d(
  366. input=input,
  367. num_filters=num_filters,
  368. filter_size=filter_size,
  369. stride=stride,
  370. padding=(filter_size - 1) // 2,
  371. groups=num_groups,
  372. act=None,
  373. param_attr=ParamAttr(
  374. initializer=MSRA(), name=name + '_weights'),
  375. bias_attr=False)
  376. bn_name = name + '_bn'
  377. bn = self._bn(input=conv, bn_name=bn_name)
  378. if if_act:
  379. bn = fluid.layers.relu(bn)
  380. return bn
    def _bn(self, input, act=None, bn_name=None):
        """Batch norm with the backbone's freeze/regularization policy.

        Args:
            input (Variable): tensor to normalise.
            act (str|None): activation fused into the BN op.
            bn_name (str|None): parameter-name prefix; callers always
                pass one (None would fail on the concatenations below).

        Returns:
            Variable: normalised (and optionally activated) tensor.
        """
        # Freezing zeroes the learning rate of scale/offset.
        norm_lr = 0. if self.freeze_norm else 1.
        norm_decay = self.norm_decay
        if self.num_classes or self.feature_maps == "stage4":
            # Classification / "stage4" mode: constant init, no weight
            # decay on BN parameters.
            regularizer = None
            pattr_initializer = fluid.initializer.Constant(1.0)
            battr_initializer = fluid.initializer.Constant(0.0)
        else:
            # Detection-style mode: L2 decay on BN parameters, framework
            # default initializers.
            regularizer = L2Decay(norm_decay)
            pattr_initializer = None
            battr_initializer = None
        pattr = ParamAttr(
            name=bn_name + '_scale',
            learning_rate=norm_lr,
            regularizer=regularizer,
            initializer=pattr_initializer)
        battr = ParamAttr(
            name=bn_name + '_offset',
            learning_rate=norm_lr,
            regularizer=regularizer,
            initializer=battr_initializer)
        # Frozen BN also runs on the saved global statistics.
        global_stats = True if self.freeze_norm else False
        out = fluid.layers.batch_norm(
            input=input,
            act=act,
            name=bn_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance',
            use_global_stats=global_stats)
        # NOTE(review): fluid.framework._get_var is a private API, used
        # here to fetch the just-created parameters so their gradients
        # can be stopped when the norm is frozen.
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
        if self.freeze_norm:
            scale.stop_gradient = True
            bias.stop_gradient = True
        return out
  418. def __call__(self, input):
  419. assert isinstance(input, Variable)
  420. if isinstance(self.feature_maps, (list, tuple)):
  421. assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \
  422. "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps)
  423. res_endpoints = []
  424. res = input
  425. feature_maps = self.feature_maps
  426. out = self.net(input)
  427. if self.num_classes or self.feature_maps == "stage4":
  428. return out
  429. for i in feature_maps:
  430. res = self.end_points[i - 2]
  431. if i in self.feature_maps:
  432. res_endpoints.append(res)
  433. if self.freeze_at >= i:
  434. res.stop_gradient = True
  435. return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat)
  436. for idx, feat in enumerate(res_endpoints)])