resnext101_wsl.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. import paddle
  2. from paddle import ParamAttr
  3. import paddle.nn as nn
  4. import paddle.nn.functional as F
  5. from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
  6. from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
  7. from paddle.nn.initializer import Uniform
  8. __all__ = [
  9. "ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl",
  10. "ResNeXt101_32x48d_wsl"
  11. ]
  12. class ConvBNLayer(nn.Layer):
  13. def __init__(self,
  14. input_channels,
  15. output_channels,
  16. filter_size,
  17. stride=1,
  18. groups=1,
  19. act=None,
  20. name=None):
  21. super(ConvBNLayer, self).__init__()
  22. if "downsample" in name:
  23. conv_name = name + ".0"
  24. else:
  25. conv_name = name
  26. self._conv = Conv2D(
  27. in_channels=input_channels,
  28. out_channels=output_channels,
  29. kernel_size=filter_size,
  30. stride=stride,
  31. padding=(filter_size - 1) // 2,
  32. groups=groups,
  33. weight_attr=ParamAttr(name=conv_name + ".weight"),
  34. bias_attr=False)
  35. if "downsample" in name:
  36. bn_name = name[:9] + "downsample.1"
  37. else:
  38. if "conv1" == name:
  39. bn_name = "bn" + name[-1]
  40. else:
  41. bn_name = (name[:10] if name[7:9].isdigit() else name[:9]
  42. ) + "bn" + name[-1]
  43. self._bn = BatchNorm(
  44. num_channels=output_channels,
  45. act=act,
  46. param_attr=ParamAttr(name=bn_name + ".weight"),
  47. bias_attr=ParamAttr(name=bn_name + ".bias"),
  48. moving_mean_name=bn_name + ".running_mean",
  49. moving_variance_name=bn_name + ".running_var")
  50. def forward(self, inputs):
  51. x = self._conv(inputs)
  52. x = self._bn(x)
  53. return x
  54. class ShortCut(nn.Layer):
  55. def __init__(self, input_channels, output_channels, stride, name=None):
  56. super(ShortCut, self).__init__()
  57. self.input_channels = input_channels
  58. self.output_channels = output_channels
  59. self.stride = stride
  60. if input_channels != output_channels or stride != 1:
  61. self._conv = ConvBNLayer(
  62. input_channels,
  63. output_channels,
  64. filter_size=1,
  65. stride=stride,
  66. name=name)
  67. def forward(self, inputs):
  68. if self.input_channels != self.output_channels or self.stride != 1:
  69. return self._conv(inputs)
  70. return inputs
  71. class BottleneckBlock(nn.Layer):
  72. def __init__(self, input_channels, output_channels, stride, cardinality,
  73. width, name):
  74. super(BottleneckBlock, self).__init__()
  75. self._conv0 = ConvBNLayer(
  76. input_channels,
  77. output_channels,
  78. filter_size=1,
  79. act="relu",
  80. name=name + ".conv1")
  81. self._conv1 = ConvBNLayer(
  82. output_channels,
  83. output_channels,
  84. filter_size=3,
  85. act="relu",
  86. stride=stride,
  87. groups=cardinality,
  88. name=name + ".conv2")
  89. self._conv2 = ConvBNLayer(
  90. output_channels,
  91. output_channels // (width // 8),
  92. filter_size=1,
  93. act=None,
  94. name=name + ".conv3")
  95. self._short = ShortCut(
  96. input_channels,
  97. output_channels // (width // 8),
  98. stride=stride,
  99. name=name + ".downsample")
  100. def forward(self, inputs):
  101. x = self._conv0(inputs)
  102. x = self._conv1(x)
  103. x = self._conv2(x)
  104. y = self._short(inputs)
  105. y = paddle.add(x, y)
  106. y = F.relu(y)
  107. return y
  108. class ResNeXt101WSL(nn.Layer):
  109. def __init__(self, layers=101, cardinality=32, width=48, class_dim=1000):
  110. super(ResNeXt101WSL, self).__init__()
  111. self.class_dim = class_dim
  112. self.layers = layers
  113. self.cardinality = cardinality
  114. self.width = width
  115. self.scale = width // 8
  116. self.depth = [3, 4, 23, 3]
  117. self.base_width = cardinality * width
  118. num_filters = [self.base_width * i
  119. for i in [1, 2, 4, 8]] # [256, 512, 1024, 2048]
  120. self._conv_stem = ConvBNLayer(
  121. 3, 64, 7, stride=2, act="relu", name="conv1")
  122. self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
  123. self._conv1_0 = BottleneckBlock(
  124. 64,
  125. num_filters[0],
  126. stride=1,
  127. cardinality=self.cardinality,
  128. width=self.width,
  129. name="layer1.0")
  130. self._conv1_1 = BottleneckBlock(
  131. num_filters[0] // (width // 8),
  132. num_filters[0],
  133. stride=1,
  134. cardinality=self.cardinality,
  135. width=self.width,
  136. name="layer1.1")
  137. self._conv1_2 = BottleneckBlock(
  138. num_filters[0] // (width // 8),
  139. num_filters[0],
  140. stride=1,
  141. cardinality=self.cardinality,
  142. width=self.width,
  143. name="layer1.2")
  144. self._conv2_0 = BottleneckBlock(
  145. num_filters[0] // (width // 8),
  146. num_filters[1],
  147. stride=2,
  148. cardinality=self.cardinality,
  149. width=self.width,
  150. name="layer2.0")
  151. self._conv2_1 = BottleneckBlock(
  152. num_filters[1] // (width // 8),
  153. num_filters[1],
  154. stride=1,
  155. cardinality=self.cardinality,
  156. width=self.width,
  157. name="layer2.1")
  158. self._conv2_2 = BottleneckBlock(
  159. num_filters[1] // (width // 8),
  160. num_filters[1],
  161. stride=1,
  162. cardinality=self.cardinality,
  163. width=self.width,
  164. name="layer2.2")
  165. self._conv2_3 = BottleneckBlock(
  166. num_filters[1] // (width // 8),
  167. num_filters[1],
  168. stride=1,
  169. cardinality=self.cardinality,
  170. width=self.width,
  171. name="layer2.3")
  172. self._conv3_0 = BottleneckBlock(
  173. num_filters[1] // (width // 8),
  174. num_filters[2],
  175. stride=2,
  176. cardinality=self.cardinality,
  177. width=self.width,
  178. name="layer3.0")
  179. self._conv3_1 = BottleneckBlock(
  180. num_filters[2] // (width // 8),
  181. num_filters[2],
  182. stride=1,
  183. cardinality=self.cardinality,
  184. width=self.width,
  185. name="layer3.1")
  186. self._conv3_2 = BottleneckBlock(
  187. num_filters[2] // (width // 8),
  188. num_filters[2],
  189. stride=1,
  190. cardinality=self.cardinality,
  191. width=self.width,
  192. name="layer3.2")
  193. self._conv3_3 = BottleneckBlock(
  194. num_filters[2] // (width // 8),
  195. num_filters[2],
  196. stride=1,
  197. cardinality=self.cardinality,
  198. width=self.width,
  199. name="layer3.3")
  200. self._conv3_4 = BottleneckBlock(
  201. num_filters[2] // (width // 8),
  202. num_filters[2],
  203. stride=1,
  204. cardinality=self.cardinality,
  205. width=self.width,
  206. name="layer3.4")
  207. self._conv3_5 = BottleneckBlock(
  208. num_filters[2] // (width // 8),
  209. num_filters[2],
  210. stride=1,
  211. cardinality=self.cardinality,
  212. width=self.width,
  213. name="layer3.5")
  214. self._conv3_6 = BottleneckBlock(
  215. num_filters[2] // (width // 8),
  216. num_filters[2],
  217. stride=1,
  218. cardinality=self.cardinality,
  219. width=self.width,
  220. name="layer3.6")
  221. self._conv3_7 = BottleneckBlock(
  222. num_filters[2] // (width // 8),
  223. num_filters[2],
  224. stride=1,
  225. cardinality=self.cardinality,
  226. width=self.width,
  227. name="layer3.7")
  228. self._conv3_8 = BottleneckBlock(
  229. num_filters[2] // (width // 8),
  230. num_filters[2],
  231. stride=1,
  232. cardinality=self.cardinality,
  233. width=self.width,
  234. name="layer3.8")
  235. self._conv3_9 = BottleneckBlock(
  236. num_filters[2] // (width // 8),
  237. num_filters[2],
  238. stride=1,
  239. cardinality=self.cardinality,
  240. width=self.width,
  241. name="layer3.9")
  242. self._conv3_10 = BottleneckBlock(
  243. num_filters[2] // (width // 8),
  244. num_filters[2],
  245. stride=1,
  246. cardinality=self.cardinality,
  247. width=self.width,
  248. name="layer3.10")
  249. self._conv3_11 = BottleneckBlock(
  250. num_filters[2] // (width // 8),
  251. num_filters[2],
  252. stride=1,
  253. cardinality=self.cardinality,
  254. width=self.width,
  255. name="layer3.11")
  256. self._conv3_12 = BottleneckBlock(
  257. num_filters[2] // (width // 8),
  258. num_filters[2],
  259. stride=1,
  260. cardinality=self.cardinality,
  261. width=self.width,
  262. name="layer3.12")
  263. self._conv3_13 = BottleneckBlock(
  264. num_filters[2] // (width // 8),
  265. num_filters[2],
  266. stride=1,
  267. cardinality=self.cardinality,
  268. width=self.width,
  269. name="layer3.13")
  270. self._conv3_14 = BottleneckBlock(
  271. num_filters[2] // (width // 8),
  272. num_filters[2],
  273. stride=1,
  274. cardinality=self.cardinality,
  275. width=self.width,
  276. name="layer3.14")
  277. self._conv3_15 = BottleneckBlock(
  278. num_filters[2] // (width // 8),
  279. num_filters[2],
  280. stride=1,
  281. cardinality=self.cardinality,
  282. width=self.width,
  283. name="layer3.15")
  284. self._conv3_16 = BottleneckBlock(
  285. num_filters[2] // (width // 8),
  286. num_filters[2],
  287. stride=1,
  288. cardinality=self.cardinality,
  289. width=self.width,
  290. name="layer3.16")
  291. self._conv3_17 = BottleneckBlock(
  292. num_filters[2] // (width // 8),
  293. num_filters[2],
  294. stride=1,
  295. cardinality=self.cardinality,
  296. width=self.width,
  297. name="layer3.17")
  298. self._conv3_18 = BottleneckBlock(
  299. num_filters[2] // (width // 8),
  300. num_filters[2],
  301. stride=1,
  302. cardinality=self.cardinality,
  303. width=self.width,
  304. name="layer3.18")
  305. self._conv3_19 = BottleneckBlock(
  306. num_filters[2] // (width // 8),
  307. num_filters[2],
  308. stride=1,
  309. cardinality=self.cardinality,
  310. width=self.width,
  311. name="layer3.19")
  312. self._conv3_20 = BottleneckBlock(
  313. num_filters[2] // (width // 8),
  314. num_filters[2],
  315. stride=1,
  316. cardinality=self.cardinality,
  317. width=self.width,
  318. name="layer3.20")
  319. self._conv3_21 = BottleneckBlock(
  320. num_filters[2] // (width // 8),
  321. num_filters[2],
  322. stride=1,
  323. cardinality=self.cardinality,
  324. width=self.width,
  325. name="layer3.21")
  326. self._conv3_22 = BottleneckBlock(
  327. num_filters[2] // (width // 8),
  328. num_filters[2],
  329. stride=1,
  330. cardinality=self.cardinality,
  331. width=self.width,
  332. name="layer3.22")
  333. self._conv4_0 = BottleneckBlock(
  334. num_filters[2] // (width // 8),
  335. num_filters[3],
  336. stride=2,
  337. cardinality=self.cardinality,
  338. width=self.width,
  339. name="layer4.0")
  340. self._conv4_1 = BottleneckBlock(
  341. num_filters[3] // (width // 8),
  342. num_filters[3],
  343. stride=1,
  344. cardinality=self.cardinality,
  345. width=self.width,
  346. name="layer4.1")
  347. self._conv4_2 = BottleneckBlock(
  348. num_filters[3] // (width // 8),
  349. num_filters[3],
  350. stride=1,
  351. cardinality=self.cardinality,
  352. width=self.width,
  353. name="layer4.2")
  354. self._avg_pool = AdaptiveAvgPool2D(1)
  355. self._out = Linear(
  356. num_filters[3] // (width // 8),
  357. class_dim,
  358. weight_attr=ParamAttr(name="fc.weight"),
  359. bias_attr=ParamAttr(name="fc.bias"))
  360. def forward(self, inputs):
  361. x = self._conv_stem(inputs)
  362. x = self._pool(x)
  363. x = self._conv1_0(x)
  364. x = self._conv1_1(x)
  365. x = self._conv1_2(x)
  366. x = self._conv2_0(x)
  367. x = self._conv2_1(x)
  368. x = self._conv2_2(x)
  369. x = self._conv2_3(x)
  370. x = self._conv3_0(x)
  371. x = self._conv3_1(x)
  372. x = self._conv3_2(x)
  373. x = self._conv3_3(x)
  374. x = self._conv3_4(x)
  375. x = self._conv3_5(x)
  376. x = self._conv3_6(x)
  377. x = self._conv3_7(x)
  378. x = self._conv3_8(x)
  379. x = self._conv3_9(x)
  380. x = self._conv3_10(x)
  381. x = self._conv3_11(x)
  382. x = self._conv3_12(x)
  383. x = self._conv3_13(x)
  384. x = self._conv3_14(x)
  385. x = self._conv3_15(x)
  386. x = self._conv3_16(x)
  387. x = self._conv3_17(x)
  388. x = self._conv3_18(x)
  389. x = self._conv3_19(x)
  390. x = self._conv3_20(x)
  391. x = self._conv3_21(x)
  392. x = self._conv3_22(x)
  393. x = self._conv4_0(x)
  394. x = self._conv4_1(x)
  395. x = self._conv4_2(x)
  396. x = self._avg_pool(x)
  397. x = paddle.squeeze(x, axis=[2, 3])
  398. x = self._out(x)
  399. return x
  400. def ResNeXt101_32x8d_wsl(**args):
  401. model = ResNeXt101WSL(cardinality=32, width=8, **args)
  402. return model
  403. def ResNeXt101_32x16d_wsl(**args):
  404. model = ResNeXt101WSL(cardinality=32, width=16, **args)
  405. return model
  406. def ResNeXt101_32x32d_wsl(**args):
  407. model = ResNeXt101WSL(cardinality=32, width=32, **args)
  408. return model
  409. def ResNeXt101_32x48d_wsl(**args):
  410. model = ResNeXt101WSL(cardinality=32, width=48, **args)
  411. return model