inception_v3.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. from paddle import ParamAttr
  19. import paddle.nn as nn
  20. import paddle.nn.functional as F
  21. from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
  22. from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
  23. from paddle.nn.initializer import Uniform
  24. import math
  25. __all__ = ["InceptionV3"]
  26. class ConvBNLayer(nn.Layer):
  27. def __init__(self,
  28. num_channels,
  29. num_filters,
  30. filter_size,
  31. stride=1,
  32. padding=0,
  33. groups=1,
  34. act="relu",
  35. name=None):
  36. super(ConvBNLayer, self).__init__()
  37. self.conv = Conv2D(
  38. in_channels=num_channels,
  39. out_channels=num_filters,
  40. kernel_size=filter_size,
  41. stride=stride,
  42. padding=padding,
  43. groups=groups,
  44. weight_attr=ParamAttr(name=name+"_weights"),
  45. bias_attr=False)
  46. self.batch_norm = BatchNorm(
  47. num_filters,
  48. act=act,
  49. param_attr=ParamAttr(name=name+"_bn_scale"),
  50. bias_attr=ParamAttr(name=name+"_bn_offset"),
  51. moving_mean_name=name+"_bn_mean",
  52. moving_variance_name=name+"_bn_variance")
  53. def forward(self, inputs):
  54. y = self.conv(inputs)
  55. y = self.batch_norm(y)
  56. return y
  57. class InceptionStem(nn.Layer):
  58. def __init__(self):
  59. super(InceptionStem, self).__init__()
  60. self.conv_1a_3x3 = ConvBNLayer(num_channels=3,
  61. num_filters=32,
  62. filter_size=3,
  63. stride=2,
  64. act="relu",
  65. name="conv_1a_3x3")
  66. self.conv_2a_3x3 = ConvBNLayer(num_channels=32,
  67. num_filters=32,
  68. filter_size=3,
  69. stride=1,
  70. act="relu",
  71. name="conv_2a_3x3")
  72. self.conv_2b_3x3 = ConvBNLayer(num_channels=32,
  73. num_filters=64,
  74. filter_size=3,
  75. padding=1,
  76. act="relu",
  77. name="conv_2b_3x3")
  78. self.maxpool = MaxPool2D(kernel_size=3, stride=2, padding=0)
  79. self.conv_3b_1x1 = ConvBNLayer(num_channels=64,
  80. num_filters=80,
  81. filter_size=1,
  82. act="relu",
  83. name="conv_3b_1x1")
  84. self.conv_4a_3x3 = ConvBNLayer(num_channels=80,
  85. num_filters=192,
  86. filter_size=3,
  87. act="relu",
  88. name="conv_4a_3x3")
  89. def forward(self, x):
  90. y = self.conv_1a_3x3(x)
  91. y = self.conv_2a_3x3(y)
  92. y = self.conv_2b_3x3(y)
  93. y = self.maxpool(y)
  94. y = self.conv_3b_1x1(y)
  95. y = self.conv_4a_3x3(y)
  96. y = self.maxpool(y)
  97. return y
  98. class InceptionA(nn.Layer):
  99. def __init__(self, num_channels, pool_features, name=None):
  100. super(InceptionA, self).__init__()
  101. self.branch1x1 = ConvBNLayer(num_channels=num_channels,
  102. num_filters=64,
  103. filter_size=1,
  104. act="relu",
  105. name="inception_a_branch1x1_"+name)
  106. self.branch5x5_1 = ConvBNLayer(num_channels=num_channels,
  107. num_filters=48,
  108. filter_size=1,
  109. act="relu",
  110. name="inception_a_branch5x5_1_"+name)
  111. self.branch5x5_2 = ConvBNLayer(num_channels=48,
  112. num_filters=64,
  113. filter_size=5,
  114. padding=2,
  115. act="relu",
  116. name="inception_a_branch5x5_2_"+name)
  117. self.branch3x3dbl_1 = ConvBNLayer(num_channels=num_channels,
  118. num_filters=64,
  119. filter_size=1,
  120. act="relu",
  121. name="inception_a_branch3x3dbl_1_"+name)
  122. self.branch3x3dbl_2 = ConvBNLayer(num_channels=64,
  123. num_filters=96,
  124. filter_size=3,
  125. padding=1,
  126. act="relu",
  127. name="inception_a_branch3x3dbl_2_"+name)
  128. self.branch3x3dbl_3 = ConvBNLayer(num_channels=96,
  129. num_filters=96,
  130. filter_size=3,
  131. padding=1,
  132. act="relu",
  133. name="inception_a_branch3x3dbl_3_"+name)
  134. self.branch_pool = AvgPool2D(kernel_size=3, stride=1, padding=1, exclusive=False)
  135. self.branch_pool_conv = ConvBNLayer(num_channels=num_channels,
  136. num_filters=pool_features,
  137. filter_size=1,
  138. act="relu",
  139. name="inception_a_branch_pool_"+name)
  140. def forward(self, x):
  141. branch1x1 = self.branch1x1(x)
  142. branch5x5 = self.branch5x5_1(x)
  143. branch5x5 = self.branch5x5_2(branch5x5)
  144. branch3x3dbl = self.branch3x3dbl_1(x)
  145. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  146. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  147. branch_pool = self.branch_pool(x)
  148. branch_pool = self.branch_pool_conv(branch_pool)
  149. outputs = paddle.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)
  150. return outputs
  151. class InceptionB(nn.Layer):
  152. def __init__(self, num_channels, name=None):
  153. super(InceptionB, self).__init__()
  154. self.branch3x3 = ConvBNLayer(num_channels=num_channels,
  155. num_filters=384,
  156. filter_size=3,
  157. stride=2,
  158. act="relu",
  159. name="inception_b_branch3x3_"+name)
  160. self.branch3x3dbl_1 = ConvBNLayer(num_channels=num_channels,
  161. num_filters=64,
  162. filter_size=1,
  163. act="relu",
  164. name="inception_b_branch3x3dbl_1_"+name)
  165. self.branch3x3dbl_2 = ConvBNLayer(num_channels=64,
  166. num_filters=96,
  167. filter_size=3,
  168. padding=1,
  169. act="relu",
  170. name="inception_b_branch3x3dbl_2_"+name)
  171. self.branch3x3dbl_3 = ConvBNLayer(num_channels=96,
  172. num_filters=96,
  173. filter_size=3,
  174. stride=2,
  175. act="relu",
  176. name="inception_b_branch3x3dbl_3_"+name)
  177. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  178. def forward(self, x):
  179. branch3x3 = self.branch3x3(x)
  180. branch3x3dbl = self.branch3x3dbl_1(x)
  181. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  182. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  183. branch_pool = self.branch_pool(x)
  184. outputs = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
  185. return outputs
  186. class InceptionC(nn.Layer):
  187. def __init__(self, num_channels, channels_7x7, name=None):
  188. super(InceptionC, self).__init__()
  189. self.branch1x1 = ConvBNLayer(num_channels=num_channels,
  190. num_filters=192,
  191. filter_size=1,
  192. act="relu",
  193. name="inception_c_branch1x1_"+name)
  194. self.branch7x7_1 = ConvBNLayer(num_channels=num_channels,
  195. num_filters=channels_7x7,
  196. filter_size=1,
  197. stride=1,
  198. act="relu",
  199. name="inception_c_branch7x7_1_"+name)
  200. self.branch7x7_2 = ConvBNLayer(num_channels=channels_7x7,
  201. num_filters=channels_7x7,
  202. filter_size=(1, 7),
  203. stride=1,
  204. padding=(0, 3),
  205. act="relu",
  206. name="inception_c_branch7x7_2_"+name)
  207. self.branch7x7_3 = ConvBNLayer(num_channels=channels_7x7,
  208. num_filters=192,
  209. filter_size=(7, 1),
  210. stride=1,
  211. padding=(3, 0),
  212. act="relu",
  213. name="inception_c_branch7x7_3_"+name)
  214. self.branch7x7dbl_1 = ConvBNLayer(num_channels=num_channels,
  215. num_filters=channels_7x7,
  216. filter_size=1,
  217. act="relu",
  218. name="inception_c_branch7x7dbl_1_"+name)
  219. self.branch7x7dbl_2 = ConvBNLayer(num_channels=channels_7x7,
  220. num_filters=channels_7x7,
  221. filter_size=(7, 1),
  222. padding = (3, 0),
  223. act="relu",
  224. name="inception_c_branch7x7dbl_2_"+name)
  225. self.branch7x7dbl_3 = ConvBNLayer(num_channels=channels_7x7,
  226. num_filters=channels_7x7,
  227. filter_size=(1, 7),
  228. padding = (0, 3),
  229. act="relu",
  230. name="inception_c_branch7x7dbl_3_"+name)
  231. self.branch7x7dbl_4 = ConvBNLayer(num_channels=channels_7x7,
  232. num_filters=channels_7x7,
  233. filter_size=(7, 1),
  234. padding = (3, 0),
  235. act="relu",
  236. name="inception_c_branch7x7dbl_4_"+name)
  237. self.branch7x7dbl_5 = ConvBNLayer(num_channels=channels_7x7,
  238. num_filters=192,
  239. filter_size=(1, 7),
  240. padding = (0, 3),
  241. act="relu",
  242. name="inception_c_branch7x7dbl_5_"+name)
  243. self.branch_pool = AvgPool2D(kernel_size=3, stride=1, padding=1, exclusive=False)
  244. self.branch_pool_conv = ConvBNLayer(num_channels=num_channels,
  245. num_filters=192,
  246. filter_size=1,
  247. act="relu",
  248. name="inception_c_branch_pool_"+name)
  249. def forward(self, x):
  250. branch1x1 = self.branch1x1(x)
  251. branch7x7 = self.branch7x7_1(x)
  252. branch7x7 = self.branch7x7_2(branch7x7)
  253. branch7x7 = self.branch7x7_3(branch7x7)
  254. branch7x7dbl = self.branch7x7dbl_1(x)
  255. branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
  256. branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
  257. branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
  258. branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
  259. branch_pool = self.branch_pool(x)
  260. branch_pool = self.branch_pool_conv(branch_pool)
  261. outputs = paddle.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)
  262. return outputs
  263. class InceptionD(nn.Layer):
  264. def __init__(self, num_channels, name=None):
  265. super(InceptionD, self).__init__()
  266. self.branch3x3_1 = ConvBNLayer(num_channels=num_channels,
  267. num_filters=192,
  268. filter_size=1,
  269. act="relu",
  270. name="inception_d_branch3x3_1_"+name)
  271. self.branch3x3_2 = ConvBNLayer(num_channels=192,
  272. num_filters=320,
  273. filter_size=3,
  274. stride=2,
  275. act="relu",
  276. name="inception_d_branch3x3_2_"+name)
  277. self.branch7x7x3_1 = ConvBNLayer(num_channels=num_channels,
  278. num_filters=192,
  279. filter_size=1,
  280. act="relu",
  281. name="inception_d_branch7x7x3_1_"+name)
  282. self.branch7x7x3_2 = ConvBNLayer(num_channels=192,
  283. num_filters=192,
  284. filter_size=(1, 7),
  285. padding=(0, 3),
  286. act="relu",
  287. name="inception_d_branch7x7x3_2_"+name)
  288. self.branch7x7x3_3 = ConvBNLayer(num_channels=192,
  289. num_filters=192,
  290. filter_size=(7, 1),
  291. padding=(3, 0),
  292. act="relu",
  293. name="inception_d_branch7x7x3_3_"+name)
  294. self.branch7x7x3_4 = ConvBNLayer(num_channels=192,
  295. num_filters=192,
  296. filter_size=3,
  297. stride=2,
  298. act="relu",
  299. name="inception_d_branch7x7x3_4_"+name)
  300. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  301. def forward(self, x):
  302. branch3x3 = self.branch3x3_1(x)
  303. branch3x3 = self.branch3x3_2(branch3x3)
  304. branch7x7x3 = self.branch7x7x3_1(x)
  305. branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
  306. branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
  307. branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
  308. branch_pool = self.branch_pool(x)
  309. outputs = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
  310. return outputs
  311. class InceptionE(nn.Layer):
  312. def __init__(self, num_channels, name=None):
  313. super(InceptionE, self).__init__()
  314. self.branch1x1 = ConvBNLayer(num_channels=num_channels,
  315. num_filters=320,
  316. filter_size=1,
  317. act="relu",
  318. name="inception_e_branch1x1_"+name)
  319. self.branch3x3_1 = ConvBNLayer(num_channels=num_channels,
  320. num_filters=384,
  321. filter_size=1,
  322. act="relu",
  323. name="inception_e_branch3x3_1_"+name)
  324. self.branch3x3_2a = ConvBNLayer(num_channels=384,
  325. num_filters=384,
  326. filter_size=(1, 3),
  327. padding=(0, 1),
  328. act="relu",
  329. name="inception_e_branch3x3_2a_"+name)
  330. self.branch3x3_2b = ConvBNLayer(num_channels=384,
  331. num_filters=384,
  332. filter_size=(3, 1),
  333. padding=(1, 0),
  334. act="relu",
  335. name="inception_e_branch3x3_2b_"+name)
  336. self.branch3x3dbl_1 = ConvBNLayer(num_channels=num_channels,
  337. num_filters=448,
  338. filter_size=1,
  339. act="relu",
  340. name="inception_e_branch3x3dbl_1_"+name)
  341. self.branch3x3dbl_2 = ConvBNLayer(num_channels=448,
  342. num_filters=384,
  343. filter_size=3,
  344. padding=1,
  345. act="relu",
  346. name="inception_e_branch3x3dbl_2_"+name)
  347. self.branch3x3dbl_3a = ConvBNLayer(num_channels=384,
  348. num_filters=384,
  349. filter_size=(1, 3),
  350. padding=(0, 1),
  351. act="relu",
  352. name="inception_e_branch3x3dbl_3a_"+name)
  353. self.branch3x3dbl_3b = ConvBNLayer(num_channels=384,
  354. num_filters=384,
  355. filter_size=(3, 1),
  356. padding=(1, 0),
  357. act="relu",
  358. name="inception_e_branch3x3dbl_3b_"+name)
  359. self.branch_pool = AvgPool2D(kernel_size=3, stride=1, padding=1, exclusive=False)
  360. self.branch_pool_conv = ConvBNLayer(num_channels=num_channels,
  361. num_filters=192,
  362. filter_size=1,
  363. act="relu",
  364. name="inception_e_branch_pool_"+name)
  365. def forward(self, x):
  366. branch1x1 = self.branch1x1(x)
  367. branch3x3 = self.branch3x3_1(x)
  368. branch3x3 = [
  369. self.branch3x3_2a(branch3x3),
  370. self.branch3x3_2b(branch3x3),
  371. ]
  372. branch3x3 = paddle.concat(branch3x3, axis=1)
  373. branch3x3dbl = self.branch3x3dbl_1(x)
  374. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  375. branch3x3dbl = [
  376. self.branch3x3dbl_3a(branch3x3dbl),
  377. self.branch3x3dbl_3b(branch3x3dbl),
  378. ]
  379. branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
  380. branch_pool = self.branch_pool(x)
  381. branch_pool = self.branch_pool_conv(branch_pool)
  382. outputs = paddle.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)
  383. return outputs
  384. class InceptionV3(nn.Layer):
  385. def __init__(self, class_dim=1000):
  386. super(InceptionV3, self).__init__()
  387. self.inception_a_list = [[192, 256, 288], [32, 64, 64]]
  388. self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]]
  389. self.inception_stem = InceptionStem()
  390. self.inception_block_list = []
  391. for i in range(len(self.inception_a_list[0])):
  392. inception_a = self.add_sublayer("inception_a_"+str(i+1),
  393. InceptionA(self.inception_a_list[0][i],
  394. self.inception_a_list[1][i],
  395. name=str(i+1)))
  396. self.inception_block_list.append(inception_a)
  397. inception_b = self.add_sublayer("nception_b_1",
  398. InceptionB(288, name="1"))
  399. self.inception_block_list.append(inception_b)
  400. for i in range(len(self.inception_c_list[0])):
  401. inception_c = self.add_sublayer("inception_c_"+str(i+1),
  402. InceptionC(self.inception_c_list[0][i],
  403. self.inception_c_list[1][i],
  404. name=str(i+1)))
  405. self.inception_block_list.append(inception_c)
  406. inception_d = self.add_sublayer("inception_d_1",
  407. InceptionD(768, name="1"))
  408. self.inception_block_list.append(inception_d)
  409. inception_e = self.add_sublayer("inception_e_1",
  410. InceptionE(1280, name="1"))
  411. self.inception_block_list.append(inception_e)
  412. inception_e = self.add_sublayer("inception_e_2",
  413. InceptionE(2048, name="2"))
  414. self.inception_block_list.append(inception_e)
  415. self.gap = AdaptiveAvgPool2D(1)
  416. self.drop = Dropout(p=0.2, mode="downscale_in_infer")
  417. stdv = 1.0 / math.sqrt(2048 * 1.0)
  418. self.out = Linear(
  419. 2048,
  420. class_dim,
  421. weight_attr=ParamAttr(
  422. initializer=Uniform(-stdv, stdv), name="fc_weights"),
  423. bias_attr=ParamAttr(name="fc_offset"))
  424. def forward(self, x):
  425. y = self.inception_stem(x)
  426. for inception_block in self.inception_block_list:
  427. y = inception_block(y)
  428. y = self.gap(y)
  429. y = paddle.reshape(y, shape=[-1, 2048])
  430. y = self.drop(y)
  431. y = self.out(y)
  432. return y