inception_v3.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. from paddle import ParamAttr
  19. import paddle.nn as nn
  20. import paddle.nn.functional as F
  21. from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
  22. from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
  23. from paddle.nn.initializer import Uniform
  24. import math
  25. __all__ = ["InceptionV3"]
  26. class ConvBNLayer(nn.Layer):
  27. def __init__(self,
  28. num_channels,
  29. num_filters,
  30. filter_size,
  31. stride=1,
  32. padding=0,
  33. groups=1,
  34. act="relu",
  35. name=None):
  36. super(ConvBNLayer, self).__init__()
  37. self.conv = Conv2D(
  38. in_channels=num_channels,
  39. out_channels=num_filters,
  40. kernel_size=filter_size,
  41. stride=stride,
  42. padding=padding,
  43. groups=groups,
  44. weight_attr=ParamAttr(name=name + "_weights"),
  45. bias_attr=False)
  46. self.batch_norm = BatchNorm(
  47. num_filters,
  48. act=act,
  49. param_attr=ParamAttr(name=name + "_bn_scale"),
  50. bias_attr=ParamAttr(name=name + "_bn_offset"),
  51. moving_mean_name=name + "_bn_mean",
  52. moving_variance_name=name + "_bn_variance")
  53. def forward(self, inputs):
  54. y = self.conv(inputs)
  55. y = self.batch_norm(y)
  56. return y
  57. class InceptionStem(nn.Layer):
  58. def __init__(self):
  59. super(InceptionStem, self).__init__()
  60. self.conv_1a_3x3 = ConvBNLayer(
  61. num_channels=3,
  62. num_filters=32,
  63. filter_size=3,
  64. stride=2,
  65. act="relu",
  66. name="conv_1a_3x3")
  67. self.conv_2a_3x3 = ConvBNLayer(
  68. num_channels=32,
  69. num_filters=32,
  70. filter_size=3,
  71. stride=1,
  72. act="relu",
  73. name="conv_2a_3x3")
  74. self.conv_2b_3x3 = ConvBNLayer(
  75. num_channels=32,
  76. num_filters=64,
  77. filter_size=3,
  78. padding=1,
  79. act="relu",
  80. name="conv_2b_3x3")
  81. self.maxpool = MaxPool2D(kernel_size=3, stride=2, padding=0)
  82. self.conv_3b_1x1 = ConvBNLayer(
  83. num_channels=64,
  84. num_filters=80,
  85. filter_size=1,
  86. act="relu",
  87. name="conv_3b_1x1")
  88. self.conv_4a_3x3 = ConvBNLayer(
  89. num_channels=80,
  90. num_filters=192,
  91. filter_size=3,
  92. act="relu",
  93. name="conv_4a_3x3")
  94. def forward(self, x):
  95. y = self.conv_1a_3x3(x)
  96. y = self.conv_2a_3x3(y)
  97. y = self.conv_2b_3x3(y)
  98. y = self.maxpool(y)
  99. y = self.conv_3b_1x1(y)
  100. y = self.conv_4a_3x3(y)
  101. y = self.maxpool(y)
  102. return y
  103. class InceptionA(nn.Layer):
  104. def __init__(self, num_channels, pool_features, name=None):
  105. super(InceptionA, self).__init__()
  106. self.branch1x1 = ConvBNLayer(
  107. num_channels=num_channels,
  108. num_filters=64,
  109. filter_size=1,
  110. act="relu",
  111. name="inception_a_branch1x1_" + name)
  112. self.branch5x5_1 = ConvBNLayer(
  113. num_channels=num_channels,
  114. num_filters=48,
  115. filter_size=1,
  116. act="relu",
  117. name="inception_a_branch5x5_1_" + name)
  118. self.branch5x5_2 = ConvBNLayer(
  119. num_channels=48,
  120. num_filters=64,
  121. filter_size=5,
  122. padding=2,
  123. act="relu",
  124. name="inception_a_branch5x5_2_" + name)
  125. self.branch3x3dbl_1 = ConvBNLayer(
  126. num_channels=num_channels,
  127. num_filters=64,
  128. filter_size=1,
  129. act="relu",
  130. name="inception_a_branch3x3dbl_1_" + name)
  131. self.branch3x3dbl_2 = ConvBNLayer(
  132. num_channels=64,
  133. num_filters=96,
  134. filter_size=3,
  135. padding=1,
  136. act="relu",
  137. name="inception_a_branch3x3dbl_2_" + name)
  138. self.branch3x3dbl_3 = ConvBNLayer(
  139. num_channels=96,
  140. num_filters=96,
  141. filter_size=3,
  142. padding=1,
  143. act="relu",
  144. name="inception_a_branch3x3dbl_3_" + name)
  145. self.branch_pool = AvgPool2D(
  146. kernel_size=3, stride=1, padding=1, exclusive=False)
  147. self.branch_pool_conv = ConvBNLayer(
  148. num_channels=num_channels,
  149. num_filters=pool_features,
  150. filter_size=1,
  151. act="relu",
  152. name="inception_a_branch_pool_" + name)
  153. def forward(self, x):
  154. branch1x1 = self.branch1x1(x)
  155. branch5x5 = self.branch5x5_1(x)
  156. branch5x5 = self.branch5x5_2(branch5x5)
  157. branch3x3dbl = self.branch3x3dbl_1(x)
  158. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  159. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  160. branch_pool = self.branch_pool(x)
  161. branch_pool = self.branch_pool_conv(branch_pool)
  162. outputs = paddle.concat(
  163. [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)
  164. return outputs
  165. class InceptionB(nn.Layer):
  166. def __init__(self, num_channels, name=None):
  167. super(InceptionB, self).__init__()
  168. self.branch3x3 = ConvBNLayer(
  169. num_channels=num_channels,
  170. num_filters=384,
  171. filter_size=3,
  172. stride=2,
  173. act="relu",
  174. name="inception_b_branch3x3_" + name)
  175. self.branch3x3dbl_1 = ConvBNLayer(
  176. num_channels=num_channels,
  177. num_filters=64,
  178. filter_size=1,
  179. act="relu",
  180. name="inception_b_branch3x3dbl_1_" + name)
  181. self.branch3x3dbl_2 = ConvBNLayer(
  182. num_channels=64,
  183. num_filters=96,
  184. filter_size=3,
  185. padding=1,
  186. act="relu",
  187. name="inception_b_branch3x3dbl_2_" + name)
  188. self.branch3x3dbl_3 = ConvBNLayer(
  189. num_channels=96,
  190. num_filters=96,
  191. filter_size=3,
  192. stride=2,
  193. act="relu",
  194. name="inception_b_branch3x3dbl_3_" + name)
  195. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  196. def forward(self, x):
  197. branch3x3 = self.branch3x3(x)
  198. branch3x3dbl = self.branch3x3dbl_1(x)
  199. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  200. branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
  201. branch_pool = self.branch_pool(x)
  202. outputs = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
  203. return outputs
  204. class InceptionC(nn.Layer):
  205. def __init__(self, num_channels, channels_7x7, name=None):
  206. super(InceptionC, self).__init__()
  207. self.branch1x1 = ConvBNLayer(
  208. num_channels=num_channels,
  209. num_filters=192,
  210. filter_size=1,
  211. act="relu",
  212. name="inception_c_branch1x1_" + name)
  213. self.branch7x7_1 = ConvBNLayer(
  214. num_channels=num_channels,
  215. num_filters=channels_7x7,
  216. filter_size=1,
  217. stride=1,
  218. act="relu",
  219. name="inception_c_branch7x7_1_" + name)
  220. self.branch7x7_2 = ConvBNLayer(
  221. num_channels=channels_7x7,
  222. num_filters=channels_7x7,
  223. filter_size=(1, 7),
  224. stride=1,
  225. padding=(0, 3),
  226. act="relu",
  227. name="inception_c_branch7x7_2_" + name)
  228. self.branch7x7_3 = ConvBNLayer(
  229. num_channels=channels_7x7,
  230. num_filters=192,
  231. filter_size=(7, 1),
  232. stride=1,
  233. padding=(3, 0),
  234. act="relu",
  235. name="inception_c_branch7x7_3_" + name)
  236. self.branch7x7dbl_1 = ConvBNLayer(
  237. num_channels=num_channels,
  238. num_filters=channels_7x7,
  239. filter_size=1,
  240. act="relu",
  241. name="inception_c_branch7x7dbl_1_" + name)
  242. self.branch7x7dbl_2 = ConvBNLayer(
  243. num_channels=channels_7x7,
  244. num_filters=channels_7x7,
  245. filter_size=(7, 1),
  246. padding=(3, 0),
  247. act="relu",
  248. name="inception_c_branch7x7dbl_2_" + name)
  249. self.branch7x7dbl_3 = ConvBNLayer(
  250. num_channels=channels_7x7,
  251. num_filters=channels_7x7,
  252. filter_size=(1, 7),
  253. padding=(0, 3),
  254. act="relu",
  255. name="inception_c_branch7x7dbl_3_" + name)
  256. self.branch7x7dbl_4 = ConvBNLayer(
  257. num_channels=channels_7x7,
  258. num_filters=channels_7x7,
  259. filter_size=(7, 1),
  260. padding=(3, 0),
  261. act="relu",
  262. name="inception_c_branch7x7dbl_4_" + name)
  263. self.branch7x7dbl_5 = ConvBNLayer(
  264. num_channels=channels_7x7,
  265. num_filters=192,
  266. filter_size=(1, 7),
  267. padding=(0, 3),
  268. act="relu",
  269. name="inception_c_branch7x7dbl_5_" + name)
  270. self.branch_pool = AvgPool2D(
  271. kernel_size=3, stride=1, padding=1, exclusive=False)
  272. self.branch_pool_conv = ConvBNLayer(
  273. num_channels=num_channels,
  274. num_filters=192,
  275. filter_size=1,
  276. act="relu",
  277. name="inception_c_branch_pool_" + name)
  278. def forward(self, x):
  279. branch1x1 = self.branch1x1(x)
  280. branch7x7 = self.branch7x7_1(x)
  281. branch7x7 = self.branch7x7_2(branch7x7)
  282. branch7x7 = self.branch7x7_3(branch7x7)
  283. branch7x7dbl = self.branch7x7dbl_1(x)
  284. branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
  285. branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
  286. branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
  287. branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
  288. branch_pool = self.branch_pool(x)
  289. branch_pool = self.branch_pool_conv(branch_pool)
  290. outputs = paddle.concat(
  291. [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)
  292. return outputs
  293. class InceptionD(nn.Layer):
  294. def __init__(self, num_channels, name=None):
  295. super(InceptionD, self).__init__()
  296. self.branch3x3_1 = ConvBNLayer(
  297. num_channels=num_channels,
  298. num_filters=192,
  299. filter_size=1,
  300. act="relu",
  301. name="inception_d_branch3x3_1_" + name)
  302. self.branch3x3_2 = ConvBNLayer(
  303. num_channels=192,
  304. num_filters=320,
  305. filter_size=3,
  306. stride=2,
  307. act="relu",
  308. name="inception_d_branch3x3_2_" + name)
  309. self.branch7x7x3_1 = ConvBNLayer(
  310. num_channels=num_channels,
  311. num_filters=192,
  312. filter_size=1,
  313. act="relu",
  314. name="inception_d_branch7x7x3_1_" + name)
  315. self.branch7x7x3_2 = ConvBNLayer(
  316. num_channels=192,
  317. num_filters=192,
  318. filter_size=(1, 7),
  319. padding=(0, 3),
  320. act="relu",
  321. name="inception_d_branch7x7x3_2_" + name)
  322. self.branch7x7x3_3 = ConvBNLayer(
  323. num_channels=192,
  324. num_filters=192,
  325. filter_size=(7, 1),
  326. padding=(3, 0),
  327. act="relu",
  328. name="inception_d_branch7x7x3_3_" + name)
  329. self.branch7x7x3_4 = ConvBNLayer(
  330. num_channels=192,
  331. num_filters=192,
  332. filter_size=3,
  333. stride=2,
  334. act="relu",
  335. name="inception_d_branch7x7x3_4_" + name)
  336. self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
  337. def forward(self, x):
  338. branch3x3 = self.branch3x3_1(x)
  339. branch3x3 = self.branch3x3_2(branch3x3)
  340. branch7x7x3 = self.branch7x7x3_1(x)
  341. branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
  342. branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
  343. branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
  344. branch_pool = self.branch_pool(x)
  345. outputs = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
  346. return outputs
  347. class InceptionE(nn.Layer):
  348. def __init__(self, num_channels, name=None):
  349. super(InceptionE, self).__init__()
  350. self.branch1x1 = ConvBNLayer(
  351. num_channels=num_channels,
  352. num_filters=320,
  353. filter_size=1,
  354. act="relu",
  355. name="inception_e_branch1x1_" + name)
  356. self.branch3x3_1 = ConvBNLayer(
  357. num_channels=num_channels,
  358. num_filters=384,
  359. filter_size=1,
  360. act="relu",
  361. name="inception_e_branch3x3_1_" + name)
  362. self.branch3x3_2a = ConvBNLayer(
  363. num_channels=384,
  364. num_filters=384,
  365. filter_size=(1, 3),
  366. padding=(0, 1),
  367. act="relu",
  368. name="inception_e_branch3x3_2a_" + name)
  369. self.branch3x3_2b = ConvBNLayer(
  370. num_channels=384,
  371. num_filters=384,
  372. filter_size=(3, 1),
  373. padding=(1, 0),
  374. act="relu",
  375. name="inception_e_branch3x3_2b_" + name)
  376. self.branch3x3dbl_1 = ConvBNLayer(
  377. num_channels=num_channels,
  378. num_filters=448,
  379. filter_size=1,
  380. act="relu",
  381. name="inception_e_branch3x3dbl_1_" + name)
  382. self.branch3x3dbl_2 = ConvBNLayer(
  383. num_channels=448,
  384. num_filters=384,
  385. filter_size=3,
  386. padding=1,
  387. act="relu",
  388. name="inception_e_branch3x3dbl_2_" + name)
  389. self.branch3x3dbl_3a = ConvBNLayer(
  390. num_channels=384,
  391. num_filters=384,
  392. filter_size=(1, 3),
  393. padding=(0, 1),
  394. act="relu",
  395. name="inception_e_branch3x3dbl_3a_" + name)
  396. self.branch3x3dbl_3b = ConvBNLayer(
  397. num_channels=384,
  398. num_filters=384,
  399. filter_size=(3, 1),
  400. padding=(1, 0),
  401. act="relu",
  402. name="inception_e_branch3x3dbl_3b_" + name)
  403. self.branch_pool = AvgPool2D(
  404. kernel_size=3, stride=1, padding=1, exclusive=False)
  405. self.branch_pool_conv = ConvBNLayer(
  406. num_channels=num_channels,
  407. num_filters=192,
  408. filter_size=1,
  409. act="relu",
  410. name="inception_e_branch_pool_" + name)
  411. def forward(self, x):
  412. branch1x1 = self.branch1x1(x)
  413. branch3x3 = self.branch3x3_1(x)
  414. branch3x3 = [
  415. self.branch3x3_2a(branch3x3),
  416. self.branch3x3_2b(branch3x3),
  417. ]
  418. branch3x3 = paddle.concat(branch3x3, axis=1)
  419. branch3x3dbl = self.branch3x3dbl_1(x)
  420. branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
  421. branch3x3dbl = [
  422. self.branch3x3dbl_3a(branch3x3dbl),
  423. self.branch3x3dbl_3b(branch3x3dbl),
  424. ]
  425. branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
  426. branch_pool = self.branch_pool(x)
  427. branch_pool = self.branch_pool_conv(branch_pool)
  428. outputs = paddle.concat(
  429. [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)
  430. return outputs
  431. class InceptionV3(nn.Layer):
  432. def __init__(self, class_dim=1000):
  433. super(InceptionV3, self).__init__()
  434. self.inception_a_list = [[192, 256, 288], [32, 64, 64]]
  435. self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]]
  436. self.inception_stem = InceptionStem()
  437. self.inception_block_list = []
  438. for i in range(len(self.inception_a_list[0])):
  439. inception_a = self.add_sublayer(
  440. "inception_a_" + str(i + 1),
  441. InceptionA(
  442. self.inception_a_list[0][i],
  443. self.inception_a_list[1][i],
  444. name=str(i + 1)))
  445. self.inception_block_list.append(inception_a)
  446. inception_b = self.add_sublayer(
  447. "nception_b_1", InceptionB(
  448. 288, name="1"))
  449. self.inception_block_list.append(inception_b)
  450. for i in range(len(self.inception_c_list[0])):
  451. inception_c = self.add_sublayer(
  452. "inception_c_" + str(i + 1),
  453. InceptionC(
  454. self.inception_c_list[0][i],
  455. self.inception_c_list[1][i],
  456. name=str(i + 1)))
  457. self.inception_block_list.append(inception_c)
  458. inception_d = self.add_sublayer(
  459. "inception_d_1", InceptionD(
  460. 768, name="1"))
  461. self.inception_block_list.append(inception_d)
  462. inception_e = self.add_sublayer(
  463. "inception_e_1", InceptionE(
  464. 1280, name="1"))
  465. self.inception_block_list.append(inception_e)
  466. inception_e = self.add_sublayer(
  467. "inception_e_2", InceptionE(
  468. 2048, name="2"))
  469. self.inception_block_list.append(inception_e)
  470. self.gap = AdaptiveAvgPool2D(1)
  471. self.drop = Dropout(p=0.2, mode="downscale_in_infer")
  472. stdv = 1.0 / math.sqrt(2048 * 1.0)
  473. self.out = Linear(
  474. 2048,
  475. class_dim,
  476. weight_attr=ParamAttr(
  477. initializer=Uniform(-stdv, stdv), name="fc_weights"),
  478. bias_attr=ParamAttr(name="fc_offset"))
  479. def forward(self, x):
  480. y = self.inception_stem(x)
  481. for inception_block in self.inception_block_list:
  482. y = inception_block(y)
  483. y = self.gap(y)
  484. y = paddle.reshape(y, shape=[-1, 2048])
  485. y = self.drop(y)
  486. y = self.out(y)
  487. return y