# arch_config.yaml
# Architecture definitions keyed by inference-model name; each entry sets
# model_type, algorithm, and the Transform / Backbone / Neck / Head sections.
  # Classification entry (model_type: cls): MobileNetV3 "small" backbone at
  # scale 0.35 feeding a ClsHead with class_dim 2. Transform and Neck are unset.
  ch_ptocr_mobile_v2.0_cls_infer:
    model_type: cls
    algorithm: CLS
    Transform: null
    Backbone:
      name: MobileNetV3
      scale: 0.35
      model_name: small
    Neck: null
    Head:
      name: ClsHead
      class_dim: 2
  # Detection entry (model_type: det, algorithm DB): MobileNetV3 "large"
  # backbone at scale 0.5 with disable_se set, RSEFPN neck (96 channels,
  # shortcut on) and a DBHead with k = 50. Transform is unset.
  Multilingual_PP-OCRv3_det_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: MobileNetV3
      scale: 0.5
      model_name: large
      disable_se: true
    Neck:
      name: RSEFPN
      out_channels: 96
      shortcut: true
    Head:
      name: DBHead
      k: 50
  # Detection entry (model_type: det, algorithm DB): MobileNetV3 "large"
  # backbone at scale 0.5 with disable_se set, RSEFPN neck (96 channels,
  # shortcut on) and a DBHead with k = 50. Transform is unset.
  en_PP-OCRv3_det_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: MobileNetV3
      scale: 0.5
      model_name: large
      disable_se: true
    Neck:
      name: RSEFPN
      out_channels: 96
      shortcut: true
    Head:
      name: DBHead
      k: 50
  # Detection entry (model_type: det, algorithm DB): MobileNetV3 "large"
  # backbone at scale 0.5 with disable_se set, RSEFPN neck (96 channels,
  # shortcut on) and a DBHead with k = 50. Transform is unset.
  ch_PP-OCRv3_det_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: MobileNetV3
      scale: 0.5
      model_name: large
      disable_se: true
    Neck:
      name: RSEFPN
      out_channels: 96
      shortcut: true
    Head:
      name: DBHead
      k: 50
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead whose head_list holds a CTCHead
  # (with its own svtr Neck and Head settings) and an NRTRHead.
  en_PP-OCRv4_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 97  # 'blank' + ...(95) + ' '  (1 + 95 + 1 = 97)
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Detection entry (model_type: det, algorithm DB): PPLCNetV3 backbone at
  # scale 0.75 with det enabled, RSEFPN neck (96 channels, shortcut on) and a
  # DBHead with k = 50. Transform is explicitly null.
  ch_PP-OCRv4_det_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.75
      det: true
    Neck:
      name: RSEFPN
      out_channels: 96
      shortcut: true
    Head:
      name: DBHead
      k: 50
  # Detection entry (model_type: det, algorithm DB): PPLCNetV3 backbone at
  # scale 0.75 with det enabled, RSEFPN neck (96 channels, shortcut on) and a
  # DBHead with k = 50. Transform is explicitly null.
  ch_PP-OCRv5_det_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.75
      det: true
    Neck:
      name: RSEFPN
      out_channels: 96
      shortcut: true
    Head:
      name: DBHead
      k: 50
  # Detection entry (model_type: det, algorithm DB): PPHGNetV2_B4 backbone
  # with det enabled, LKPAN neck (256 channels, intracl on) and a PFHeadLocal
  # head with k = 50 in "large" mode. Transform is explicitly null.
  ch_PP-OCRv5_det_server_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: PPHGNetV2_B4
      det: true
    Neck:
      name: LKPAN
      out_channels: 256
      intracl: true
    Head:
      name: PFHeadLocal
      k: 50
      mode: "large"
  # Detection entry (model_type: det, algorithm DB): PPHGNet_small backbone
  # with det enabled, LKPAN neck (256 channels, intracl on) and a PFHeadLocal
  # head with k = 50 in "large" mode. Transform is explicitly null.
  ch_PP-OCRv4_det_server_infer:
    model_type: det
    algorithm: DB
    Transform: null
    Backbone:
      name: PPHGNet_small
      det: true
    Neck:
      name: LKPAN
      out_channels: 256
      intracl: true
    Head:
      name: PFHeadLocal
      k: 50
      mode: "large"
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead whose head_list holds a CTCHead
  # (with its own svtr Neck and Head settings) and an NRTRHead.
  ch_PP-OCRv4_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 6625  # 'blank' + ...(6623) + ' '
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_HGNet): PPHGNet_small
  # backbone and a MultiHead whose head_list holds a CTCHead (with its own
  # svtr Neck and Head settings) and an NRTRHead.
  ch_PP-OCRv4_rec_server_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPHGNet_small
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 6625  # 'blank' + ...(6623) + ' '
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_HGNet): PPHGNet_small
  # backbone and a MultiHead (CTCLabelDecode width 15631) whose head_list
  # holds a CTCHead (with its own svtr Neck and Head settings) and an NRTRHead.
  ch_PP-OCRv4_rec_server_doc_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPHGNet_small
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 15631
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_HGNet): PPHGNetV2_B4
  # backbone with text_rec enabled and a MultiHead (CTCLabelDecode width
  # 18385) whose head_list holds a CTCHead and an NRTRHead.
  ch_PP-OCRv5_rec_server_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPHGNetV2_B4
      text_rec: true
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 18385
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec): PPLCNetV3 backbone at scale 0.95 and
  # a MultiHead (CTCLabelDecode width 18385) whose head_list holds a CTCHead
  # and an NRTRHead.
  # NOTE(review): algorithm is SVTR_HGNet although the backbone is PPLCNetV3;
  # en_PP-OCRv4_rec_infer and ch_PP-OCRv4_rec_infer pair PPLCNetV3 with
  # SVTR_LCNet. Value kept as found; confirm which label is intended.
  ch_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 18385
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  chinese_cht_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 8423
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  latin_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 187
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  cyrillic_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 165
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  arabic_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 164
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  korean_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 3690
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  japan_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 4401
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  ta_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 130
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  te_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 153
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  ka_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 155
      fc_decay: 0.00001
  # Recognition entry (model_type: rec, algorithm SVTR): MobileNetV1Enhance
  # backbone at scale 0.5, SequenceEncoder neck using the svtr encoder, and a
  # CTCHead. out_channels is left commented out, as in the original.
  devanagari_PP-OCRv3_rec_infer:
    model_type: rec
    algorithm: SVTR
    Transform: null
    Backbone:
      name: MobileNetV1Enhance
      scale: 0.5
      last_conv_stride: [1, 2]
      last_pool_type: avg
    Neck:
      name: SequenceEncoder
      encoder_type: svtr
      dims: 64
      depth: 2
      hidden_dims: 120
      use_guide: true
    Head:
      name: CTCHead
      # out_channels: 169
      fc_decay: 0.00001
  # Recognition entry (model_type: rec): PPLCNetV3 backbone at scale 0.95 and
  # a MultiHead (CTCLabelDecode width 11947) whose head_list holds a CTCHead
  # and an NRTRHead.
  # NOTE(review): algorithm is SVTR_HGNet although the backbone is PPLCNetV3;
  # en_PP-OCRv5_rec_infer pairs the same backbone with SVTR_LCNet. Value kept
  # as found; confirm which label is intended.
  korean_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 11947
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec): PPLCNetV3 backbone at scale 0.95 and
  # a MultiHead (CTCLabelDecode width 504) whose head_list holds a CTCHead
  # and an NRTRHead.
  # NOTE(review): algorithm is SVTR_HGNet although the backbone is PPLCNetV3;
  # en_PP-OCRv5_rec_infer pairs the same backbone with SVTR_LCNet. Value kept
  # as found; confirm which label is intended.
  latin_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 504
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec): PPLCNetV3 backbone at scale 0.95 and
  # a MultiHead (CTCLabelDecode width 519) whose head_list holds a CTCHead
  # and an NRTRHead.
  # NOTE(review): algorithm is SVTR_HGNet although the backbone is PPLCNetV3;
  # en_PP-OCRv5_rec_infer pairs the same backbone with SVTR_LCNet. Value kept
  # as found; confirm which label is intended.
  eslav_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_HGNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 519
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 356) whose
  # head_list holds a CTCHead and an NRTRHead.
  el_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 356
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 526) whose
  # head_list holds a CTCHead and an NRTRHead.
  th_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 526
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 438) whose
  # head_list holds a CTCHead and an NRTRHead.
  en_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 438
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 749) whose
  # head_list holds a CTCHead and an NRTRHead.
  arabic_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 749
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 852) whose
  # head_list holds a CTCHead and an NRTRHead.
  cyrillic_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 852
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 570) whose
  # head_list holds a CTCHead and an NRTRHead.
  devanagari_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 570
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 515) whose
  # head_list holds a CTCHead and an NRTRHead.
  ta_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 515
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25
  # Recognition entry (model_type: rec, algorithm SVTR_LCNet): PPLCNetV3
  # backbone at scale 0.95 and a MultiHead (CTCLabelDecode width 542) whose
  # head_list holds a CTCHead and an NRTRHead.
  te_PP-OCRv5_rec_infer:
    model_type: rec
    algorithm: SVTR_LCNet
    Transform: null
    Backbone:
      name: PPLCNetV3
      scale: 0.95
    Head:
      name: MultiHead
      out_channels_list:
        CTCLabelDecode: 542
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 120
              depth: 2
              hidden_dims: 120
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            max_text_length: 25