# layoutlmv3_base_inference.yaml (6.0 KB)

  # Inference configuration for the `layoutlmv3_base` ViT + FPN detector
  # (META_ARCHITECTURE: VLGeneralizedRCNN with CascadeROIHeads).
  #
  # The layout appears to be a Detectron2/yacs-style config dump: keys are
  # sorted alphabetically at every level. The nesting below was reconstructed
  # from that ordering after the viewer's "N. " list prefixes were stripped;
  # all keys and values are unchanged.
  # NOTE(review): diff against the upstream file to confirm the nesting.
  AUG:
    DETR: true
  # NOTE(review): machine-specific absolute path - confirm it exists on the target host.
  CACHE_DIR: /mnt/localdata/users/yupanhuang/cache/huggingface
  CUDNN_BENCHMARK: false
  DATALOADER:
    ASPECT_RATIO_GROUPING: true
    FILTER_EMPTY_ANNOTATIONS: false
    NUM_WORKERS: 4
    REPEAT_THRESHOLD: 0.0
    SAMPLER_TRAIN: TrainingSampler
  DATASETS:
    PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
    PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
    PROPOSAL_FILES_TEST: []
    PROPOSAL_FILES_TRAIN: []
    # NOTE(review): TEST and TRAIN both name `scihub_train` - confirm that
    # evaluating on the training split is intended.
    TEST:
    - scihub_train
    TRAIN:
    - scihub_train
  GLOBAL:
    HACK: 1.0
  ICDAR_DATA_DIR_TEST: ''
  ICDAR_DATA_DIR_TRAIN: ''
  INPUT:
    CROP:
      ENABLED: true
      SIZE:
      - 384
      - 600
      TYPE: absolute_range
    FORMAT: RGB
    MASK_FORMAT: polygon
    MAX_SIZE_TEST: 1333
    MAX_SIZE_TRAIN: 1333
    MIN_SIZE_TEST: 800
    MIN_SIZE_TRAIN:
    - 480
    - 512
    - 544
    - 576
    - 608
    - 640
    - 672
    - 704
    - 736
    - 768
    - 800
    MIN_SIZE_TRAIN_SAMPLING: choice
    RANDOM_FLIP: horizontal
  MODEL:
    ANCHOR_GENERATOR:
      # One inner list holding three angles.
      ANGLES:
      - - -90
        - 0
        - 90
      # One inner list holding three aspect ratios.
      ASPECT_RATIOS:
      - - 0.5
        - 1.0
        - 2.0
      NAME: DefaultAnchorGenerator
      OFFSET: 0.0
      # Five inner lists with one size each.
      SIZES:
      - - 32
      - - 64
      - - 128
      - - 256
      - - 512
    BACKBONE:
      FREEZE_AT: 2
      NAME: build_vit_fpn_backbone
    CONFIG_PATH: ''
    DEVICE: cuda
    FPN:
      FUSE_TYPE: sum
      # Same four layer names as MODEL.VIT.OUT_FEATURES.
      IN_FEATURES:
      - layer3
      - layer5
      - layer7
      - layer11
      NORM: ''
      OUT_CHANNELS: 256
    IMAGE_ONLY: true
    KEYPOINT_ON: false
    LOAD_PROPOSALS: false
    MASK_ON: true
    META_ARCHITECTURE: VLGeneralizedRCNN
    PANOPTIC_FPN:
      COMBINE:
        ENABLED: true
        INSTANCES_CONFIDENCE_THRESH: 0.5
        OVERLAP_THRESH: 0.5
        STUFF_AREA_LIMIT: 4096
      INSTANCE_LOSS_WEIGHT: 1.0
    PIXEL_MEAN:
    - 127.5
    - 127.5
    - 127.5
    PIXEL_STD:
    - 127.5
    - 127.5
    - 127.5
    PROPOSAL_GENERATOR:
      MIN_SIZE: 0
      NAME: RPN
    RESNETS:
      DEFORM_MODULATED: false
      DEFORM_NUM_GROUPS: 1
      DEFORM_ON_PER_STAGE:
      - false
      - false
      - false
      - false
      DEPTH: 50
      NORM: FrozenBN
      NUM_GROUPS: 1
      OUT_FEATURES:
      - res4
      RES2_OUT_CHANNELS: 256
      RES5_DILATION: 1
      STEM_OUT_CHANNELS: 64
      STRIDE_IN_1X1: true
      WIDTH_PER_GROUP: 64
    RETINANET:
      BBOX_REG_LOSS_TYPE: smooth_l1
      BBOX_REG_WEIGHTS:
      - 1.0
      - 1.0
      - 1.0
      - 1.0
      FOCAL_LOSS_ALPHA: 0.25
      FOCAL_LOSS_GAMMA: 2.0
      IN_FEATURES:
      - p3
      - p4
      - p5
      - p6
      - p7
      IOU_LABELS:
      - 0
      - -1
      - 1
      IOU_THRESHOLDS:
      - 0.4
      - 0.5
      NMS_THRESH_TEST: 0.5
      NORM: ''
      NUM_CLASSES: 10
      NUM_CONVS: 4
      PRIOR_PROB: 0.01
      SCORE_THRESH_TEST: 0.05
      SMOOTH_L1_LOSS_BETA: 0.1
      TOPK_CANDIDATES_TEST: 1000
    ROI_BOX_CASCADE_HEAD:
      # Three inner lists of four weights, matching the three IOUS entries below.
      BBOX_REG_WEIGHTS:
      - - 10.0
        - 10.0
        - 5.0
        - 5.0
      - - 20.0
        - 20.0
        - 10.0
        - 10.0
      - - 30.0
        - 30.0
        - 15.0
        - 15.0
      IOUS:
      - 0.5
      - 0.6
      - 0.7
    ROI_BOX_HEAD:
      BBOX_REG_LOSS_TYPE: smooth_l1
      BBOX_REG_LOSS_WEIGHT: 1.0
      BBOX_REG_WEIGHTS:
      - 10.0
      - 10.0
      - 5.0
      - 5.0
      CLS_AGNOSTIC_BBOX_REG: true
      CONV_DIM: 256
      FC_DIM: 1024
      NAME: FastRCNNConvFCHead
      NORM: ''
      NUM_CONV: 0
      NUM_FC: 2
      POOLER_RESOLUTION: 7
      POOLER_SAMPLING_RATIO: 0
      POOLER_TYPE: ROIAlignV2
      SMOOTH_L1_BETA: 0.0
      TRAIN_ON_PRED_BOXES: false
    ROI_HEADS:
      BATCH_SIZE_PER_IMAGE: 512
      IN_FEATURES:
      - p2
      - p3
      - p4
      - p5
      IOU_LABELS:
      - 0
      - 1
      IOU_THRESHOLDS:
      - 0.5
      NAME: CascadeROIHeads
      NMS_THRESH_TEST: 0.5
      NUM_CLASSES: 10
      POSITIVE_FRACTION: 0.25
      PROPOSAL_APPEND_GT: true
      SCORE_THRESH_TEST: 0.05
    ROI_KEYPOINT_HEAD:
      CONV_DIMS:
      - 512
      - 512
      - 512
      - 512
      - 512
      - 512
      - 512
      - 512
      LOSS_WEIGHT: 1.0
      MIN_KEYPOINTS_PER_IMAGE: 1
      NAME: KRCNNConvDeconvUpsampleHead
      NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
      NUM_KEYPOINTS: 17
      POOLER_RESOLUTION: 14
      POOLER_SAMPLING_RATIO: 0
      POOLER_TYPE: ROIAlignV2
    ROI_MASK_HEAD:
      CLS_AGNOSTIC_MASK: false
      CONV_DIM: 256
      NAME: MaskRCNNConvUpsampleHead
      NORM: ''
      NUM_CONV: 4
      POOLER_RESOLUTION: 14
      POOLER_SAMPLING_RATIO: 0
      POOLER_TYPE: ROIAlignV2
    RPN:
      BATCH_SIZE_PER_IMAGE: 256
      BBOX_REG_LOSS_TYPE: smooth_l1
      BBOX_REG_LOSS_WEIGHT: 1.0
      BBOX_REG_WEIGHTS:
      - 1.0
      - 1.0
      - 1.0
      - 1.0
      BOUNDARY_THRESH: -1
      # A single-element list containing the integer -1.
      CONV_DIMS:
      - -1
      HEAD_NAME: StandardRPNHead
      IN_FEATURES:
      - p2
      - p3
      - p4
      - p5
      - p6
      IOU_LABELS:
      - 0
      - -1
      - 1
      IOU_THRESHOLDS:
      - 0.3
      - 0.7
      LOSS_WEIGHT: 1.0
      NMS_THRESH: 0.7
      POSITIVE_FRACTION: 0.5
      POST_NMS_TOPK_TEST: 1000
      POST_NMS_TOPK_TRAIN: 2000
      PRE_NMS_TOPK_TEST: 1000
      PRE_NMS_TOPK_TRAIN: 2000
      SMOOTH_L1_BETA: 0.0
    SEM_SEG_HEAD:
      COMMON_STRIDE: 4
      CONVS_DIM: 128
      IGNORE_VALUE: 255
      IN_FEATURES:
      - p2
      - p3
      - p4
      - p5
      LOSS_WEIGHT: 1.0
      NAME: SemSegFPNHead
      NORM: GN
      NUM_CLASSES: 10
    VIT:
      DROP_PATH: 0.1
      IMG_SIZE:
      - 224
      - 224
      NAME: layoutlmv3_base
      OUT_FEATURES:
      - layer3
      - layer5
      - layer7
      - layer11
      POS_TYPE: abs
    # NOTE(review): left empty, so it loads as null rather than '' - presumably
    # the checkpoint path is supplied by the caller at runtime; confirm.
    WEIGHTS:
  # NOTE(review): left empty (loads as null) - presumably set by the caller; confirm.
  OUTPUT_DIR:
  # NOTE(review): machine-specific absolute path - confirm it exists on the target host.
  SCIHUB_DATA_DIR_TRAIN: /mnt/petrelfs/share_data/zhaozhiyuan/publaynet/layout_scihub/train
  SEED: 42
  SOLVER:
    AMP:
      ENABLED: true
    BACKBONE_MULTIPLIER: 1.0
    BASE_LR: 0.0002
    BIAS_LR_FACTOR: 1.0
    CHECKPOINT_PERIOD: 2000
    CLIP_GRADIENTS:
      CLIP_TYPE: full_model
      CLIP_VALUE: 1.0
      ENABLED: true
      NORM_TYPE: 2.0
    GAMMA: 0.1
    GRADIENT_ACCUMULATION_STEPS: 1
    IMS_PER_BATCH: 32
    LR_SCHEDULER_NAME: WarmupCosineLR
    MAX_ITER: 20000
    MOMENTUM: 0.9
    NESTEROV: false
    OPTIMIZER: ADAMW
    REFERENCE_WORLD_SIZE: 0
    STEPS:
    - 10000
    WARMUP_FACTOR: 0.01
    WARMUP_ITERS: 333
    WARMUP_METHOD: linear
    WEIGHT_DECAY: 0.05
    WEIGHT_DECAY_BIAS: null
    WEIGHT_DECAY_NORM: 0.0
  TEST:
    AUG:
      ENABLED: false
      FLIP: true
      MAX_SIZE: 4000
      MIN_SIZES:
      - 400
      - 500
      - 600
      - 700
      - 800
      - 900
      - 1000
      - 1100
      - 1200
    DETECTIONS_PER_IMAGE: 100
    EVAL_PERIOD: 1000
    EXPECTED_RESULTS: []
    KEYPOINT_OKS_SIGMAS: []
    PRECISE_BN:
      ENABLED: false
      NUM_ITER: 200
  VERSION: 2
  VIS_PERIOD: 0