# Source: SLANet_plus.yaml (3.6 KB)
  # Global run settings: device, schedule, logging, output paths, and the
  # label-processing options shared with other sections through anchors.
  Global:
    use_gpu: True
    epoch_num: 10
    log_smooth_window: 20
    print_batch_step: 20
    save_model_dir: ./output/SLANet_ch
    # NOTE(review): save_epoch_step (400) is larger than epoch_num (10);
    # confirm that skipping periodic per-epoch saves is intended.
    save_epoch_step: 400
    # evaluation is run every 100 iterations after the 0th iteration
    eval_batch_step: [0, 100]
    cal_metric_during_train: True
    # Left empty (null) in this config.
    pretrained_model:
    checkpoints:
    save_inference_dir: ./output/SLANet_ch/infer
    use_visualdl: False
    infer_img: ppstructure/docs/table/table.jpg
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
    character_type: en
    # Anchor aliased by Architecture.Head and by TableLabelEncode in Train/Eval.
    max_text_length: &max_text_length 500
    # Anchor aliased by Metric and by TableBoxEncode in Train/Eval.
    box_format: &box_format xyxyxyxy  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: False
    use_sync_bn: True
    save_res_path: output/infer
    d2s_train_image_shape: [3, 488, 488]
  # Optimizer settings: Adam with gradient-norm clipping, a fixed base learning
  # rate, and an L2 regularizer whose factor is zero.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      learning_rate: 0.001
    regularizer:
      name: 'L2'
      factor: 0.00000
  # Model definition: a table model using the SLANet algorithm, composed of a
  # Backbone, a Neck, and a Head.
  Architecture:
    model_type: table
    algorithm: SLANet
    Backbone:
      name: PPLCNet
      scale: 1.0
      pretrained: True
      use_ssld: True
    Neck:
      name: CSPPAN
      out_channels: 96
    Head:
      name: SLAHead
      hidden_size: 256
      # Alias of the max_text_length anchor defined under Global.
      max_text_length: *max_text_length
      # Anchor aliased by Metric and by TableLabelEncode in Train/Eval.
      loc_reg_num: &loc_reg_num 8
  # Loss settings: SLALoss with separate weights for the structure term and
  # the location term.
  Loss:
    name: SLALoss
    structure_weight: 1.0
    loc_weight: 2.0
    loc_loss: smooth_l1
  # Post-processing settings for decoding model output.
  PostProcess:
    name: TableLabelDecode
    # Anchor aliased by TableLabelEncode in Train/Eval so that encoding and
    # decoding use the same setting.
    merge_no_span_structure: &merge_no_span_structure True
  # Evaluation metric settings; `acc` is the main indicator.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: False
    # Aliases of the anchors defined under Architecture.Head and Global.
    loc_reg_num: *loc_reg_num
    box_format: *box_format
    del_thead_tbody: True
  # Training data: dataset location, the ordered list of transforms applied to
  # each sample, and data-loader settings.
  Train:
    dataset:
      name: PubTabTableRecDataset
      data_dir: train_data/table/train/
      label_file_list: [train_data/table/train.txt]
      # Each list item is one transform; its nested mapping holds that
      # transform's arguments. Order is significant.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: False
        - TableLabelEncode:
            learn_empty_box: False
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: False
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
            resize_bboxes: True
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # No arguments are given for this transform.
        - ToCHWImage:
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
    loader:
      shuffle: True
      batch_size_per_card: 48
      drop_last: True
      num_workers: 1
  # Evaluation data: same transform list as Train, read from the validation
  # split, with shuffling and drop_last turned off in the loader.
  Eval:
    dataset:
      name: PubTabTableRecDataset
      data_dir: train_data/table/val/
      label_file_list: [train_data/table/val.txt]
      # Each list item is one transform; its nested mapping holds that
      # transform's arguments. Order is significant.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: False
        - TableLabelEncode:
            learn_empty_box: False
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: False
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
            resize_bboxes: True
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # No arguments are given for this transform.
        - ToCHWImage:
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 48
      num_workers: 1