|
|
@@ -0,0 +1,141 @@
|
|
|
+Global:
|
|
|
+ use_gpu: True
|
|
|
+ epoch_num: 10
|
|
|
+ log_smooth_window: 20
|
|
|
+ print_batch_step: 20
|
|
|
+ save_model_dir: ./output/SLANet_ch
|
|
|
+ save_epoch_step: 400
|
|
|
+ # evaluation is run every 331 iterations after the 0th iteration
|
|
|
+ eval_batch_step: [0, 100]
|
|
|
+ cal_metric_during_train: True
|
|
|
+ pretrained_model:
|
|
|
+ checkpoints:
|
|
|
+ save_inference_dir: ./output/SLANet_ch/infer
|
|
|
+ use_visualdl: False
|
|
|
+ infer_img: ppstructure/docs/table/table.jpg
|
|
|
+ # for data or label process
|
|
|
+ character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
|
|
|
+ character_type: en
|
|
|
+ max_text_length: &max_text_length 500
|
|
|
+ box_format: &box_format xyxyxyxy # 'xywh', 'xyxy', 'xyxyxyxy'
|
|
|
+ infer_mode: False
|
|
|
+ use_sync_bn: True
|
|
|
+ save_res_path: output/infer
|
|
|
+
|
|
|
+Optimizer:
|
|
|
+ name: Adam
|
|
|
+ beta1: 0.9
|
|
|
+ beta2: 0.999
|
|
|
+ clip_norm: 5.0
|
|
|
+ lr:
|
|
|
+ learning_rate: 0.001
|
|
|
+ regularizer:
|
|
|
+ name: 'L2'
|
|
|
+ factor: 0.00000
|
|
|
+
|
|
|
+Architecture:
|
|
|
+ model_type: table
|
|
|
+ algorithm: SLANet
|
|
|
+ Backbone:
|
|
|
+ name: PPLCNet
|
|
|
+ scale: 1.0
|
|
|
+ pretrained: True
|
|
|
+ use_ssld: True
|
|
|
+ Neck:
|
|
|
+ name: CSPPAN
|
|
|
+ out_channels: 96
|
|
|
+ Head:
|
|
|
+ name: SLAHead
|
|
|
+ hidden_size: 256
|
|
|
+ max_text_length: *max_text_length
|
|
|
+ loc_reg_num: &loc_reg_num 8
|
|
|
+
|
|
|
+Loss:
|
|
|
+ name: SLALoss
|
|
|
+ structure_weight: 1.0
|
|
|
+ loc_weight: 2.0
|
|
|
+ loc_loss: smooth_l1
|
|
|
+
|
|
|
+PostProcess:
|
|
|
+ name: TableLabelDecode
|
|
|
+ merge_no_span_structure: &merge_no_span_structure True
|
|
|
+
|
|
|
+Metric:
|
|
|
+ name: TableMetric
|
|
|
+ main_indicator: acc
|
|
|
+ compute_bbox_metric: False
|
|
|
+ loc_reg_num: *loc_reg_num
|
|
|
+ box_format: *box_format
|
|
|
+ del_thead_tbody: True
|
|
|
+
|
|
|
+Train:
|
|
|
+ dataset:
|
|
|
+ name: PubTabTableRecDataset
|
|
|
+ data_dir: train_data/table/train/
|
|
|
+ label_file_list: [train_data/table/train.txt]
|
|
|
+ transforms:
|
|
|
+ - DecodeImage:
|
|
|
+ img_mode: BGR
|
|
|
+ channel_first: False
|
|
|
+ - TableLabelEncode:
|
|
|
+ learn_empty_box: False
|
|
|
+ merge_no_span_structure: *merge_no_span_structure
|
|
|
+ replace_empty_cell_token: False
|
|
|
+ loc_reg_num: *loc_reg_num
|
|
|
+ max_text_length: *max_text_length
|
|
|
+ - TableBoxEncode:
|
|
|
+ in_box_format: *box_format
|
|
|
+ out_box_format: *box_format
|
|
|
+ - ResizeTableImage:
|
|
|
+ max_len: 488
|
|
|
+ - NormalizeImage:
|
|
|
+ scale: 1./255.
|
|
|
+ mean: [0.485, 0.456, 0.406]
|
|
|
+ std: [0.229, 0.224, 0.225]
|
|
|
+ order: 'hwc'
|
|
|
+ - PaddingTableImage:
|
|
|
+ size: [488, 488]
|
|
|
+ - ToCHWImage:
|
|
|
+ - KeepKeys:
|
|
|
+ keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
|
|
|
+ loader:
|
|
|
+ shuffle: True
|
|
|
+ batch_size_per_card: 48
|
|
|
+ drop_last: True
|
|
|
+ num_workers: 1
|
|
|
+
|
|
|
+Eval:
|
|
|
+ dataset:
|
|
|
+ name: PubTabTableRecDataset
|
|
|
+ data_dir: train_data/table/val/
|
|
|
+ label_file_list: [train_data/table/val.txt]
|
|
|
+ transforms:
|
|
|
+ - DecodeImage:
|
|
|
+ img_mode: BGR
|
|
|
+ channel_first: False
|
|
|
+ - TableLabelEncode:
|
|
|
+ learn_empty_box: False
|
|
|
+ merge_no_span_structure: *merge_no_span_structure
|
|
|
+ replace_empty_cell_token: False
|
|
|
+ loc_reg_num: *loc_reg_num
|
|
|
+ max_text_length: *max_text_length
|
|
|
+ - TableBoxEncode:
|
|
|
+ in_box_format: *box_format
|
|
|
+ out_box_format: *box_format
|
|
|
+ - ResizeTableImage:
|
|
|
+ max_len: 488
|
|
|
+ - NormalizeImage:
|
|
|
+ scale: 1./255.
|
|
|
+ mean: [0.485, 0.456, 0.406]
|
|
|
+ std: [0.229, 0.224, 0.225]
|
|
|
+ order: 'hwc'
|
|
|
+ - PaddingTableImage:
|
|
|
+ size: [488, 488]
|
|
|
+ - ToCHWImage:
|
|
|
+ - KeepKeys:
|
|
|
+ keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
|
|
|
+ loader:
|
|
|
+ shuffle: False
|
|
|
+ drop_last: False
|
|
|
+ batch_size_per_card: 48
|
|
|
+ num_workers: 1
|