zhengchun
/
PaddleX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
							epoch: 12
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
log_iter: 20
save_dir: output
target_metrics: mask
snapshot_epoch: 1
print_flops: false
print_params: false

# Dataset
metric: COCO
num_classes: 80
worker_num: 2

TrainDataset:
  name: COCODataSet
  image_dir: train2017
  anno_path: annotations/instances_train2017.json
  dataset_dir: dataset/coco
  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']

EvalDataset:
  name: COCODataSet
  image_dir: val2017
  anno_path: annotations/instances_val2017.json
  dataset_dir: dataset/coco

TestDataset:
  name: ImageFolder
  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'


TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: true
  drop_last: true
  collate_batch: false


EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false


TestReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

LearningRate:
  base_lr: 0.01
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.001
    steps: 1000

OptimizerBuilder:
  clip_grad_by_norm: 35.0 # avoid gradient explosion in NPU
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2

# Model
architecture: CascadeRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams


CascadeRCNN:
  backbone: ResNet
  neck: FPN
  rpn_head: RPNHead
  bbox_head: CascadeHead
  mask_head: MaskHead
  # post process
  bbox_post_process: BBoxPostProcess
  mask_post_process: MaskPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 2000
    topk_after_collect: True
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000


CascadeHead:
  head: CascadeTwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  cascade_iou: [0.5, 0.6, 0.7]
  use_random: True

CascadeTwoFCHead:
  out_channel: 1024

BBoxPostProcess:
  decode:
    name: RCNNBox
    prior_box_var: [30.0, 30.0, 15.0, 15.0]
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5


MaskHead:
  head: MaskFeat
  roi_extractor:
    resolution: 14
    sampling_ratio: 0
    aligned: True
  mask_assigner: MaskAssigner
  share_bbox_feat: False

MaskFeat:
  num_convs: 4
  out_channel: 256

MaskAssigner:
  mask_resolution: 28

MaskPostProcess:
  binary_thresh: 0.5


# Exporting the model
export:
  post_process: True  # Whether post-processing is included in the network when export model.
  nms: True           # Whether NMS is included in the network when export model.
  benchmark: False    # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
  fuse_conv_bn: False