# Training configuration for a MaskFormer segmentation model with a
# Swin-Tiny backbone. Top-level sections: global schedule, train/val datasets,
# model, optimizer, learning-rate scheduler, and loss.

# Global training schedule.
batch_size: 4
iters: 160000

# Training data and its augmentation pipeline (transforms run in list order).
train_dataset:
  type: Dataset
  # NOTE(review): the training root is `data/Cityscapes/` while val_dataset
  # uses `datasets/Cityscapes` -- confirm both directories are intended.
  dataset_root: data/Cityscapes/
  train_path: data/Cityscapes/train.txt
  # NOTE(review): 150 classes is used consistently across this file (dataset,
  # GenerateInstanceTargets, model, loss); verify it matches the label set of
  # the data under dataset_root.
  num_classes: 150
  transforms:
    # Multi-scale resize: one of the listed short-side sizes is used,
    # with the long side capped by max_size.
    - type: ResizeByShort
      short_size: [256, 307, 358, 409, 460, 512, 563, 614,
                   665, 716, 768, 819, 870, 921, 972, 1024]
      max_size: 2048
    - type: RandomPaddingCrop
      crop_size: [512, 512]
    # Every distortion has probability 1.0, i.e. all are always applied.
    - type: RandomDistort
      brightness_range: 0.125
      brightness_prob: 1.0
      contrast_range: 0.5
      contrast_prob: 1.0
      saturation_range: 0.5
      saturation_prob: 1.0
      hue_range: 18
      hue_prob: 1.0
    - type: RandomHorizontalFlip
    # num_classes here must stay in sync with model.num_classes.
    - type: GenerateInstanceTargets
      num_classes: 150
      ignore_index: 255
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: train

# Validation data: fixed-size resize (aspect ratio not kept) + normalization.
val_dataset:
  type: Dataset
  dataset_root: datasets/Cityscapes
  val_path: datasets/Cityscapes/val.txt
  num_classes: 150
  transforms:
    - type: Resize
      target_size: [512, 512]
      keep_ratio: false
      interp: LINEAR
    # Same normalization statistics as the training pipeline.
    - type: Normalize
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
  mode: val

# Network definition.
model:
  type: MaskFormer
  num_classes: 150
  backbone:
    type: SwinTransformer_tiny_patch4_window7_224_maskformer
    # NOTE(review): the URL path names an ADE20K MaskFormer pretrain
    # checkpoint -- confirm it is the intended initialization for this data.
    pretrained: https://bj.bcebos.com/paddleseg/paddleseg/dygraph/ade20k/maskformer_ade20k_swin_tiny/pretrain/model.pdparams

# Optimizer with per-parameter-group overrides matched by name.
optimizer:
  type: AdamW
  weight_decay: 0.01
  custom_cfg:
    - name: backbone
      lr_mult: 1.0
    # Weight decay is disabled (multiplier 0.0) for the two groups below.
    - name: norm
      weight_decay_mult: 0.0
    - name: relative_position_bias_table
      weight_decay_mult: 0.0
  grad_clip_cfg:
    name: ClipGradByNorm
    clip_norm: 0.01

# Polynomial decay from learning_rate to end_lr, preceded by a warmup phase
# that starts from warmup_start_lr.
lr_scheduler:
  type: PolynomialDecay
  warmup_iters: 1500
  warmup_start_lr: 6.0e-11
  learning_rate: 6.0e-05
  end_lr: 0
  power: 0.9

# Loss terms and their weights; `coef` has one entry per item in `types`.
loss:
  types:
    - type: MaskFormerLoss
      num_classes: 150
      eos_coef: 0.1
  coef: [1]