瀏覽代碼

add Mask_RCNN, SOLOv2, Mask_RT-DETR, PP-YOLOE_seg models (#1958)

* add Mask_RCNN, SOLOv2, Mask_RT-DETR models

* add Cascade-MaskRCNN

* add PP-YOLOE_seg-S
zhangyubo0722 1 年之前
父節點
當前提交
5a47020223
共有 36 個文件被更改，包括 3466 次插入和 21 次删除
  1. 2 1
      README.md
  2. 40 0
      paddlex/configs/instance_segmentation/Cascade-MaskRCNN-ResNet50-FPN.yaml
  3. 40 0
      paddlex/configs/instance_segmentation/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml
  4. 40 0
      paddlex/configs/instance_segmentation/Mask-RT-DETR-M.yaml
  5. 40 0
      paddlex/configs/instance_segmentation/Mask-RT-DETR-S.yaml
  6. 40 0
      paddlex/configs/instance_segmentation/Mask-RT-DETR-X.yaml
  7. 39 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNeXt101-vd-FPN.yaml
  8. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet101-FPN.yaml
  9. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet101-vd-FPN.yaml
  10. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-FPN.yaml
  11. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-vd-FPN.yaml
  12. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml
  13. 40 0
      paddlex/configs/instance_segmentation/MaskRCNN-ResNet50.yaml
  14. 40 0
      paddlex/configs/instance_segmentation/PP-YOLOE_seg-S.yaml
  15. 14 0
      paddlex/modules/base/predictor/utils/official_models.py
  16. 14 0
      paddlex/modules/instance_segmentation/model_list.py
  17. 3 0
      paddlex/modules/instance_segmentation/predictor/keys.py
  18. 31 14
      paddlex/modules/instance_segmentation/predictor/predictor.py
  19. 3 0
      paddlex/modules/object_detection/predictor/keys.py
  20. 73 4
      paddlex/modules/object_detection/predictor/transforms.py
  21. 1 2
      paddlex/repo_apis/PaddleDetection_api/config_helper.py
  22. 201 0
      paddlex/repo_apis/PaddleDetection_api/configs/Cascade-MaskRCNN-ResNet50-FPN.yaml
  23. 201 0
      paddlex/repo_apis/PaddleDetection_api/configs/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml
  24. 177 0
      paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-M.yaml
  25. 177 0
      paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-S.yaml
  26. 177 0
      paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-X.yaml
  27. 197 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNeXt101-vd-FPN.yaml
  28. 193 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet101-FPN.yaml
  29. 194 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet101-vd-FPN.yaml
  30. 193 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-FPN.yaml
  31. 194 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-vd-FPN.yaml
  32. 197 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml
  33. 189 0
      paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50.yaml
  34. 156 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE_seg-S.yaml
  35. 148 0
      paddlex/repo_apis/PaddleDetection_api/configs/SOLOv2.yaml
  36. 212 0
      paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py

+ 2 - 1
README.md

@@ -86,7 +86,8 @@ PaddleX 3.0 覆盖了 16 条产业级模型产线,其中 9 条基础产线可
     <td>基础产线</td>
     <td>通用实例分割</td>
     <td>实例分割</td>
-    <td>Mask-RT-DETR-L<br/>Mask-RT-DETR-H</td>
+    <td>Mask-RT-DETR-S<br/>Mask-RT-DETR-M<details>
+    <summary><b>more</b></summary><br/>Mask-RT-DETR-L<br/>Mask-RT-DETR-X<br/>Mask-RT-DETR-H<br/>SOLOv2<br/>MaskRCNN-ResNet50<br/>MaskRCNN-ResNet50-FPN<br/>MaskRCNN-ResNet50-vd-FPN<br/>MaskRCNN-ResNet50-vd-SSLDv2-FPN<br/>MaskRCNN-ResNet101-FPN<br/>MaskRCNN-ResNet101-vd-FPN<br/>MaskRCNN-ResNeXt101-vd-FPN<br/>Cascade-MaskRCNN-ResNet50-FPN<br/>Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN<br/>PP-YOLOE_seg-S</details></td>
   </tr>
   <tr>
     <td rowspan="3">基础产线</td>

+ 40 - 0
paddlex/configs/instance_segmentation/Cascade-MaskRCNN-ResNet50-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: Cascade-MaskRCNN-ResNet50-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 1
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 1
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/Mask-RT-DETR-M.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: Mask-RT-DETR-M
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.00005
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rtdetr_hgnetv2_m_6x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/Mask-RT-DETR-S.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: Mask-RT-DETR-S
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.00005
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rtdetr_hgnetv2_s_6x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/Mask-RT-DETR-X.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: Mask-RT-DETR-X
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.00005
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rtdetr_hgnetv2_x_6x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 39 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNeXt101-vd-FPN.yaml

@@ -0,0 +1,39 @@
+Global:
+  model: MaskRCNN-ResNeXt101-vd-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet101-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet101-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_fpn_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet101-vd-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet101-vd-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_vd_fpn_1x_coco.pdparams
+  
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet50-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-vd-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet50-vd-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet50-vd-SSLDv2-FPN
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  # COCO-pretrained weights matching Global.model (plain Mask R-CNN, not the Cascade variant).
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/MaskRCNN-ResNet50.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: MaskRCNN-ResNet50
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 2
+  learning_rate: 0.01
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_1x_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/instance_segmentation/PP-YOLOE_seg-S.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-YOLOE_seg-S
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/instance_seg/instance_seg_coco_examples/"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 2
+  epochs_iters: 40
+  batch_size: 8
+  learning_rate: 0.001
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_seg_s_80e_coco.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_instance_segmentation_004.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 14 - 0
paddlex/modules/base/predictor/utils/official_models.py

@@ -158,6 +158,20 @@ Deeplabv3_Plus-R101_infer.tar",
     "SeaFormer_large": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/SeaFormer_large_infer.tar",
     "Mask-RT-DETR-H": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Mask-RT-DETR-H_infer.tar",
     "Mask-RT-DETR-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Mask-RT-DETR-L_infer.tar",
+    "Mask-RT-DETR-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Mask-RT-DETR-S_infer.tar",
+    "Mask-RT-DETR-M": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Mask-RT-DETR-M_infer.tar",
+    "Mask-RT-DETR-X": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Mask-RT-DETR-X_infer.tar",
+    "SOLOv2": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/SOLOv2_infer.tar",
+    "MaskRCNN-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet50_infer.tar",
+    "MaskRCNN-ResNet50-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet50-FPN_infer.tar",
+    "MaskRCNN-ResNet50-vd-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet50-vd-FPN_infer.tar",
+    "MaskRCNN-ResNet50-vd-SSLDv2-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet50-vd-SSLDv2_infer.tar",
+    "MaskRCNN-ResNet101-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet101-FPN_infer.tar",
+    "MaskRCNN-ResNet101-vd-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNet101-vd-FPN_infer.tar",
+    "MaskRCNN-ResNeXt101-vd-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/MaskRCNN-ResNeXt101-vd-FPN_infer.tar",
+    "Cascade-MaskRCNN-ResNet50-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Cascade-MaskRCNN-ResNet50-FPN_infer.tar",
+    "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN_infer.tar",
+    "PP-YOLOE_seg-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE_seg-S_infer.tar",
     "PP-OCRv4_server_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/\
 PP-OCRv4_server_rec_infer.tar",
     "PP-OCRv4_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/\

+ 14 - 0
paddlex/modules/instance_segmentation/model_list.py

@@ -14,6 +14,20 @@
 
 
 MODELS = [
+    "Mask-RT-DETR-S",
+    "Mask-RT-DETR-M",
+    "Mask-RT-DETR-X",
     "Mask-RT-DETR-H",
     "Mask-RT-DETR-L",
+    "SOLOv2",
+    "MaskRCNN-ResNet50",
+    "MaskRCNN-ResNet50-FPN",
+    "MaskRCNN-ResNet50-vd-FPN",
+    "MaskRCNN-ResNet101-FPN",
+    "MaskRCNN-ResNet101-vd-FPN",
+    "MaskRCNN-ResNeXt101-vd-FPN",
+    "MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+    "Cascade-MaskRCNN-ResNet50-FPN",
+    "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+    "PP-YOLOE_seg-S",
 ]

+ 3 - 0
paddlex/modules/instance_segmentation/predictor/keys.py

@@ -29,3 +29,6 @@ class InstanceSegKeys(object):
     # Suite-specific keys
     BOXES = "boxes"
     MASKS = "masks"
+    SEGM = "segm"
+    LABEL = "label"
+    SCORE = "score"

+ 31 - 14
paddlex/modules/instance_segmentation/predictor/predictor.py

@@ -39,24 +39,41 @@ class InstanceSegPredictor(DetPredictor):
 
         input_ = [input_dict[i] for i in self._predictor.get_input_names()]
 
-        batch_np_boxes, batch_np_boxes_num, batch_np_masks = self._predictor.predict(
-            input_
-        )
-
         pred = batch_input
         box_idx_start = 0
-        for idx in range(len(batch_input)):
-            np_boxes_num = batch_np_boxes_num[idx]
-            box_idx_end = box_idx_start + np_boxes_num
-            np_boxes = batch_np_boxes[box_idx_start:box_idx_end]
-            np_masks = batch_np_masks[box_idx_start:box_idx_end]
-            box_idx_start = box_idx_end
 
-            batch_input[idx][K.BOXES] = np_boxes
-            batch_input[idx][K.MASKS] = np_masks
-        return pred
+        if self.model_name == "SOLOv2":
+            batch_np_boxes_num, batch_np_label, batch_np_score, batch_np_segm = (
+                self._predictor.predict(input_)
+            )
+            for idx in range(len(batch_input)):
+                np_boxes_num = batch_np_boxes_num
+                box_idx_end = box_idx_start + np_boxes_num
+                np_label = batch_np_label[box_idx_start:box_idx_end]
+                np_score = batch_np_score[box_idx_start:box_idx_end]
+                np_segm = batch_np_segm[box_idx_start:box_idx_end]
+                box_idx_start = box_idx_end
+
+                batch_input[idx][K.LABEL] = np_label
+                batch_input[idx][K.SCORE] = np_score
+                batch_input[idx][K.SEGM] = np_segm
+            return pred
+        else:
+            batch_np_boxes, batch_np_boxes_num, batch_np_masks = (
+                self._predictor.predict(input_)
+            )
+            for idx in range(len(batch_input)):
+                np_boxes_num = batch_np_boxes_num[idx]
+                box_idx_end = box_idx_start + np_boxes_num
+                np_boxes = batch_np_boxes[box_idx_start:box_idx_end]
+                np_masks = batch_np_masks[box_idx_start:box_idx_end]
+                box_idx_start = box_idx_end
+
+                batch_input[idx][K.BOXES] = np_boxes
+                batch_input[idx][K.MASKS] = np_masks
+            return pred
 
     @classmethod
     def get_output_keys(cls):
         """get output keys"""
-        return [K.BOXES, K.MASKS]
+        return [[K.LABEL, K.SCORE, K.SEGM], [K.BOXES, K.MASKS]]

+ 3 - 0
paddlex/modules/object_detection/predictor/keys.py

@@ -29,3 +29,6 @@ class DetKeys(object):
     # Suite-specific keys
     BOXES = "boxes"
     MASKS = "masks"
+    SEGM = "segm"
+    LABEL = "label"
+    SCORE = "score"

+ 73 - 4
paddlex/modules/object_detection/predictor/transforms.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import cv2
 
 import numpy as np
 import math
@@ -234,6 +235,62 @@ def draw_mask(im, np_boxes, np_masks, labels, threshold=0.5):
     return Image.fromarray(im.astype("uint8"))
 
 
+def draw_segm(im, np_segms, np_label, np_score, labels, threshold=0.5, alpha=0.7):
+    """
+    Draw instance segmentation results (mask, bounding box, label) on an image.
+
+    Args:
+        im: Input image; converted to a float32 array before blending.
+        np_segms: Per-instance masks indexed along axis 0 (presumably shape
+            (N, H, W) -- confirm against the predictor output). Cast to
+            uint8; non-zero entries mark the instance.
+        np_label: Per-instance class ids, used to index `labels` and the
+            color map.
+        np_score: Per-instance confidence scores.
+        labels: Class names; its length sets the size of the color map.
+        threshold: Instances whose score is below this value are skipped.
+        alpha: Blending weight of the mask color over the image pixels.
+
+    Returns:
+        The annotated image built by `Image.fromarray` from the uint8 array.
+    """
+    w_ratio = 0.4
+    color_list = get_color_map_list(len(labels))
+    im = np.array(im).astype("float32")
+    clsid2color = {}
+    np_segms = np_segms.astype(np.uint8)
+    for i in range(np_segms.shape[0]):
+        mask, score, clsid = np_segms[i], np_score[i], np_label[i]
+        if score < threshold:
+            continue
+
+        idx = np.nonzero(mask)
+        if idx[0].size == 0:
+            # An all-zero mask has no extent: nothing to blend and no box to draw.
+            continue
+
+        if clsid not in clsid2color:
+            # Lighten the base color once per class on a copy, so every
+            # instance of a class shares one color instead of getting
+            # progressively whiter through in-place mutation of the color map.
+            base_color = list(color_list[clsid])
+            for c in range(3):
+                base_color[c] = base_color[c] * (1 - w_ratio) + w_ratio * 255
+            clsid2color[clsid] = np.array(base_color)
+        color_mask = clsid2color[clsid]
+        box_color = tuple(color_mask.astype("int32").tolist())
+
+        # Clamp indices so a mask larger than the image cannot index out of bounds.
+        idx0 = np.minimum(idx[0], im.shape[0] - 1)
+        idx1 = np.minimum(idx[1], im.shape[1] - 1)
+        im[idx0, idx1, :] *= 1.0 - alpha
+        im[idx0, idx1, :] += alpha * color_mask
+
+        # Tight bounding box of the mask; plain ints keep OpenCV's point parsing happy.
+        x0, x1 = int(idx[1].min()), int(idx[1].max())
+        y0, y1 = int(idx[0].min()), int(idx[0].max())
+        cv2.rectangle(im, (x0, y0), (x1, y1), box_color, 1)
+
+        bbox_text = "%s %.2f" % (labels[clsid], score)
+        t_size = cv2.getTextSize(bbox_text, 0, 0.3, thickness=1)[0]
+        # Filled background strip above the box for the label text.
+        cv2.rectangle(
+            im,
+            (x0, y0),
+            (x0 + t_size[0], y0 - t_size[1] - 3),
+            box_color,
+            -1,
+        )
+        cv2.putText(
+            im,
+            bbox_text,
+            (x0, y0 - 2),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.3,
+            (0, 0, 0),
+            1,
+            lineType=cv2.LINE_AA,
+        )
+    return Image.fromarray(im.astype("uint8"))
+
+
 class SaveDetResults(BaseTransform):
     """Save Result Transform"""
 
@@ -262,7 +319,19 @@ class SaveDetResults(BaseTransform):
                 threshold=self.threshold,
                 labels=labels,
             )
-        image = draw_box(image, data[K.BOXES], threshold=self.threshold, labels=labels)
+        if K.SEGM in data:
+            image = draw_segm(
+                image,
+                data[K.SEGM],
+                data[K.LABEL],
+                data[K.SCORE],
+                labels=labels,
+                threshold=self.threshold,
+            )
+        if K.SEGM not in data:
+            image = draw_box(
+                image, data[K.BOXES], threshold=self.threshold, labels=labels
+            )
 
         self._write_image(save_path, image)
         return data
@@ -276,7 +345,7 @@ class SaveDetResults(BaseTransform):
     @classmethod
     def get_input_keys(cls):
         """get input keys"""
-        return [K.IM_PATH, K.BOXES]
+        return [[K.IM_PATH, K.BOXES], [K.IM_PATH]]
 
     @classmethod
     def get_output_keys(cls):
@@ -433,13 +502,13 @@ class PrintResult(BaseTransform):
     def apply(self, data):
         """apply"""
         logging.info("The prediction result is:")
-        logging.info(data[K.BOXES])
+        logging.info(data[K.BOXES] if K.BOXES in data else data[K.SEGM])
         return data
 
     @classmethod
     def get_input_keys(cls):
         """get input keys"""
-        return [K.BOXES]
+        return [[], [K.BOXES]]
 
     @classmethod
     def get_output_keys(cls):

+ 1 - 2
paddlex/repo_apis/PaddleDetection_api/config_helper.py

@@ -30,11 +30,10 @@ class PPDetConfigMixin(object):
         # XXX: This function relies on implementation details of PaddleDetection.
 
         BASE_KEY = "_BASE_"
-
         with open(config_path, "r", encoding="utf-8") as f:
             dic = yaml.load(f, Loader=_PPDetSerializableLoader)
-
         if not isinstance(dic, dict):
+            print(dic)
             raise TypeError
 
         if BASE_KEY in dic:

+ 201 - 0
paddlex/repo_apis/PaddleDetection_api/configs/Cascade-MaskRCNN-ResNet50-FPN.yaml

@@ -0,0 +1,201 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.0 # avoid gradient explosion in NPU
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# Model
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when export model.
+  nms: True           # Whether NMS is included in the network when export model.
+  benchmark: False    # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
+  fuse_conv_bn: False

+ 201 - 0
paddlex/repo_apis/PaddleDetection_api/configs/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml

@@ -0,0 +1,201 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.0 # avoid gradient explosion in NPU
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# Model
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 177 - 0
paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-M.yaml

@@ -0,0 +1,177 @@
+# Runtime
+epoch: 72
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+target_metrics: mask
+print_flops: false
+print_params: false
+use_ema: True
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: True
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 4
+
+TrainDataset:
+  name: COCOInstSegDataset
+  image_dir: train2017/
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCOInstSegDataset
+  image_dir: val2017/
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: datasets/COCO
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - Poly2Mask: {del_poly: True}
+    - RandomDistort: {prob: 0.8}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {prob: 0.8}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1 # must be 1
+  shuffle: false
+  drop_last: false
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 1.0
+    milestones: [100]
+    use_warmup: true
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 2000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Model
+architecture: DETR
+with_mask: True
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B2_ssld_stage1_pretrained.pdparams
+norm_type: sync_bn
+hidden_dim: 256
+use_focal_loss: True
+eval_size: [640, 640]
+num_prototypes: 128
+find_unused_parameters: True
+
+DETR:
+  backbone: PPHGNetV2
+  neck: MaskHybridEncoder
+  transformer: MaskRTDETR
+  detr_head: MaskDINOHead
+  post_process: DETRPostProcess
+
+PPHGNetV2:
+  arch: 'M'
+  return_idx: [0, 1, 2, 3]
+  freeze_stem_only: True
+  freeze_at: 0
+  freeze_norm: True
+  lr_mult_list: [0., 0.1, 0.1, 0.1, 0.1]
+
+MaskHybridEncoder:
+  hidden_dim: 256
+  use_encoder_idx: [2]
+  num_encoder_layers: 1
+  encoder_layer:
+    name: TransformerLayer
+    d_model: 256
+    nhead: 8
+    dim_feedforward: 1024
+    dropout: 0.
+    activation: 'gelu'
+  expansion: 0.5
+  mask_feat_channels: [64, 64]
+
+MaskRTDETR:
+  num_queries: 300
+  position_embed_type: sine
+  feat_strides: [8, 16, 32]
+  num_levels: 3
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.0
+  activation: relu
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: False
+  mask_enhanced: True
+
+MaskDINOHead:
+  loss:
+    name: MaskDINOLoss
+    loss_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+    aux_loss: True
+    use_vfl: True
+    vfl_iou_type: 'mask'
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+
+DETRPostProcess:
+  num_top_queries: 100
+  mask_stride: 4
+
+# Exporting the model
+export:
+  post_process: True
+  nms: True
+  benchmark: False
+  fuse_conv_bn: False

+ 177 - 0
paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-S.yaml

@@ -0,0 +1,177 @@
+# Runtime
+epoch: 72
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+target_metrics: mask
+print_flops: false
+print_params: false
+use_ema: True
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: True
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 4
+
+TrainDataset:
+  name: COCOInstSegDataset
+  image_dir: train2017/
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCOInstSegDataset
+  image_dir: val2017/
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: datasets/COCO
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - Poly2Mask: {del_poly: True}
+    - RandomDistort: {prob: 0.8}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {prob: 0.8}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1 # must be 1
+  shuffle: false
+  drop_last: false
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 1.0
+    milestones: [100]
+    use_warmup: true
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 2000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Model
+architecture: DETR
+with_mask: True
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B1_ssld_stage1_pretrained.pdparams
+norm_type: sync_bn
+hidden_dim: 256
+use_focal_loss: True
+eval_size: [640, 640]
+num_prototypes: 128
+find_unused_parameters: True
+
+DETR:
+  backbone: PPHGNetV2
+  neck: MaskHybridEncoder
+  transformer: MaskRTDETR
+  detr_head: MaskDINOHead
+  post_process: DETRPostProcess
+
+PPHGNetV2:
+  arch: 'S'
+  return_idx: [0, 1, 2, 3]
+  freeze_stem_only: True
+  freeze_at: 0
+  freeze_norm: True
+  lr_mult_list: [0., 0.1, 0.1, 0.1, 0.1]
+
+MaskHybridEncoder:
+  hidden_dim: 256
+  use_encoder_idx: [2]
+  num_encoder_layers: 1
+  encoder_layer:
+    name: TransformerLayer
+    d_model: 256
+    nhead: 8
+    dim_feedforward: 1024
+    dropout: 0.
+    activation: 'gelu'
+  expansion: 0.5
+  mask_feat_channels: [64, 32]
+
+MaskRTDETR:
+  num_queries: 300
+  position_embed_type: sine
+  feat_strides: [8, 16, 32]
+  num_levels: 3
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.0
+  activation: relu
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: False
+  mask_enhanced: True
+
+MaskDINOHead:
+  loss:
+    name: MaskDINOLoss
+    loss_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+    aux_loss: True
+    use_vfl: True
+    vfl_iou_type: 'mask'
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+
+DETRPostProcess:
+  num_top_queries: 100
+  mask_stride: 4
+
+# Exporting the model
+export:
+  post_process: True
+  nms: True
+  benchmark: False
+  fuse_conv_bn: False

+ 177 - 0
paddlex/repo_apis/PaddleDetection_api/configs/Mask-RT-DETR-X.yaml

@@ -0,0 +1,177 @@
+# Runtime
+epoch: 72
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+target_metrics: mask
+print_flops: false
+print_params: false
+use_ema: True
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: True
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 4
+
+TrainDataset:
+  name: COCOInstSegDataset
+  image_dir: train2017/
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCOInstSegDataset
+  image_dir: val2017/
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: datasets/COCO
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - Poly2Mask: {del_poly: True}
+    - RandomDistort: {prob: 0.8}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {prob: 0.8}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1 # must be 1
+  shuffle: false
+  drop_last: false
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 1.0
+    milestones: [100]
+    use_warmup: true
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 2000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Model
+architecture: DETR
+with_mask: True
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPHGNetV2_B5_ssld_stage1_pretrained.pdparams
+norm_type: sync_bn
+hidden_dim: 256
+use_focal_loss: True
+eval_size: [640, 640]
+num_prototypes: 128
+find_unused_parameters: True
+
+DETR:
+  backbone: PPHGNetV2
+  neck: MaskHybridEncoder
+  transformer: MaskRTDETR
+  detr_head: MaskDINOHead
+  post_process: DETRPostProcess
+
+PPHGNetV2:
+  arch: 'X'
+  return_idx: [0, 1, 2, 3]
+  freeze_stem_only: True
+  freeze_at: 0
+  freeze_norm: True
+  lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
+
+MaskHybridEncoder:
+  hidden_dim: 384
+  use_encoder_idx: [3]
+  num_encoder_layers: 1
+  encoder_layer:
+    name: TransformerLayer
+    d_model: 384
+    nhead: 8
+    dim_feedforward: 2048
+    dropout: 0.
+    activation: 'gelu'
+  expansion: 1.0
+  mask_feat_channels: [128, 128]
+
+MaskRTDETR:
+  num_queries: 300
+  position_embed_type: sine
+  feat_strides: [8, 16, 32]
+  num_levels: 3
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.0
+  activation: relu
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: False
+  mask_enhanced: True
+
+MaskDINOHead:
+  loss:
+    name: MaskDINOLoss
+    loss_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+    aux_loss: True
+    use_vfl: True
+    vfl_iou_type: 'mask'
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
+
+DETRPostProcess:
+  num_top_queries: 100
+  mask_stride: 4
+
+# Exporting the model
+export:
+  post_process: True
+  nms: True
+  benchmark: False
+  fuse_conv_bn: False

+ 197 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNeXt101-vd-FPN.yaml

@@ -0,0 +1,197 @@
+epoch: 24
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # for ResNeXt: groups, base_width, base_channels
+  depth: 101
+  variant: d
+  groups: 64
+  base_width: 4
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 193 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet101-FPN.yaml

@@ -0,0 +1,193 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 194 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet101-vd-FPN.yaml

@@ -0,0 +1,194 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 193 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-FPN.yaml

@@ -0,0 +1,193 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 194 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-vd-FPN.yaml

@@ -0,0 +1,194 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 197 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml

@@ -0,0 +1,197 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True
+  benchmark: False
+  fuse_conv_bn: False

+ 189 - 0
paddlex/repo_apis/PaddleDetection_api/configs/MaskRCNN-ResNet50.yaml

@@ -0,0 +1,189 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# dataset
+metric: COCO
+num_classes: 80
+worker_num: 2
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json 
+  dataset_dir: dataset/coco 
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# model
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+MaskRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [2]
+  num_stages: 3
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [32, 64, 128, 256, 512]
+    strides: [16]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+    topk_after_collect: False
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: Res5Head
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+  with_pool: true
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: true
+
+MaskFeat:
+  num_convs: 0
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 14
+
+MaskPostProcess:
+  binary_thresh: 0.5
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False
+  fuse_conv_bn: False

+ 156 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE_seg-S.yaml

@@ -0,0 +1,156 @@
+epoch: 80
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+target_metrics: mask
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {is_mask_crop: True}
+    - RandomFlip: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - Poly2Mask: {del_poly: True}
+  batch_transforms:
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: False
+
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+    - name: CosineDecay
+      max_epochs: 96
+    - name: LinearWarmup
+      start_factor: 0.
+      epochs: 5
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+# Model
+architecture: PPYOLOE
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+depth_mult: 0.33
+width_mult: 0.50
+
+with_mask: True
+
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEInsHead
+  post_process: ~
+  with_mask: True
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+  use_alpha: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+
+PPYOLOEInsHead:
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1 # only use TaskAlignedAssigner
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 300
+    score_threshold: 0.01
+    nms_threshold: 0.7
+    return_index: True
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 148 - 0
paddlex/repo_apis/PaddleDetection_api/configs/SOLOv2.yaml

@@ -0,0 +1,148 @@
+epoch: 12
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+target_metrics: mask
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 8
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - Poly2Mask: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - RandomFlip: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+                        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+                        coord_sigma: 0.2}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# Model
+architecture: SOLOv2
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+SOLOv2:
+  backbone: ResNet
+  neck: FPN
+  solov2_head: SOLOv2Head
+  mask_head: SOLOv2MaskHead
+
+ResNet:
+  depth: 50
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+SOLOv2Head:
+  seg_feat_channels: 512
+  stacked_convs: 4
+  num_grids: [40, 36, 24, 16, 12]
+  kernel_out_channels: 256
+  solov2_loss: SOLOv2Loss
+  mask_nms: MaskMatrixNMS
+
+SOLOv2MaskHead:
+  mid_channels: 128
+  out_channels: 256
+  start_level: 0
+  end_level: 3
+
+SOLOv2Loss:
+  ins_loss_weight: 3.0
+  focal_loss_gamma: 2.0
+  focal_loss_alpha: 0.25
+
+MaskMatrixNMS:
+  pre_nms_top_n: 500
+  post_nms_top_n: 100
+
+
+
+
+# Exporting the model
+export:
+  post_process: True 
+  nms: True 
+  benchmark: False 
+  fuse_conv_bn: False

+ 212 - 0
paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py

@@ -36,6 +36,36 @@ register_suite_info(
 ################ Models Using Universal Config ################
 register_model_info(
     {
+        "model_name": "Mask-RT-DETR-S",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "Mask-RT-DETR-S.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "Mask-RT-DETR-M",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "Mask-RT-DETR-M.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+register_model_info(
+    {
         "model_name": "Mask-RT-DETR-L",
         "suite": "InstanceSeg",
         "config_path": osp.join(PDX_CONFIG_DIR, "Mask-RT-DETR-L.yaml"),
@@ -51,6 +81,21 @@ register_model_info(
 
 register_model_info(
     {
+        "model_name": "Mask-RT-DETR-X",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "Mask-RT-DETR-X.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+register_model_info(
+    {
         "model_name": "Mask-RT-DETR-H",
         "suite": "InstanceSeg",
         "config_path": osp.join(PDX_CONFIG_DIR, "Mask-RT-DETR-H.yaml"),
@@ -63,3 +108,170 @@ register_model_info(
         },
     }
 )
+
+register_model_info(  # Registers the SOLOv2 entry (suite "InstanceSeg").
+    {
+        "model_name": "SOLOv2",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "SOLOv2.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet50 entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNet50",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet50.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet50-FPN entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNet50-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet50-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet50-vd-FPN entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNet50-vd-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet50-vd-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet50-vd-SSLDv2-FPN entry.
+    {
+        "model_name": "MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet101-FPN entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNet101-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet101-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNet101-vd-FPN entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNet101-vd-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNet101-vd-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the MaskRCNN-ResNeXt101-vd-FPN entry (suite "InstanceSeg").
+    {
+        "model_name": "MaskRCNN-ResNeXt101-vd-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "MaskRCNN-ResNeXt101-vd-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the Cascade-MaskRCNN-ResNet50-FPN entry.
+    {
+        "model_name": "Cascade-MaskRCNN-ResNet50-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "Cascade-MaskRCNN-ResNet50-FPN.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN entry.
+    {
+        "model_name": "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(  # same PDX_CONFIG_DIR join as sibling entries, wrapped
+            PDX_CONFIG_DIR, "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml"
+        ),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)
+
+register_model_info(  # Registers the PP-YOLOE_seg-S entry (suite "InstanceSeg").
+    {
+        "model_name": "PP-YOLOE_seg-S",
+        "suite": "InstanceSeg",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-YOLOE_seg-S.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export"],
+        "supported_dataset_types": ["COCOInstSegDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,  # NOTE(review): presumably dynamic-to-static; confirm
+            "amp": ["OFF"],  # NOTE(review): presumably mixed precision; only "OFF" listed
+        },
+    }
+)