Browse Source

add det models (#2041)

* add det models

* add det models
yongsheng yuan 1 year ago
parent
commit
66289e36cf
44 changed files with 4144 additions and 3 deletions
  1. 23 3
      README.md
  2. 30 0
      docs/tutorials/models/support_model_list.md
  3. 40 0
      paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml
  4. 41 0
      paddlex/configs/human_detection/PP-YOLOE-L_human.yaml
  5. 41 0
      paddlex/configs/human_detection/PP-YOLOE-S_human.yaml
  6. 40 0
      paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml
  7. 40 0
      paddlex/configs/object_detection/CenterNet-DLA-34.yaml
  8. 40 0
      paddlex/configs/object_detection/CenterNet-ResNet50.yaml
  9. 41 0
      paddlex/configs/object_detection/DETR-R50.yaml
  10. 40 0
      paddlex/configs/object_detection/FCOS-ResNet50.yaml
  11. 41 0
      paddlex/configs/object_detection/PicoDet-M.yaml
  12. 41 0
      paddlex/configs/object_detection/PicoDet-XS.yaml
  13. 41 0
      paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml
  14. 41 0
      paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml
  15. 41 0
      paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml
  16. 40 0
      paddlex/configs/structure_analysis/PicoDet-L_layout.yaml
  17. 40 0
      paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml
  18. 40 0
      paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml
  19. 40 0
      paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml
  20. 41 0
      paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml
  21. 162 0
      paddlex/modules/base/predictor/transforms/image_common.py
  22. 18 0
      paddlex/modules/base/predictor/utils/official_models.py
  23. 18 0
      paddlex/modules/object_detection/model_list.py
  24. 5 0
      paddlex/modules/object_detection/predictor/utils.py
  25. 119 0
      paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-DLA-34.yaml
  26. 130 0
      paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-ResNet50.yaml
  27. 153 0
      paddlex/repo_apis/PaddleDetection_api/configs/DETR-R50.yaml
  28. 157 0
      paddlex/repo_apis/PaddleDetection_api/configs/FCOS-ResNet50.yaml
  29. 169 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-ShiTuV2_det.yaml
  30. 164 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-L.yaml
  31. 164 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-S.yaml
  32. 166 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-largesize-L.yaml
  33. 159 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_human.yaml
  34. 156 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_vehicle.yaml
  35. 159 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_human.yaml
  36. 156 0
      paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_vehicle.yaml
  37. 165 0
      paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-L_layout.yaml
  38. 170 0
      paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-M.yaml
  39. 170 0
      paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-XS.yaml
  40. 167 0
      paddlex/repo_apis/PaddleDetection_api/configs/PicoDet_LCNet_x2_5_face.yaml
  41. 172 0
      paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_17cls.yaml
  42. 172 0
      paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_3cls.yaml
  43. 5 0
      paddlex/repo_apis/PaddleDetection_api/object_det/config.py
  44. 286 0
      paddlex/repo_apis/PaddleDetection_api/object_det/register.py

+ 23 - 3
README.md

@@ -69,11 +69,31 @@ PaddleX 3.0 覆盖了 16 条产业级模型产线,其中 9 条基础产线可
     <td>PP-LCNet_x1_0_doc_ori</td>
     <td>PP-LCNet_x1_0_doc_ori</td>
   </tr>
   </tr>
   <tr>
   <tr>
-    <td>基础产线</td>
-    <td>通用目标检测</td>
+    <td rowspan="6">基础产线</td>
+    <td rowspan="6">通用目标检测</td>
     <td>目标检测</td>
     <td>目标检测</td>
     <td>PicoDet-S<br/>PicoDet-L<details>
     <td>PicoDet-S<br/>PicoDet-L<details>
-    <summary><b>more</b></summary><br/>PP-YOLOE_plus-S<br/>PP-YOLOE_plus-M<br/>PP-YOLOE_plus-L<br/>PP-YOLOE_plus-X<br/>RT-DETR-L<br/>RT-DETR-H<br/>RT-DETR-X<br/>RT-DETR-R18<br/>RT-DETR-R50<br/>YOLOv3-DarkNet53<br/>YOLOv3-MobileNetV3<br/>YOLOv3-ResNet50_vd_DCN<br/>YOLOX-L<br/>YOLOX-M<br/>YOLOX-N<br/>YOLOX-S<br/>YOLOX-T<br/>YOLOX-X<br/>FasterRCNN-ResNet34-FPN<br/>FasterRCNN-ResNet50<br/>FasterRCNN-ResNet50-FPN<br/>FasterRCNN-ResNet50-vd-FPN<br/>FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FasterRCNN-ResNet101<br/>FasterRCNN-ResNet101-FPN<br/>FasterRCNN-ResNeXt101-vd-FPN<br/>FasterRCNN-Swin-Tiny-FPN<br/>Cascade-FasterRCNN-ResNet50-FPN<br/>Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN</details></td>
+    <summary><b>more</b></summary><br/>PicoDet-XS<br/>PicoDet-M<br/>PP-YOLOE_plus-S<br/>PP-YOLOE_plus-M<br/>PP-YOLOE_plus-L<br/>PP-YOLOE_plus-X<br/>RT-DETR-L<br/>RT-DETR-H<br/>RT-DETR-X<br/>RT-DETR-R18<br/>RT-DETR-R50<br/>YOLOv3-DarkNet53<br/>YOLOv3-MobileNetV3<br/>YOLOv3-ResNet50_vd_DCN<br/>YOLOX-L<br/>YOLOX-M<br/>YOLOX-N<br/>YOLOX-S<br/>YOLOX-T<br/>YOLOX-X<br/>FasterRCNN-ResNet34-FPN<br/>FasterRCNN-ResNet50<br/>FasterRCNN-ResNet50-FPN<br/>FasterRCNN-ResNet50-vd-FPN<br/>FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FasterRCNN-ResNet101<br/>FasterRCNN-ResNet101-FPN<br/>FasterRCNN-ResNeXt101-vd-FPN<br/>FasterRCNN-Swin-Tiny-FPN<br/>Cascade-FasterRCNN-ResNet50-FPN<br/>Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FCOS-ResNet50<br/>DETR-R50<br/>CenterNet-DLA-34<br/>CenterNet-ResNet50</details></td>
+  </tr>
+  <tr>
+    <td>行人检测</td>
+    <td>PP-YOLOE-S_human<br/>PP-YOLOE-L_human</td>
+  </tr>
+  <tr>
+    <td>车辆检测</td>
+    <td>PP-YOLOE-L_vehicle<br/>PP-YOLOE-S_vehicle</td>
+  </tr>
+  <tr>
+    <td>小目标检测</td>
+    <td>PP-YOLOE+_SOD-L<br/>PP-YOLOE+_SOD-S<br/>PP-YOLOE+_SOD-largesize-L</td>
+  </tr>
+  <tr>
+    <td>主体检测</td>
+    <td>PP-ShiTuV2_det</td>
+  </tr>
+  <tr>
+    <td>人脸检测</td>
+    <td>PicoDet_LCNet_x2_5_face</td>
   </tr>
   </tr>
   <tr>
   <tr>
     <td rowspan="2">基础产线</td>
     <td rowspan="2">基础产线</td>

+ 30 - 0
docs/tutorials/models/support_model_list.md

@@ -120,6 +120,9 @@
 | PP-YOLOE_plus-M | [PP-YOLOE_plus-M.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-M.yaml)|
 | PP-YOLOE_plus-M | [PP-YOLOE_plus-M.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-M.yaml)|
 | PP-YOLOE_plus-L | [PP-YOLOE_plus-L.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-L.yaml)|
 | PP-YOLOE_plus-L | [PP-YOLOE_plus-L.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-L.yaml)|
 | PP-YOLOE_plus-X | [PP-YOLOE_plus-X.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-X.yaml)|
 | PP-YOLOE_plus-X | [PP-YOLOE_plus-X.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-X.yaml)|
+| PP-YOLOE+_SOD-L | [PP-YOLOE+_SOD-L.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml)|
+| PP-YOLOE+_SOD-S | [PP-YOLOE+_SOD-S.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml)|
+| PP-YOLOE+_SOD-largesize-L | [PP-YOLOE+_SOD-largesize-L.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml)|
 ### 2. RT-DETR 系列
 ### 2. RT-DETR 系列
 | 模型名称 | config |
 | 模型名称 | config |
 | :--- | :---: |
 | :--- | :---: |
@@ -133,6 +136,10 @@
 | :--- | :---: |
 | :--- | :---: |
 | PicoDet-S | [PicoDet-S.yaml](../../../paddlex/configs/object_detection/PicoDet-S.yaml)|
 | PicoDet-S | [PicoDet-S.yaml](../../../paddlex/configs/object_detection/PicoDet-S.yaml)|
 | PicoDet-L | [PicoDet-L.yaml](../../../paddlex/configs/object_detection/PicoDet-L.yaml)|
 | PicoDet-L | [PicoDet-L.yaml](../../../paddlex/configs/object_detection/PicoDet-L.yaml)|
+| PicoDet-M | [PicoDet-M.yaml](../../../paddlex/configs/object_detection/PicoDet-M.yaml)|
+| PicoDet-XS | [PicoDet-XS.yaml](../../../paddlex/configs/object_detection/PicoDet-XS.yaml)|
+| PP-ShiTuV2_det | [PP-ShiTuV2_det.yaml](../../../paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml)|
+| PicoDet_LCNet_x2_5_face | [PicoDet_LCNet_x2_5_face.yaml](../../../paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml)|
 ### 4. YOLOv3 系列
 ### 4. YOLOv3 系列
 | 模型名称 | config |
 | 模型名称 | config |
 | :--- | :---: |
 | :--- | :---: |
@@ -165,6 +172,26 @@
 | :--- | :---: |
 | :--- | :---: |
 | Cascade-FasterRCNN-ResNet50-FPN | [Cascade-FasterRCNN-ResNet50-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-FPN.yaml)|
 | Cascade-FasterRCNN-ResNet50-FPN | [Cascade-FasterRCNN-ResNet50-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-FPN.yaml)|
 | Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN | [Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml)|
 | Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN | [Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml)|
+### 8. FCOS 系列
+| 模型名称 | config |
+| :--- | :---: |
+| FCOS-ResNet50 | [FCOS-ResNet50.yaml](../../../paddlex/configs/object_detection/FCOS-ResNet50.yaml)|
+### 9. DETR 系列
+| 模型名称 | config |
+| :--- | :---: |
+| DETR-R50 | [DETR-R50.yaml](../../../paddlex/configs/object_detection/DETR-R50.yaml)|
+### 10. PP-YOLOE 系列
+| 模型名称 | config |
+| :--- | :---: |
+| PP-YOLOE-L_vehicle | [PP-YOLOE-L_vehicle.yaml](../../../paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml)|
+| PP-YOLOE-S_vehicle | [PP-YOLOE-S_vehicle.yaml](../../../paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml)|
+| PP-YOLOE-L_human | [PP-YOLOE-L_human.yaml](../../../paddlex/configs/human_detection/PP-YOLOE-L_human.yaml)|
+| PP-YOLOE-S_human | [PP-YOLOE-S_human.yaml](../../../paddlex/configs/human_detection/PP-YOLOE-S_human.yaml)|
+### 11. CenterNet 系列
+| 模型名称 | config |
+| :--- | :---: |
+| CenterNet-DLA-34 | [CenterNet-DLA-34.yaml](../../../paddlex/configs/object_detection/CenterNet-DLA-34.yaml)|
+| CenterNet-ResNet50 | [CenterNet-ResNet50.yaml](../../../paddlex/configs/object_detection/CenterNet-ResNet50.yaml)|
 ## 三、实例分割
 ## 三、实例分割
 ### 1.Mask-RT-DETR 系列
 ### 1.Mask-RT-DETR 系列
 | 模型名称 | config |
 | 模型名称 | config |
@@ -232,6 +259,9 @@
 | 模型名称 | config |
 | 模型名称 | config |
 | :--- | :---: |
 | :--- | :---: |
 | PicoDet_layout_1x | [PicoDet_layout_1x.yaml](../../../paddlex/configs/structure_analysis/PicoDet_layout_1x.yaml)|
 | PicoDet_layout_1x | [PicoDet_layout_1x.yaml](../../../paddlex/configs/structure_analysis/PicoDet_layout_1x.yaml)|
+| PicoDet-L_layout | [PicoDet-L_layout.yaml](../../../paddlex/configs/structure_analysis/PicoDet-L_layout.yaml)|
+| RT-DETR-H_layout_3cls | [RT-DETR-H_layout_3cls.yaml](../../../paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml)|
+| RT-DETR-H_layout_17cls | [RT-DETR-H_layout_17cls.yaml](../../../paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml)|
 ## 十、时序异常检测
 ## 十、时序异常检测
 | 模型名称 | config |
 | 模型名称 | config |
 | :--- | :---: |
 | :--- | :---: |

+ 40 - 0
paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PicoDet_LCNet_x2_5_face
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 4
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/PicoDet_LCNet_x2_5_face_pretrain.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/face_detection.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 41 - 0
paddlex/configs/human_detection/PP-YOLOE-L_human.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE-L_human
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/human_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_crowdhuman.pdparams

+ 41 - 0
paddlex/configs/human_detection/PP-YOLOE-S_human.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE-S_human
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/human_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_36e_crowdhuman.pdparams

+ 40 - 0
paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-ShiTuV2_det
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_lcnet_x2_5_640_mainbody.pdparams

+ 40 - 0
paddlex/configs/object_detection/CenterNet-DLA-34.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: CenterNet-DLA-34
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: # 0.08
+  pretrain_weight_path: null
+  warmup_steps: # 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/centernet_dla34_140e_coco.pdparams

+ 40 - 0
paddlex/configs/object_detection/CenterNet-ResNet50.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: CenterNet-ResNet50
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: # 0.08
+  pretrain_weight_path: null
+  warmup_steps: # 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/centernet_r50_140e_coco.pdparams

+ 41 - 0
paddlex/configs/object_detection/DETR-R50.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: DETR-R50
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams

+ 40 - 0
paddlex/configs/object_detection/FCOS-ResNet50.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: FCOS-ResNet50
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 4
+  learning_rate: 0.002
+  pretrain_weight_path: null
+  warmup_steps: # 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams

+ 41 - 0
paddlex/configs/object_detection/PicoDet-M.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PicoDet-M
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: 0.08
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_m_416_coco_lcnet.pdparams

+ 41 - 0
paddlex/configs/object_detection/PicoDet-XS.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PicoDet-XS
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_xs_416_coco_lcnet.pdparams

+ 41 - 0
paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE+_SOD-L
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: 0.001
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams

+ 41 - 0
paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE+_SOD-S
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: 0.001
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_s_80e_visdrone.pdparams

+ 41 - 0
paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE+_SOD-largesize-L
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 10
+  batch_size: 2
+  learning_rate: 0.0001
+  pretrain_weight_path: null
+  warmup_steps: 100
+  resume_path: null
+  log_interval: 3
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone.pdparams

+ 40 - 0
paddlex/configs/structure_analysis/PicoDet-L_layout.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PicoDet-L_layout
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 11
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.4
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/PicoDet-L_layout_pretrained_v1.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: RT-DETR-H_layout_17cls
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 11
+  epochs_iters: 50
+  batch_size: 1
+  learning_rate: 0.00005
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_17cls.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: RT-DETR-H_layout_3cls
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 11
+  epochs_iters: 50
+  batch_size: 1
+  learning_rate: 0.00005
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 40 - 0
paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-YOLOE-L_vehicle
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/vehicle_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mot_ppyoloe_l_36e_ppvehicle.pdparams

+ 41 - 0
paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml

@@ -0,0 +1,41 @@
+Global:
+  model: PP-YOLOE-S_vehicle
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 4
+  epochs_iters: 50
+  batch_size: 2
+  learning_rate: #0.08
+  pretrain_weight_path: null
+  warmup_steps: #100
+  resume_path: null
+  log_interval: 10
+  eval_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 10
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/vehicle_detection.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1
+
+
+Export:
+  weight_path: https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle.pdparams

+ 162 - 0
paddlex/modules/base/predictor/transforms/image_common.py

@@ -591,3 +591,165 @@ class ToCHWImage(BaseTransform):
         """get output keys"""
         """get output keys"""
         # image: Image in chw format.
         # image: Image in chw format.
         return ["image"]
         return ["image"]
+
+
+def rotate_point(pt, angle_rad):
+    """Rotate a point by an angle.
+    Args:
+        pt (list[float]): 2 dimensional point to be rotated
+        angle_rad (float): rotation angle in radians
+    Returns:
+        list[float]: Rotated point.
+    """
+    assert len(pt) == 2
+    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+    new_x = pt[0] * cs - pt[1] * sn
+    new_y = pt[0] * sn + pt[1] * cs
+    rotated_pt = [new_x, new_y]
+
+    return rotated_pt
+
+
+def _get_3rd_point(a, b):
+    """To calculate the affine matrix, three pairs of points are required. This
+    function is used to get the 3rd point, given 2D points a & b.
+    The 3rd point is defined by rotating vector `a - b` by 90 degrees
+    anticlockwise, using b as the rotation center.
+    Args:
+        a (np.ndarray): point(x,y)
+        b (np.ndarray): point(x,y)
+    Returns:
+        np.ndarray: The 3rd point.
+    """
+    assert len(a) == 2
+    assert len(b) == 2
+    direction = a - b
+    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
+
+    return third_pt
+
+
+def get_affine_transform(center,
+                         input_size,
+                         rot,
+                         output_size,
+                         shift=(0., 0.),
+                         inv=False):
+    """Get the affine transform matrix, given the center/input_size/rot/output_size.
+    Args:
+        center (np.ndarray[2, ]): Center of the bounding box (x, y).
+        input_size (np.ndarray[2, ] | list | float): Size of the source
+            region wrt [width, height]; a scalar is used for both.
+        rot (float): Rotation angle (degree).
+        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
+        shift (0-100%): Shift translation ratio wrt the width/height.
+            Default (0., 0.).
+        inv (bool): Option to inverse the affine transform direction.
+            (inv=False: src->dst or inv=True: dst->src)
+    Returns:
+        np.ndarray: The transform matrix.
+    """
+    assert len(center) == 2
+    assert len(output_size) == 2
+    assert len(shift) == 2
+    if not isinstance(input_size, (np.ndarray, list)):
+        input_size = np.array([input_size, input_size], dtype=np.float32)
+    scale_tmp = input_size
+
+    shift = np.array(shift)
+    src_w = scale_tmp[0]
+    dst_w = output_size[0]
+    dst_h = output_size[1]
+
+    rot_rad = np.pi * rot / 180
+    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
+    dst_dir = np.array([0., dst_w * -0.5])
+
+    src = np.zeros((3, 2), dtype=np.float32)
+    src[0, :] = center + scale_tmp * shift
+    src[1, :] = center + src_dir + scale_tmp * shift
+    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
+
+    dst = np.zeros((3, 2), dtype=np.float32)
+    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
+    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
+
+    if inv:
+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+    else:
+        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+    return trans
+
+
+class WarpAffine(object):
+    """Warp affine the image
+    """
+
+    def __init__(self,
+                 keep_res=False,
+                 pad=31,
+                 input_h=512,
+                 input_w=512,
+                 scale=0.4,
+                 shift=0.1,
+                 down_ratio=4):
+        self.keep_res = keep_res
+        self.pad = pad
+        self.input_h = input_h
+        self.input_w = input_w
+        self.scale = scale
+        self.shift = shift
+        self.down_ratio = down_ratio
+
+    def __call__(self, data):
+
+        im = data['image']
+        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
+
+        h, w = img.shape[:2]
+
+        if self.keep_res:
+            # True in detection eval/infer
+            input_h = (h | self.pad) + 1
+            input_w = (w | self.pad) + 1
+            s = np.array([input_w, input_h], dtype=np.float32)
+            c = np.array([w // 2, h // 2], dtype=np.float32)
+
+        else:
+            # False in centertrack eval_mot/eval_mot
+            s = max(h, w) * 1.0
+            input_h, input_w = self.input_h, self.input_w
+            c = np.array([w / 2., h / 2.], dtype=np.float32)
+
+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
+        img = cv2.resize(img, (w, h))
+        inp = cv2.warpAffine(
+            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
+
+        if not self.keep_res:
+            out_h = input_h // self.down_ratio
+            out_w = input_w // self.down_ratio
+            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
+
+        data['image'] = inp
+
+        im_scale_w, im_scale_h = [
+            input_w / w, input_h / h
+        ]
+        data['image_size'] = [inp.shape[1], inp.shape[0]]
+        data['scale_factors'] = [im_scale_w, im_scale_h]
+        return data
+
+    @classmethod
+    def get_input_keys(cls):
+        """ get input keys """
+        # image: Image in hwc format.
+        return ['image']
+
+    @classmethod
+    def get_output_keys(cls):
+        """ get output keys """
+        # image: Warped image, still in hwc format.
+        return ["image"]

+ 18 - 0
paddlex/modules/base/predictor/utils/official_models.py

@@ -214,6 +214,24 @@ openatom_rec_svtrv2_ch_infer.tar",
     "TimesNet_ad": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_ad_infer.tar",
     "TimesNet_ad": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_ad_infer.tar",
     "TimesNet_cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_cls_infer.tar",
     "TimesNet_cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_cls_infer.tar",
     "STFPM": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/STFPM_infer.tar",
     "STFPM": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/STFPM_infer.tar",
+    "FCOS-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/FCOS-ResNet50_infer.tar",
+    "DETR-R50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/DETR-R50_infer.tar",
+    "PP-YOLOE-L_vehicle": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-L_vehicle_infer.tar",
+    "PP-YOLOE-S_vehicle": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-S_vehicle_infer.tar",
+    "PP-ShiTuV2_det": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-ShiTuV2_det_infer.tar",
+    "PP-YOLOE-S_human": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-S_human_infer.tar",
+    "PP-YOLOE-L_human": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-L_human_infer.tar",
+    "PicoDet-M": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-M_infer.tar",
+    "PicoDet-XS": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-XS_infer.tar",
+    "PP-YOLOE+_SOD-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-L_infer.tar",
+    "PP-YOLOE+_SOD-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-S_infer.tar",
+    "PP-YOLOE+_SOD-largesize-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-largesize-L_infer.tar",
+    "CenterNet-DLA-34": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/CenterNet-DLA-34_infer.tar",
+    "CenterNet-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/CenterNet-ResNet50_infer.tar",
+    "PicoDet-L_layout": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-L_layout_infer.tar",
+    "RT-DETR-H_layout_3cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/RT-DETR-H_layout_3cls_infer.tar",
+    "RT-DETR-H_layout_17cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/RT-DETR-H_layout_17cls_infer.tar",
+    "PicoDet_LCNet_x2_5_face": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet_LCNet_x2_5_face_infer.tar",
 }
 }
 
 
 
 

+ 18 - 0
paddlex/modules/object_detection/model_list.py

@@ -26,6 +26,9 @@ MODELS = [
     "RT-DETR-R50",
     "RT-DETR-R50",
     "RT-DETR-X",
     "RT-DETR-X",
     "PicoDet_layout_1x",
     "PicoDet_layout_1x",
+    "PicoDet-L_layout",
+    "RT-DETR-H_layout_3cls",
+    "RT-DETR-H_layout_17cls",
     "YOLOv3-DarkNet53",
     "YOLOv3-DarkNet53",
     "YOLOv3-MobileNetV3",
     "YOLOv3-MobileNetV3",
     "YOLOv3-ResNet50_vd_DCN",
     "YOLOv3-ResNet50_vd_DCN",
@@ -46,4 +49,19 @@ MODELS = [
     "FasterRCNN-Swin-Tiny-FPN",
     "FasterRCNN-Swin-Tiny-FPN",
     "Cascade-FasterRCNN-ResNet50-FPN",
     "Cascade-FasterRCNN-ResNet50-FPN",
     "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN",
     "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN",
+    "PicoDet-M",
+    "PicoDet-XS",
+    "FCOS-ResNet50",
+    "DETR-R50",
+    "PP-ShiTuV2_det",
+    "PP-YOLOE-L_human",
+    "PP-YOLOE-S_human",
+    "PP-YOLOE-L_vehicle",
+    "PP-YOLOE-S_vehicle",
+    "PP-YOLOE+_SOD-L",
+    "PP-YOLOE+_SOD-S",
+    "PP-YOLOE+_SOD-largesize-L",
+    "CenterNet-DLA-34",
+    "CenterNet-ResNet50",
+    "PicoDet_LCNet_x2_5_face",
 ]
 ]

+ 5 - 0
paddlex/modules/object_detection/predictor/utils.py

@@ -74,6 +74,11 @@ class InnerConfig(object):
                 fill_value = cfg.get("fill_value", [114.0, 114.0, 114.0])
                 fill_value = cfg.get("fill_value", [114.0, 114.0, 114.0])
                 size = cfg.get("size", [640, 640])
                 size = cfg.get("size", [640, 640])
                 tf = Pad(size=size, fill_value=fill_value)
                 tf = Pad(size=size, fill_value=fill_value)
+            elif cfg['type'] == 'WarpAffine':
+                input_h = cfg.get('input_h', 512)
+                input_w = cfg.get('input_w', 512)
+                keep_res = cfg.get('keep_res', True)
+                tf = image_common.WarpAffine(input_h=input_h, input_w=input_w, keep_res=keep_res)
             else:
             else:
                 raise RuntimeError(f"Unsupported type: {cfg['type']}")
                 raise RuntimeError(f"Unsupported type: {cfg['type']}")
             tfs.append(tf)
             tfs.append(tf)

+ 119 - 0
paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-DLA-34.yaml

@@ -0,0 +1,119 @@
+# Runtime
+use_ema: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 4
+TrainReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
+    - CenterRandColor: {}
+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
+    - Permute: {}
+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
+  batch_size: 16
+  shuffle: True
+  drop_last: True
+  use_shared_memory: True
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: CenterNet
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
+
+CenterNet:
+  backbone: DLA
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+DLA:
+  depth: 34
+
+CenterNetDLAFPN:
+  down_ratio: 4
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False
+
+# Optimizer
+epoch: 140
+
+LearningRate:
+  base_lr: 0.0005
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [90, 120]
+    use_warmup: False
+
+OptimizerBuilder:
+  optimizer:
+    type: Adam
+  regularizer: NULL
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 130 - 0
paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-ResNet50.yaml

@@ -0,0 +1,130 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+use_ema: true
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+# Reader
+worker_num: 4
+TrainReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
+    - CenterRandColor: {}
+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
+    - Permute: {}
+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
+  batch_size: 16
+  shuffle: True
+  drop_last: True
+  use_shared_memory: True
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: CenterNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+CenterNet:
+  backbone: ResNet
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  return_idx: [0, 1, 2, 3]
+  freeze_at: -1
+  norm_decay: 0.
+  dcn_v2_stages: [3]
+
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 4
+  down_ratio: 4
+  dcn_v2: False
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False
+
+# Optimizer
+epoch: 140
+
+LearningRate:
+  base_lr: 0.0005
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [90, 120]
+    use_warmup: False
+
+OptimizerBuilder:
+  optimizer:
+    type: Adam
+  regularizer: NULL
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 153 - 0
paddlex/repo_apis/PaddleDetection_api/configs/DETR-R50.yaml

@@ -0,0 +1,153 @@
+# Runtime
+use_ema: true
+find_unused_parameters: True
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 0
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+# Model
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
+hidden_dim: 256
+
+
+DETR:
+  backbone: ResNet
+  transformer: DETRTransformer
+  detr_head: DETRHead
+  post_process: DETRPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+
+DETRTransformer:
+  num_queries: 100
+  position_embed_type: sine
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.1
+  activation: relu
+
+
+DETRHead:
+  num_mlp_layers: 3
+
+
+DETRLoss:
+  loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
+  aux_loss: True
+
+
+HungarianMatcher:
+  matcher_coeff: {class: 1, bbox: 5, giou: 2}
+
+# Optimizer
+epoch: 500
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [400]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 157 - 0
paddlex/repo_apis/PaddleDetection_api/configs/FCOS-ResNet50.yaml

@@ -0,0 +1,157 @@
+# Runtime
+use_ema: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+  - RandomFlip: {}
+  batch_transforms:
+  - Permute: {}
+  - PadBatch: {pad_to_stride: 128}
+  - Gt2FCOSTarget:
+      object_sizes_boundary: [64, 128, 256, 512]
+      center_sampling_radius: 1.5
+      downsample_ratios: [8, 16, 32, 64, 128]
+      norm_reg_targets: True
+  batch_size: 2
+  shuffle: True
+  drop_last: True
+  use_shared_memory: True
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 128}
+  batch_size: 1
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 128}
+  batch_size: 1
+  fuse_normalize: True
+
+# Model
+architecture: FCOS
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FCOS:
+  backbone: ResNet
+  neck: FPN
+  fcos_head: FCOSHead
+
+ResNet:
+  depth: 50
+  variant: 'b'
+  norm_type: bn
+  freeze_at: 0 # res2
+  return_idx: [1, 2, 3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+  spatial_scales: [0.125, 0.0625, 0.03125]
+  extra_stage: 2
+  has_extra_convs: True
+  use_c5: False
+
+FCOSHead:
+  fcos_feat:
+    name: FCOSFeat
+    feat_in: 256
+    feat_out: 256
+    num_convs: 4
+    norm_type: "gn"
+    use_dcn: False
+  fpn_stride: [8, 16, 32, 64, 128]
+  prior_prob: 0.01
+  norm_reg_targets: True
+  centerness_on_reg: True
+  num_shift: 0.5
+  fcos_loss:
+    name: FCOSLoss
+    loss_alpha: 0.25
+    loss_gamma: 2.0
+    iou_loss_type: "giou"
+    reg_weights: 1.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+# Optimizer
+epoch: 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 169 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-ShiTuV2_det.yaml

@@ -0,0 +1,169 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+print_flops: false
+print_params: false
+find_unused_parameters: True
+use_ema: true
+cycle_epoch: 20
+snapshot_epoch: 2
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 6
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomCrop: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomDistort: {}
+  batch_transforms:
+  - BatchRandomResize: {target_size: [576, 608, 640, 672, 704], random_size: True, random_interp: True, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 56
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 8
+  shuffle: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [1, 3, *eval_height, *eval_width]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: PicoDet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x2_5_ssld_pretrained.pdparams
+
+PicoDet:
+  backbone: LCNet
+  neck: CSPPAN
+  head: PicoHead
+
+LCNet:
+  scale: 2.5
+  feature_maps: [3, 4, 5]
+
+ESNet:
+  scale: 1.0
+  feature_maps: [4, 11, 14]
+  act: hard_swish
+  channel_ratio: [0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5, 0.625, 1.0, 0.625, 0.75]
+
+CSPPAN:
+  out_channels: 128
+  use_depthwise: True
+  num_csp_blocks: 1
+  num_features: 4
+
+PicoHead:
+  conv_feat:
+    name: PicoFeat
+    feat_in: 128
+    feat_out: 128
+    num_convs: 4
+    num_fpn_stride: 4
+    norm_type: bn
+    share_cls_reg: True
+  fpn_stride: [8, 16, 32, 64]
+  feat_in_chan: 128
+  prior_prob: 0.01
+  reg_max: 7
+  cell_offset: 0.5
+  loss_class:
+    name: VarifocalLoss
+    use_sigmoid: True
+    iou_weighted: True
+    loss_weight: 1.0
+  loss_dfl:
+    name: DistributionFocalLoss
+    loss_weight: 0.25
+  loss_bbox:
+    name: GIoULoss
+    loss_weight: 2.0
+  assigner:
+    name: SimOTAAssigner
+    candidate_topk: 10
+    iou_weight: 6
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+# Optimizer
+epoch: 100
+
+LearningRate:
+  base_lr: 0.4
+  schedulers:
+  - name: CosineDecay
+    max_epochs: 100
+  - name: LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.00004
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 164 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-L.yaml

@@ -0,0 +1,164 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+log_iter: 100
+snapshot_epoch: 10
+use_ema: true
+
+# Dataset
+metric: COCO
+num_classes: 10
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-train
+  anno_path: train.json
+  dataset_dir: dataset/visdrone
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-val
+  anno_path: val.json
+  dataset_dir: dataset/visdrone
+
+TestDataset:
+  name: ImageFolder
+  anno_path: val.json
+  dataset_dir: dataset/visdrone
+
+#reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  fuse_normalize: True
+
+# Model
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams
+depth_mult: 1.0
+width_mult: 1.0
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+  use_alpha: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+  num_layers: 4
+  use_trans: True
+
+
+PPYOLOEHead:
+  reg_range: [-2,8]
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner_CR
+    center_radius: 1
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 500
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+
+# Optimizer
+
+epoch: 80
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+    - !CosineDecay
+      max_epochs: 96
+    - !LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 164 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-S.yaml

@@ -0,0 +1,164 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+log_iter: 100
+snapshot_epoch: 10
+use_ema: true
+
+# Dataset
+metric: COCO
+num_classes: 10
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-train
+  anno_path: train.json
+  dataset_dir: dataset/visdrone
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-val
+  anno_path: val.json
+  dataset_dir: dataset/visdrone
+
+TestDataset:
+  name: ImageFolder
+  anno_path: val.json
+  dataset_dir: dataset/visdrone
+
+# Reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  fuse_normalize: True
+
+# Model
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_s_80e_coco.pdparams
+depth_mult: 0.33
+width_mult: 0.50
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+  use_alpha: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+  num_layers: 4
+  use_trans: True
+
+
+PPYOLOEHead:
+  reg_range: [-2,8]
+  static_assigner_epoch: -1
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner_CR
+    center_radius: 1
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 500
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+
+# Optimizer
+
+epoch: 80
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+    - !CosineDecay
+      max_epochs: 96
+    - !LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 166 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-largesize-L.yaml

@@ -0,0 +1,166 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+log_iter: 5
+snapshot_epoch: 10
+use_ema: true
+
+# Dataset
+metric: COCO
+num_classes: 10
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-train
+  anno_path: train.json
+  dataset_dir: dataset/visdrone
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: VisDrone2019-DET-val
+  anno_path: val.json
+  # image_dir: test_dev
+  # anno_path: test_dev.json
+  dataset_dir: dataset/visdrone
+
+TestDataset:
+  name: ImageFolder
+  anno_path: val.json
+  dataset_dir: dataset/visdrone
+
+# Reader
+worker_num: 2
+eval_height: &eval_height 1920
+eval_width: &eval_width 1920
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [1024, 1088, 1152, 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 1792, 1856, 1920], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  fuse_normalize: True
+
+# Model
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams
+depth_mult: 1.0
+width_mult: 1.0
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+  use_alpha: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+  num_layers: 4
+  use_trans: True
+
+
+PPYOLOEHead:
+  reg_range: [-2,20]
+  static_assigner_epoch: -1
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner_CR
+    center_radius: 1
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 500
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+
+# Optimizer
+
+epoch: 80
+LearningRate:
+  base_lr: 0.00125
+  schedulers:
+    - !CosineDecay
+      max_epochs: 96
+    - !LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 159 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_human.yaml

@@ -0,0 +1,159 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+use_ema: true
+log_iter: 100
+snapshot_epoch: 4
+
+# Dataset
+metric: COCO
+
+num_classes: 1
+TrainDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/train.json
+  dataset_dir: dataset/crowdhuman
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/val.json
+  dataset_dir: dataset/crowdhuman
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/val.json
+  dataset_dir: dataset/crowdhuman
+
+
+# Reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 2
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+depth_mult: 1.0
+width_mult: 1.0
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+
+PPYOLOEHead:
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+# Optimizer
+
+epoch: 36
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+    - name: CosineDecay
+      max_epochs: 43
+    - name: LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 156 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_vehicle.yaml

@@ -0,0 +1,156 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+use_ema: true
+log_iter: 1
+snapshot_epoch: 1
+
+# Dataset
+metric: COCO
+num_classes: 1
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/train_all.json
+  dataset_dir: dataset/ppvehicle
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+  allow_empty: true
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/val_all.json
+  dataset_dir: dataset/ppvehicle
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/val_all.json
+  dataset_dir: dataset/ppvehicle
+
+# Reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 2
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
+depth_mult: 1.0
+width_mult: 1.0
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+
+PPYOLOEHead:
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+
+# Optimizer
+epoch: 36
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+    - !CosineDecay
+      max_epochs: 43
+    - !LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 159 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_human.yaml

@@ -0,0 +1,159 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+use_ema: true
+log_iter: 100
+snapshot_epoch: 4
+
+# Dataset
+metric: COCO
+
+num_classes: 1
+TrainDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/train.json
+  dataset_dir: dataset/crowdhuman
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/val.json
+  dataset_dir: dataset/crowdhuman
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/val.json
+  dataset_dir: dataset/crowdhuman
+
+
+# Reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 2
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
+depth_mult: 0.33
+width_mult: 0.50
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+
+PPYOLOEHead:
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+# Optimizer
+
+epoch: 36
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+    - name: CosineDecay
+      max_epochs: 43
+    - name: LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 156 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_vehicle.yaml

@@ -0,0 +1,156 @@
+# Runtime
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+save_dir: output
+print_flops: false
+print_params: false
+use_ema: true
+log_iter: 100
+snapshot_epoch: 4
+
+# Dataset
+metric: COCO
+num_classes: 1
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/train_all.json
+  dataset_dir: dataset/ppvehicle
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+  allow_empty: true
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: ""
+  anno_path: annotations/val_all.json
+  dataset_dir: dataset/ppvehicle
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/val_all.json
+  dataset_dir: dataset/ppvehicle
+
+# Reader
+worker_num: 4
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+    - PadGT: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  use_shared_memory: true
+  collate_batch: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 2
+
+TestReader:
+  inputs_def:
+    image_shape: [3, *eval_height, *eval_width]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+    - Permute: {}
+  batch_size: 1
+
+# Model
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
+depth_mult: 0.33
+width_mult: 0.50
+
+architecture: YOLOv3
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+YOLOv3:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+CSPResNet:
+  layers: [3, 6, 6, 3]
+  channels: [64, 128, 256, 512, 1024]
+  return_idx: [1, 2, 3]
+  use_large_stem: True
+
+CustomCSPPAN:
+  out_channels: [768, 384, 192]
+  stage_num: 1
+  block_num: 3
+  act: 'swish'
+  spp: true
+
+PPYOLOEHead:
+  fpn_strides: [32, 16, 8]
+  grid_cell_scale: 5.0
+  grid_cell_offset: 0.5
+  static_assigner_epoch: -1
+  use_varifocal_loss: True
+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.01
+    nms_threshold: 0.6
+
+
+# Optimizer
+epoch: 36
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+    - !CosineDecay
+      max_epochs: 43
+    - !LinearWarmup
+      start_factor: 0.
+      epochs: 1
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 165 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-L_layout.yaml

@@ -0,0 +1,165 @@
+# Runtime
+epoch: 100
+log_iter: 10
+find_unused_parameters: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+use_ema: true
+save_dir: output
+snapshot_epoch: 10
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 6
+eval_height: &eval_height 640
+eval_width: &eval_width 640
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_train.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomCrop: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomDistort: {}
+  batch_transforms:
+  - BatchRandomResize: {target_size: [576, 608, 640, 672, 704], random_size: True, random_interp: True, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  - PadGT: {}
+  batch_size: 16
+  shuffle: true
+  drop_last: true
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 8
+  shuffle: false
+
+TestReader:
+  inputs_def:
+    image_shape: [1, 3, *eval_height, *eval_width]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: PicoDet
+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/PicoDet-L_layout_pretrained_v1.pdparams
+
+PicoDet:
+  backbone: LCNet
+  neck: LCPAN
+  head: PicoHeadV2
+
+LCNet:
+  scale: 2.0
+  feature_maps: [3, 4, 5]
+
+LCPAN:
+  out_channels: 160
+  use_depthwise: true
+  num_features: 4
+
+PicoHeadV2:
+  conv_feat:
+    name: PicoFeat
+    feat_in: 160
+    feat_out: 160
+    num_convs: 4
+    num_fpn_stride: 4
+    norm_type: bn
+    share_cls_reg: true
+    use_se: true
+  fpn_stride: [8, 16, 32, 64]
+  feat_in_chan: 160
+  prior_prob: 0.01
+  reg_max: 7
+  cell_offset: 0.5
+  grid_cell_scale: 5.0
+  static_assigner_epoch: 100
+  use_align_head: true
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+    force_gt_matching: false
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  loss_class:
+    name: VarifocalLoss
+    use_sigmoid: false
+    iou_weighted: true
+    loss_weight: 1.0
+  loss_dfl:
+    name: DistributionFocalLoss
+    loss_weight: 0.5
+  loss_bbox:
+    name: GIoULoss
+    loss_weight: 2.5
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+# Optimizer
+LearningRate:
+  base_lr: 0.06
+  schedulers:
+  - name: CosineDecay
+    max_epochs: 150
+  - name: LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.00004
+    type: L2
+
+# Export
+export:
+  post_process: true
+  nms: true
+  benchmark: false
+  fuse_conv_bn: false

+ 170 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-M.yaml

@@ -0,0 +1,170 @@
+# Runtime
+find_unused_parameters: True
+use_ema: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 10
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 6
+eval_height: &eval_height 416
+eval_width: &eval_width 416
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomCrop: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomDistort: {}
+  batch_transforms:
+  - BatchRandomResize: {target_size: [352, 384, 416, 448, 480], random_size: True, random_interp: True, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  - PadGT: {}
+  batch_size: 48
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 8
+  shuffle: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [1, 3, *eval_height, *eval_width]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: PicoDet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x1_5_pretrained.pdparams
+
+PicoDet:
+  backbone: LCNet
+  neck: LCPAN
+  head: PicoHeadV2
+
+LCNet:
+  scale: 1.5
+  feature_maps: [3, 4, 5]
+
+LCPAN:
+  out_channels: 128
+  use_depthwise: True
+  num_features: 4
+
+PicoHeadV2:
+  conv_feat:
+    name: PicoFeat
+    feat_in: 128
+    feat_out: 128
+    num_convs: 4
+    num_fpn_stride: 4
+    norm_type: bn
+    share_cls_reg: True
+    use_se: True
+  fpn_stride: [8, 16, 32, 64]
+  feat_in_chan: 128
+  prior_prob: 0.01
+  reg_max: 7
+  cell_offset: 0.5
+  grid_cell_scale: 5.0
+  static_assigner_epoch: 100
+  use_align_head: True
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+    force_gt_matching: False
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  loss_class:
+    name: VarifocalLoss
+    use_sigmoid: False
+    iou_weighted: True
+    loss_weight: 1.0
+  loss_dfl:
+    name: DistributionFocalLoss
+    loss_weight: 0.5
+  loss_bbox:
+    name: GIoULoss
+    loss_weight: 2.5
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+# Optimizer
+epoch: 250
+
+LearningRate:
+  base_lr: 0.24
+  schedulers:
+  - name: CosineDecay
+    max_epochs: 300
+  - name: LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.00004
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 170 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-XS.yaml

@@ -0,0 +1,170 @@
+# Runtime
+find_unused_parameters: True
+use_ema: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 10
+print_flops: false
+print_params: false
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
+
+
+# Reader
+worker_num: 6
+eval_height: &eval_height 416
+eval_width: &eval_width 416
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomCrop: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomDistort: {}
+  batch_transforms:
+  - BatchRandomResize: {target_size: [352, 384, 416, 448, 480], random_size: True, random_interp: True, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  - PadGT: {}
+  batch_size: 56
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 8
+  shuffle: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [1, 3, *eval_height, *eval_width]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+
+# Model
+architecture: PicoDet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x0_35_pretrained.pdparams
+
+PicoDet:
+  backbone: LCNet
+  neck: LCPAN
+  head: PicoHeadV2
+
+LCNet:
+  scale: 0.35
+  feature_maps: [3, 4, 5]
+
+LCPAN:
+  out_channels: 96
+  use_depthwise: True
+  num_features: 4
+
+PicoHeadV2:
+  conv_feat:
+    name: PicoFeat
+    feat_in: 96
+    feat_out: 96
+    num_convs: 2
+    num_fpn_stride: 4
+    norm_type: bn
+    share_cls_reg: True
+    use_se: True
+  fpn_stride: [8, 16, 32, 64]
+  feat_in_chan: 96
+  prior_prob: 0.01
+  reg_max: 7
+  cell_offset: 0.5
+  grid_cell_scale: 5.0
+  static_assigner_epoch: 100
+  use_align_head: True
+  static_assigner:
+    name: ATSSAssigner
+    topk: 9
+    force_gt_matching: False
+  assigner:
+    name: TaskAlignedAssigner
+    topk: 13
+    alpha: 1.0
+    beta: 6.0
+  loss_class:
+    name: VarifocalLoss
+    use_sigmoid: False
+    iou_weighted: True
+    loss_weight: 1.0
+  loss_dfl:
+    name: DistributionFocalLoss
+    loss_weight: 0.5
+  loss_bbox:
+    name: GIoULoss
+    loss_weight: 2.5
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+# Optimizer
+epoch: 300
+
+LearningRate:
+  base_lr: 0.28
+  schedulers:
+  - name: CosineDecay
+    max_epochs: 300
+  - name: LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.00004
+    type: L2
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 167 - 0
paddlex/repo_apis/PaddleDetection_api/configs/PicoDet_LCNet_x2_5_face.yaml

@@ -0,0 +1,167 @@
+architecture: PicoDet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/PicoDet_LCNet_x2_5_face_pretrain.pdparams
+weights: output/picodet_lcnet_x2_5_640_mainbody/model_final
+epoch: 300
+find_unused_parameters: True
+use_ema: true
+cycle_epoch: 20
+snapshot_epoch: 2
+
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
+print_flops: false
+print_params: false
+
+PicoDet:
+  backbone: LCNet
+  neck: CSPPAN
+  head: PicoHead
+
+LCNet:
+  scale: 2.5
+  feature_maps: [3, 4, 5]
+
+ESNet:
+  scale: 1.0
+  feature_maps: [4, 11, 14]
+  act: hard_swish
+  channel_ratio: [0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5, 0.625, 1.0, 0.625, 0.75]
+
+CSPPAN:
+  out_channels: 128
+  use_depthwise: True
+  num_csp_blocks: 1
+  num_features: 4
+
+PicoHead:
+  conv_feat:
+    name: PicoFeat
+    feat_in: 128
+    feat_out: 128
+    num_convs: 4
+    num_fpn_stride: 4
+    norm_type: bn
+    share_cls_reg: True
+  fpn_stride: [8, 16, 32, 64]
+  feat_in_chan: 128
+  prior_prob: 0.01
+  reg_max: 7
+  cell_offset: 0.5
+  loss_class:
+    name: VarifocalLoss
+    use_sigmoid: True
+    iou_weighted: True
+    loss_weight: 1.0
+  loss_dfl:
+    name: DistributionFocalLoss
+    loss_weight: 0.25
+  loss_bbox:
+    name: GIoULoss
+    loss_weight: 2.0
+  assigner:
+    name: SimOTAAssigner
+    candidate_topk: 10
+    iou_weight: 6
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.025
+    nms_threshold: 0.6
+
+
+LearningRate:
+  base_lr: 0.08
+  schedulers:
+  - name: CosineDecay
+    max_epochs: 300
+  - name: LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.00004
+    type: L2
+
+worker_num: 6
+eval_height: &eval_height 1088
+eval_width: &eval_width 1088
+eval_size: &eval_size [*eval_height, *eval_width]
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomCrop: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomDistort: {}
+  batch_transforms:
+  - BatchRandomResize: {target_size:  [704, 768, 896, 960, 1088, 1152, 1280], random_size: True, random_interp: True, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 16
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 2
+  shuffle: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [1, 3, *eval_height, *eval_width]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+
+
+metric: WiderFace
+num_classes: 1
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: WIDER_train/images
+  anno_path: train.json
+  dataset_dir: data_face
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: WIDER_val/images
+  anno_path: val.json
+  dataset_dir: data_face
+  allow_empty: true
+
+TestDataset:
+  name: COCODataSet
+  image_dir: WIDER_val/images
+  anno_path: val.json
+  dataset_dir: data_face
+
+# Exporting the model
+export:
+  post_process: True  # Whether post-processing is included in the network when exporting the model.
+  nms: True           # Whether NMS is included in the network when exporting the model.
+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
+  fuse_conv_bn: False

+ 172 - 0
paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_17cls.yaml

@@ -0,0 +1,172 @@
+# Runtime
+epoch: 100
+log_iter: 10
+find_unused_parameters: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+use_ema: true
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: true
+save_dir: output
+snapshot_epoch: 10
+print_flops: false
+print_params: false
+eval_size: [640, 640]
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 4
+
+TrainDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_train.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {prob: 0.8}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {prob: 0.8}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 4
+  shuffle: false
+  drop_last: false
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+# Model
+architecture: DETR
+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
+norm_type: sync_bn
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+  backbone: PPHGNetV2
+  neck: HybridEncoder
+  transformer: RTDETRTransformer
+  detr_head: DINOHead
+  post_process: DETRPostProcess
+
+PPHGNetV2:
+  arch: 'H'
+  return_idx: [1, 2, 3]
+  freeze_stem_only: true
+  freeze_at: 0
+  freeze_norm: true
+  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
+
+HybridEncoder:
+  hidden_dim: 512
+  use_encoder_idx: [2]
+  num_encoder_layers: 2
+  encoder_layer:
+    name: TransformerLayer
+    d_model: 512
+    nhead: 8
+    dim_feedforward: 2048
+    dropout: 0.
+    activation: 'gelu'
+  expansion: 1.0
+
+RTDETRTransformer:
+  num_queries: 300
+  position_embed_type: sine
+  feat_strides: [8, 16, 32]
+  num_levels: 3
+  nhead: 8
+  num_decoder_layers: 6
+  dim_feedforward: 1024
+  dropout: 0.0
+  activation: relu
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: false
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: true
+    use_vfl: true
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+  num_top_queries: 300
+
+# Optimizer
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 1.0
+    milestones: [100]
+    use_warmup: true
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 100
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Export
+export:
+  post_process: true
+  nms: true
+  benchmark: false
+  fuse_conv_bn: false

+ 172 - 0
paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_3cls.yaml

@@ -0,0 +1,172 @@
+# Runtime
+epoch: 100
+log_iter: 10
+find_unused_parameters: true
+use_gpu: true
+use_xpu: false
+use_mlu: false
+use_npu: false
+use_ema: true
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: true
+save_dir: output
+snapshot_epoch: 10
+print_flops: false
+print_params: false
+eval_size: [640, 640]
+
+# Dataset
+metric: COCO
+num_classes: 80
+
+worker_num: 4
+
+TrainDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_train.json
+  dataset_dir: datasets/COCO
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODetDataset
+  image_dir: images
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instance_val.json
+  dataset_dir: datasets/COCO
+
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {prob: 0.8}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomCrop: {prob: 0.8}
+    - RandomFlip: {}
+  batch_transforms:
+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 4
+  shuffle: false
+  drop_last: false
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+    - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+# Model
+architecture: DETR
+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
+norm_type: sync_bn
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+  backbone: PPHGNetV2
+  neck: HybridEncoder
+  transformer: RTDETRTransformer
+  detr_head: DINOHead
+  post_process: DETRPostProcess
+
+PPHGNetV2:
+  arch: 'H'
+  return_idx: [1, 2, 3]
+  freeze_stem_only: true
+  freeze_at: 0
+  freeze_norm: true
+  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
+
+HybridEncoder:
+  hidden_dim: 512
+  use_encoder_idx: [2]
+  num_encoder_layers: 2
+  encoder_layer:
+    name: TransformerLayer
+    d_model: 512
+    nhead: 8
+    dim_feedforward: 2048
+    dropout: 0.
+    activation: 'gelu'
+  expansion: 1.0
+
+RTDETRTransformer:
+  num_queries: 300
+  position_embed_type: sine
+  feat_strides: [8, 16, 32]
+  num_levels: 3
+  nhead: 8
+  num_decoder_layers: 6
+  dim_feedforward: 1024
+  dropout: 0.0
+  activation: relu
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: false
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: true
+    use_vfl: true
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+  num_top_queries: 300
+
+# Optimizer
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 1.0
+    milestones: [100]
+    use_warmup: true
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 100
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
+
+# Export
+export:
+  post_process: true
+  nms: true
+  benchmark: false
+  fuse_conv_bn: false

+ 5 - 0
paddlex/repo_apis/PaddleDetection_api/object_det/config.py

@@ -367,6 +367,11 @@ class DetConfig(BaseConfig, PPDetConfigMixin):
             num_classes (int): the classes number value to set.
             num_classes (int): the classes number value to set.
         """
         """
         self["num_classes"] = num_classes
         self["num_classes"] = num_classes
+        if 'CenterNet' in self.model_name:
+            for i in range(len(self['TrainReader']['sample_transforms'])):
+                if 'Gt2CenterNetTarget' in self['TrainReader']['sample_transforms'][i].keys():
+                     self['TrainReader']['sample_transforms'][i]['Gt2CenterNetTarget']['num_classes'] = num_classes
+        
 
 
     def update_random_size(self, randomsize: list[list[int, int]]):
     def update_random_size(self, randomsize: list[list[int, int]]):
         """update `target_size` of `BatchRandomResize` op in TestReader
         """update `target_size` of `BatchRandomResize` op in TestReader

+ 286 - 0
paddlex/repo_apis/PaddleDetection_api/object_det/register.py

@@ -551,3 +551,289 @@ register_model_info(
 )
 )
 
 
 
 
+register_model_info(
+    {
+        'model_name': 'PicoDet-XS',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PicoDet-XS.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PicoDet-M',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PicoDet-M.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'FCOS-ResNet50',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'FCOS-ResNet50.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'DETR-R50',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'DETR-R50.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+        {
+        'model_name': 'PP-YOLOE-L_vehicle',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-L_vehicle.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE-S_vehicle',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-S_vehicle.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-ShiTuV2_det',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-ShiTuV2_det.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE-L_human',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-L_human.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE-S_human',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-S_human.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+) 
+
+
+register_model_info(
+    {
+        'model_name': 'CenterNet-DLA-34',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'CenterNet-DLA-34.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'CenterNet-ResNet50',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'CenterNet-ResNet50.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE+_SOD-L',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-L.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE+_SOD-S',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-S.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        'model_name': 'PP-YOLOE+_SOD-largesize-L',
+        'suite': 'Det',
+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-largesize-L.yaml'),
+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
+        'supported_dataset_types': ['COCODetDataset'],
+        'supported_train_opts': {
+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
+            'dy2st': False,
+            'amp': ['OFF']
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        "model_name": "RT-DETR-H_layout_3cls",
+        "suite": "Det",
+        "config_path": osp.join(PDX_CONFIG_DIR, "RT-DETR-H_layout_3cls.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "supported_dataset_types": ["COCODetDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        "model_name": "PicoDet-L_layout",
+        "suite": "Det",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PicoDet-L_layout.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "supported_dataset_types": ["COCODetDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        "model_name": "RT-DETR-H_layout_17cls",
+        "suite": "Det",
+        "config_path": osp.join(PDX_CONFIG_DIR, "RT-DETR-H_layout_17cls.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "supported_dataset_types": ["COCODetDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)
+
+
+register_model_info(
+    {
+        "model_name": "PicoDet_LCNet_x2_5_face",
+        "suite": "Det",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PicoDet_LCNet_x2_5_face.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "supported_dataset_types": ["COCODetDataset"],
+        "supported_train_opts": {
+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
+            "dy2st": False,
+            "amp": ["OFF"],
+        },
+    }
+)