1 year ago · 66289e36cf
--- a/README.md
+++ b/README.md
@@ -69,11 +69,31 @@ PaddleX 3.0 覆盖了 16 条产业级模型产线，其中 9 条基础产线可
 
															     <td>PP-LCNet_x1_0_doc_ori</td>
														
 
															   </tr>
														
 
															   <tr>
														
 
															-    <td>基础产线</td>
														
 
															-    <td>通用目标检测</td>
														
 
															+    <td rowspan="6">基础产线</td>
														
 
															+    <td rowspan="6">通用目标检测</td>
														
 
															     <td>目标检测</td>
														
 
															     <td>PicoDet-S<br/>PicoDet-L<details>
														
 
															-    <summary><b>more</b></summary><br/>PP-YOLOE_plus-S<br/>PP-YOLOE_plus-M<br/>PP-YOLOE_plus-L<br/>PP-YOLOE_plus-X<br/>RT-DETR-L<br/>RT-DETR-H<br/>RT-DETR-X<br/>RT-DETR-R18<br/>RT-DETR-R50<br/>YOLOv3-DarkNet53<br/>YOLOv3-MobileNetV3<br/>YOLOv3-ResNet50_vd_DCN<br/>YOLOX-L<br/>YOLOX-M<br/>YOLOX-N<br/>YOLOX-S<br/>YOLOX-T<br/>YOLOX-X<br/>FasterRCNN-ResNet34-FPN<br/>FasterRCNN-ResNet50<br/>FasterRCNN-ResNet50-FPN<br/>FasterRCNN-ResNet50-vd-FPN<br/>FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FasterRCNN-ResNet101<br/>FasterRCNN-ResNet101-FPN<br/>FasterRCNN-ResNeXt101-vd-FPN<br/>FasterRCNN-Swin-Tiny-FPN<br/>Cascade-FasterRCNN-ResNet50-FPN<br/>Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN</details></td>
														
 
															+    <summary><b>more</b></summary><br/>PicoDet-XS<br/>PicoDet-M<br/>PP-YOLOE_plus-S<br/>PP-YOLOE_plus-M<br/>PP-YOLOE_plus-L<br/>PP-YOLOE_plus-X<br/>RT-DETR-L<br/>RT-DETR-H<br/>RT-DETR-X<br/>RT-DETR-R18<br/>RT-DETR-R50<br/>YOLOv3-DarkNet53<br/>YOLOv3-MobileNetV3<br/>YOLOv3-ResNet50_vd_DCN<br/>YOLOX-L<br/>YOLOX-M<br/>YOLOX-N<br/>YOLOX-S<br/>YOLOX-T<br/>YOLOX-X<br/>FasterRCNN-ResNet34-FPN<br/>FasterRCNN-ResNet50<br/>FasterRCNN-ResNet50-FPN<br/>FasterRCNN-ResNet50-vd-FPN<br/>FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FasterRCNN-ResNet101<br/>FasterRCNN-ResNet101-FPN<br/>FasterRCNN-ResNeXt101-vd-FPN<br/>FasterRCNN-Swin-Tiny-FPN<br/>Cascade-FasterRCNN-ResNet50-FPN<br/>Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN<br/>FCOS-ResNet50<br/>DETR-R50<br/>CenterNet-DLA-34<br/>CenterNet-ResNet50</details></td>
														
 
															+  </tr>
														
 
															+  <tr>
														
 
															+    <td>行人检测</td>
														
 
															+    <td>PP-YOLOE-S_human<br/>PP-YOLOE-L_human</td>
														
 
															+  </tr>
														
 
															+  <tr>
														
 
															+    <td>车辆检测</td>
														
 
															+    <td>PP-YOLOE-L_vehicle<br/>PP-YOLOE-S_vehicle</td>
														
 
															+  </tr>
														
 
															+  <tr>
														
 
															+    <td>小目标检测</td>
														
 
															+    <td>PP-YOLOE+_SOD-L<br/>PP-YOLOE+_SOD-S<br/>PP-YOLOE+_SOD-largesize-L</td>
														
 
															+  </tr>
														
 
															+  <tr>
														
 
															+    <td>主体检测</td>
														
 
															+    <td>PP-ShiTuV2_det</td>
														
 
															+  </tr>
														
 
															+  <tr>
														
 
															+    <td>人脸检测</td>
														
 
															+    <td>PicoDet_LCNet_x2_5_face</td>
														
 
															   </tr>
														
 
															   <tr>
														
 
															     <td rowspan="2">基础产线</td>
														
--- a/docs/tutorials/models/support_model_list.md
+++ b/docs/tutorials/models/support_model_list.md
@@ -120,6 +120,9 @@
 
															 | PP-YOLOE_plus-M | [PP-YOLOE_plus-M.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-M.yaml)|
														
 
															 | PP-YOLOE_plus-L | [PP-YOLOE_plus-L.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-L.yaml)|
														
 
															 | PP-YOLOE_plus-X | [PP-YOLOE_plus-X.yaml](../../../paddlex/configs/object_detection/PP-YOLOE_plus-X.yaml)|
														
 
															+| PP-YOLOE+_SOD-L | [PP-YOLOE+_SOD-L.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml)|
														
 
															+| PP-YOLOE+_SOD-S | [PP-YOLOE+_SOD-S.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml)|
														
 
															+| PP-YOLOE+_SOD-largesize-L | [PP-YOLOE+_SOD-largesize-L.yaml](../../../paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml)|
														
 
															 ### 2. RT-DETR 系列
														
 
															 | 模型名称 | config |
														
 
															 | :--- | :---: |
														
@@ -133,6 +136,10 @@
 
															 | :--- | :---: |
														
 
															 | PicoDet-S | [PicoDet-S.yaml](../../../paddlex/configs/object_detection/PicoDet-S.yaml)|
														
 
															 | PicoDet-L | [PicoDet-L.yaml](../../../paddlex/configs/object_detection/PicoDet-L.yaml)|
														
 
															+| PicoDet-M | [PicoDet-M.yaml](../../../paddlex/configs/object_detection/PicoDet-M.yaml)|
														
 
															+| PicoDet-XS | [PicoDet-XS.yaml](../../../paddlex/configs/object_detection/PicoDet-XS.yaml)|
														
 
															+| PP-ShiTuV2_det | [PP-ShiTuV2_det.yaml](../../../paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml)|
														
 
															+| PicoDet_LCNet_x2_5_face | [PicoDet_LCNet_x2_5_face.yaml](../../../paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml)|
														
 
															 ### 4. YOLOv3 系列
														
 
															 | 模型名称 | config |
														
 
															 | :--- | :---: |
														
@@ -165,6 +172,26 @@
 
															 | :--- | :---: |
														
 
															 | Cascade-FasterRCNN-ResNet50-FPN | [Cascade-FasterRCNN-ResNet50-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-FPN.yaml)|
														
 
															 | Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN | [Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml](../../../paddlex/configs/object_detection/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml)|
														
 
															+### 8. FCOS 系列
														
 
															+| 模型名称 | config |
														
 
															+| :--- | :---: |
														
 
															+| FCOS-ResNet50 | [FCOS-ResNet50.yaml](../../../paddlex/configs/object_detection/FCOS-ResNet50.yaml)|
														
 
															+### 9. DETR 系列
														
 
															+| 模型名称 | config |
														
 
															+| :--- | :---: |
														
 
															+| DETR-R50 | [DETR-R50.yaml](../../../paddlex/configs/object_detection/DETR-R50.yaml)|
														
 
															+### 10. PP-YOLOE 系列
														
 
															+| 模型名称 | config |
														
 
															+| :--- | :---: |
														
 
															+| PP-YOLOE-L_vehicle | [PP-YOLOE-L_vehicle.yaml](../../../paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml)|
														
 
															+| PP-YOLOE-S_vehicle | [PP-YOLOE-S_vehicle.yaml](../../../paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml)|
														
 
															+| PP-YOLOE-L_human | [PP-YOLOE-L_human.yaml](../../../paddlex/configs/human_detection/PP-YOLOE-L_human.yaml)|
														
 
															+| PP-YOLOE-S_human | [PP-YOLOE-S_human.yaml](../../../paddlex/configs/human_detection/PP-YOLOE-S_human.yaml)|
														
 
															+### 11. CenterNet 系列
														
 
															+| 模型名称 | config |
														
 
															+| :--- | :---: |
														
 
															+| CenterNet-DLA-34 | [CenterNet-DLA-34.yaml](../../../paddlex/configs/object_detection/CenterNet-DLA-34.yaml)|
														
 
															+| CenterNet-ResNet50 | [CenterNet-ResNet50.yaml](../../../paddlex/configs/object_detection/CenterNet-ResNet50.yaml)|
														
 
															 ## 三、实例分割
														
 
															 ### 1.Mask-RT-DETR 系列
														
 
															 | 模型名称 | config |
														
@@ -232,6 +259,9 @@
 
															 | 模型名称 | config |
														
 
															 | :--- | :---: |
														
 
															 | PicoDet_layout_1x | [PicoDet_layout_1x.yaml](../../../paddlex/configs/structure_analysis/PicoDet_layout_1x.yaml)|
														
 
															+| PicoDet-L_layout | [PicoDet-L_layout.yaml](../../../paddlex/configs/structure_analysis/PicoDet-L_layout.yaml)|
														
 
															+| RT-DETR-H_layout_3cls | [RT-DETR-H_layout_3cls.yaml](../../../paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml)|
														
 
															+| RT-DETR-H_layout_17cls | [RT-DETR-H_layout_17cls.yaml](../../../paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml)|
														
 
															 ## 十、时序异常检测
														
 
															 | 模型名称 | config |
														
 
															 | :--- | :---: |
														
--- a/paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml
+++ b/paddlex/configs/face_detection/PicoDet_LCNet_x2_5_face.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: PicoDet_LCNet_x2_5_face
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 4
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/PicoDet_LCNet_x2_5_face_pretrain.pdparams
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/face_detection.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
--- a/paddlex/configs/human_detection/PP-YOLOE-L_human.yaml
+++ b/paddlex/configs/human_detection/PP-YOLOE-L_human.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE-L_human
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/human_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_crowdhuman.pdparams
														
--- a/paddlex/configs/human_detection/PP-YOLOE-S_human.yaml
+++ b/paddlex/configs/human_detection/PP-YOLOE-S_human.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE-S_human
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/human_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_36e_crowdhuman.pdparams
														
--- a/paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml
+++ b/paddlex/configs/mainbody_detection/PP-ShiTuV2_det.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: PP-ShiTuV2_det
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_lcnet_x2_5_640_mainbody.pdparams
														
--- a/paddlex/configs/object_detection/CenterNet-DLA-34.yaml
+++ b/paddlex/configs/object_detection/CenterNet-DLA-34.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: CenterNet-DLA-34
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: # 0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: # 100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/centernet_dla34_140e_coco.pdparams
														
--- a/paddlex/configs/object_detection/CenterNet-ResNet50.yaml
+++ b/paddlex/configs/object_detection/CenterNet-ResNet50.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: CenterNet-ResNet50
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: # 0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: # 100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/centernet_r50_140e_coco.pdparams
														
--- a/paddlex/configs/object_detection/DETR-R50.yaml
+++ b/paddlex/configs/object_detection/DETR-R50.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: DETR-R50
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams
														
--- a/paddlex/configs/object_detection/FCOS-ResNet50.yaml
+++ b/paddlex/configs/object_detection/FCOS-ResNet50.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: FCOS-ResNet50
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 4
														
 
															+  learning_rate: 0.002
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: # 100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams
														
--- a/paddlex/configs/object_detection/PicoDet-M.yaml
+++ b/paddlex/configs/object_detection/PicoDet-M.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PicoDet-M
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: 0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: 100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_m_416_coco_lcnet.pdparams
														
--- a/paddlex/configs/object_detection/PicoDet-XS.yaml
+++ b/paddlex/configs/object_detection/PicoDet-XS.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PicoDet-XS
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_object_detection_002.png"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/picodet_xs_416_coco_lcnet.pdparams
														
--- a/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml
+++ b/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-L.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE+_SOD-L
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: 0.001
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams
														
--- a/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml
+++ b/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-S.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE+_SOD-S
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: 0.001
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_s_80e_visdrone.pdparams
														
--- a/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml
+++ b/paddlex/configs/smallobject_detection/PP-YOLOE+_SOD-largesize-L.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE+_SOD-largesize-L
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 10
														
 
															+  batch_size: 2
														
 
															+  learning_rate: 0.0001
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: 100
														
 
															+  resume_path: null
														
 
															+  log_interval: 3
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/small_object_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone.pdparams
														
--- a/paddlex/configs/structure_analysis/PicoDet-L_layout.yaml
+++ b/paddlex/configs/structure_analysis/PicoDet-L_layout.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: PicoDet-L_layout
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 11
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.4
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/PicoDet-L_layout_pretrained_v1.pdparams
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
--- a/paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml
+++ b/paddlex/configs/structure_analysis/RT-DETR-H_layout_17cls.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: RT-DETR-H_layout_17cls
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 11
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 1
														
 
															+  learning_rate: 0.00005
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_17cls.pdparams
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
--- a/paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml
+++ b/paddlex/configs/structure_analysis/RT-DETR-H_layout_3cls.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: RT-DETR-H_layout_3cls
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/layout/det_layout_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 11
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 1
														
 
															+  learning_rate: 0.00005
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/layout.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
--- a/paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml
+++ b/paddlex/configs/vehicle_detection/PP-YOLOE-L_vehicle.yaml
@@ -0,0 +1,40 @@
 
															+Global:
														
 
															+  model: PP-YOLOE-L_vehicle
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/vehicle_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/mot_ppyoloe_l_36e_ppvehicle.pdparams
														
--- a/paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml
+++ b/paddlex/configs/vehicle_detection/PP-YOLOE-S_vehicle.yaml
@@ -0,0 +1,41 @@
 
															+Global:
														
 
															+  model: PP-YOLOE-S_vehicle
														
 
															+  mode: check_dataset # check_dataset/train/evaluate/predict
														
 
															+  dataset_dir: "/paddle/dataset/paddlex/det/det_coco_examples"
														
 
															+  device: gpu:0,1,2,3
														
 
															+  output: "output"
														
 
															+
														
 
															+CheckDataset:
														
 
															+  convert:
														
 
															+    enable: False
														
 
															+    src_dataset_type: null
														
 
															+  split:
														
 
															+    enable: False
														
 
															+    train_percent: null
														
 
															+    val_percent: null
														
 
															+
														
 
															+Train:
														
 
															+  num_classes: 4
														
 
															+  epochs_iters: 50
														
 
															+  batch_size: 2
														
 
															+  learning_rate: #0.08
														
 
															+  pretrain_weight_path: null
														
 
															+  warmup_steps: #100
														
 
															+  resume_path: null
														
 
															+  log_interval: 10
														
 
															+  eval_interval: 1
														
 
															+
														
 
															+Evaluate:
														
 
															+  weight_path: "output/best_model.pdparams"
														
 
															+  log_interval: 10
														
 
															+
														
 
															+Predict:
														
 
															+  model_dir: "output/best_model"
														
 
															+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/vehicle_detection.jpg"
														
 
															+  kernel_option:
														
 
															+    run_mode: paddle
														
 
															+    batch_size: 1
														
 
															+
														
 
															+
														
 
															+Export:
														
 
															+  weight_path: https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle.pdparams
														
--- a/paddlex/modules/base/predictor/transforms/image_common.py
+++ b/paddlex/modules/base/predictor/transforms/image_common.py
@@ -591,3 +591,165 @@ class ToCHWImage(BaseTransform):
 
															         """get output keys"""
														
 
															         # image: Image in chw format.
														
 
															         return ["image"]
														
 
															+
														
 
															+
														
 
															+def rotate_point(pt, angle_rad):
														
 
															+    """Rotate a point by an angle.
														
 
															+    Args:
														
 
															+        pt (list[float]): 2 dimensional point to be rotated
														
 
															+        angle_rad (float): rotation angle by radian
														
 
															+    Returns:
														
 
															+        list[float]: Rotated point.
														
 
															+    """
														
 
															+    assert len(pt) == 2
														
 
															+    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
														
 
															+    new_x = pt[0] * cs - pt[1] * sn
														
 
															+    new_y = pt[0] * sn + pt[1] * cs
														
 
															+    rotated_pt = [new_x, new_y]
														
 
															+
														
 
															+    return rotated_pt
														
 
															+
														
 
															+
														
 
															+def _get_3rd_point(a, b):
														
 
															+    """To calculate the affine matrix, three pairs of points are required. This
														
 
															+    function is used to get the 3rd point, given 2D points a & b.
														
 
															+    The 3rd point is defined by rotating vector `a - b` by 90 degrees
														
 
															+    anticlockwise, using b as the rotation center.
														
 
															+    Args:
														
 
															+        a (np.ndarray): point(x,y)
														
 
															+        b (np.ndarray): point(x,y)
														
 
															+    Returns:
														
 
															+        np.ndarray: The 3rd point.
														
 
															+    """
														
 
															+    assert len(a) == 2
														
 
															+    assert len(b) == 2
														
 
															+    direction = a - b
														
 
															+    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
														
 
															+
														
 
															+    return third_pt
														
 
															+
														
 
															+
														
 
															+def get_affine_transform(center,
														
 
															+                         input_size,
														
 
															+                         rot,
														
 
															+                         output_size,
														
 
															+                         shift=(0., 0.),
														
 
															+                         inv=False):
														
 
															+    """Get the affine transform matrix, given the center/scale/rot/output_size.
														
 
															+    Args:
														
 
															+        center (np.ndarray[2, ]): Center of the bounding box (x, y).
														
 
															+        scale (np.ndarray[2, ]): Scale of the bounding box
														
 
															+            wrt [width, height].
														
 
															+        rot (float): Rotation angle (degree).
														
 
															+        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
														
 
															+        shift (0-100%): Shift translation ratio wrt the width/height.
														
 
															+            Default (0., 0.).
														
 
															+        inv (bool): Option to inverse the affine transform direction.
														
 
															+            (inv=False: src->dst or inv=True: dst->src)
														
 
															+    Returns:
														
 
															+        np.ndarray: The transform matrix.
														
 
															+    """
														
 
															+    assert len(center) == 2
														
 
															+    assert len(output_size) == 2
														
 
															+    assert len(shift) == 2
														
 
															+    if not isinstance(input_size, (np.ndarray, list)):
														
 
															+        input_size = np.array([input_size, input_size], dtype=np.float32)
														
 
															+    scale_tmp = input_size
														
 
															+
														
 
															+    shift = np.array(shift)
														
 
															+    src_w = scale_tmp[0]
														
 
															+    dst_w = output_size[0]
														
 
															+    dst_h = output_size[1]
														
 
															+
														
 
															+    rot_rad = np.pi * rot / 180
														
 
															+    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
														
 
															+    dst_dir = np.array([0., dst_w * -0.5])
														
 
															+
														
 
															+    src = np.zeros((3, 2), dtype=np.float32)
														
 
															+    src[0, :] = center + scale_tmp * shift
														
 
															+    src[1, :] = center + src_dir + scale_tmp * shift
														
 
															+    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
														
 
															+
														
 
															+    dst = np.zeros((3, 2), dtype=np.float32)
														
 
															+    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
														
 
															+    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
														
 
															+    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
														
 
															+
														
 
															+    if inv:
														
 
															+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
														
 
															+    else:
														
 
															+        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
														
 
															+
														
 
															+    return trans
														
 
															+
														
 
															+
														
 
															+class WarpAffine(object):
														
 
															+    """Warp affine the image
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 keep_res=False,
														
 
															+                 pad=31,
														
 
															+                 input_h=512,
														
 
															+                 input_w=512,
														
 
															+                 scale=0.4,
														
 
															+                 shift=0.1,
														
 
															+                 down_ratio=4):
														
 
															+        self.keep_res = keep_res
														
 
															+        self.pad = pad
														
 
															+        self.input_h = input_h
														
 
															+        self.input_w = input_w
														
 
															+        self.scale = scale
														
 
															+        self.shift = shift
														
 
															+        self.down_ratio = down_ratio
														
 
															+
														
 
															+    def __call__(self, data):
														
 
															+
														
 
															+        im = data['image']
														
 
															+        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
														
 
															+
														
 
															+        h, w = img.shape[:2]
														
 
															+
														
 
															+        if self.keep_res:
														
 
															+            # True in detection eval/infer
														
 
															+            input_h = (h | self.pad) + 1
														
 
															+            input_w = (w | self.pad) + 1
														
 
															+            s = np.array([input_w, input_h], dtype=np.float32)
														
 
															+            c = np.array([w // 2, h // 2], dtype=np.float32)
														
 
															+
														
 
															+        else:
														
 
															+            # False in centertrack eval_mot/eval_mot
														
 
															+            s = max(h, w) * 1.0
														
 
															+            input_h, input_w = self.input_h, self.input_w
														
 
															+            c = np.array([w / 2., h / 2.], dtype=np.float32)
														
 
															+
														
 
															+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
														
 
															+        img = cv2.resize(img, (w, h))
														
 
															+        inp = cv2.warpAffine(
														
 
															+            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
														
 
															+
														
 
															+        if not self.keep_res:
														
 
															+            out_h = input_h // self.down_ratio
														
 
															+            out_w = input_w // self.down_ratio
														
 
															+            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
														
 
															+
														
 
															+        data['image'] = inp
														
 
															+
														
 
															+        im_scale_w, im_scale_h = [
														
 
															+            input_w / w, input_h / h
														
 
															+        ]
														
 
															+        data['image_size'] = [inp.shape[1], inp.shape[0]]
														
 
															+        data['scale_factors'] = [im_scale_w, im_scale_h]
														
 
															+        return data
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_input_keys(cls):
														
 
															+        """ get input keys """
														
 
															+        # image: Image in hwc format.
														
 
															+        return ['image']
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_output_keys(cls):
														
 
															+        """ get output keys """
														
 
															+        # image: Image in chw format.
														
 
															+        return ["image"]
														
--- a/paddlex/modules/base/predictor/utils/official_models.py
+++ b/paddlex/modules/base/predictor/utils/official_models.py
@@ -214,6 +214,24 @@ openatom_rec_svtrv2_ch_infer.tar",
 
															     "TimesNet_ad": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_ad_infer.tar",
														
 
															     "TimesNet_cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/TimesNet_cls_infer.tar",
														
 
															     "STFPM": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/STFPM_infer.tar",
														
 
															+    "FCOS-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/FCOS-ResNet50_infer.tar",
														
 
															+    "DETR-R50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/DETR-R50_infer.tar",
														
 
															+    "PP-YOLOE-L_vehicle": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-L_vehicle_infer.tar",
														
 
															+    "PP-YOLOE-S_vehicle": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-S_vehicle_infer.tar",
														
 
															+    "PP-ShiTuV2_det": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-ShiTuV2_det_infer.tar",
														
 
															+    "PP-YOLOE-S_human": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-S_human_infer.tar",
														
 
															+    "PP-YOLOE-L_human": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE-L_human_infer.tar",
														
 
															+    "PicoDet-M": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-M_infer.tar",
														
 
															+    "PicoDet-XS": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-XS_infer.tar",
														
 
															+    "PP-YOLOE+_SOD-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-L_infer.tar",
														
 
															+    "PP-YOLOE+_SOD-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-S_infer.tar",
														
 
															+    "PP-YOLOE+_SOD-largesize-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PP-YOLOE+_SOD-largesize-L_infer.tar",
														
 
															+    "CenterNet-DLA-34": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/CenterNet-DLA-34_infer.tar",
														
 
															+    "CenterNet-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/CenterNet-ResNet50_infer.tar",
														
 
															+    "PicoDet-L_layout": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet-L_layout_infer.tar",
														
 
															+    "RT-DETR-H_layout_3cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/RT-DETR-H_layout_3cls_infer.tar",
														
 
															+    "RT-DETR-H_layout_17cls": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/RT-DETR-H_layout_17cls_infer.tar",
														
 
															+    "PicoDet_LCNet_x2_5_face": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0b1/PicoDet_LCNet_x2_5_face_infer.tar",
														
 
															 }
														
--- a/paddlex/modules/object_detection/model_list.py
+++ b/paddlex/modules/object_detection/model_list.py
@@ -26,6 +26,9 @@ MODELS = [
 
															     "RT-DETR-R50",
														
 
															     "RT-DETR-X",
														
 
															     "PicoDet_layout_1x",
														
 
															+    "PicoDet-L_layout",
														
 
															+    "RT-DETR-H_layout_3cls",
														
 
															+    "RT-DETR-H_layout_17cls",
														
 
															     "YOLOv3-DarkNet53",
														
 
															     "YOLOv3-MobileNetV3",
														
 
															     "YOLOv3-ResNet50_vd_DCN",
														
@@ -46,4 +49,19 @@ MODELS = [
 
															     "FasterRCNN-Swin-Tiny-FPN",
														
 
															     "Cascade-FasterRCNN-ResNet50-FPN",
														
 
															     "Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN",
														
 
															+    "PicoDet-M",
														
 
															+    "PicoDet-XS",
														
 
															+    "FCOS-ResNet50",
														
 
															+    "DETR-R50",
														
 
															+    "PP-ShiTuV2_det",
														
 
															+    "PP-YOLOE-L_human",
														
 
															+    "PP-YOLOE-S_human",
														
 
															+    "PP-YOLOE-L_vehicle",
														
 
															+    "PP-YOLOE-S_vehicle",
														
 
															+    "PP-YOLOE+_SOD-L",
														
 
															+    "PP-YOLOE+_SOD-S",
														
 
															+    "PP-YOLOE+_SOD-largesize-L",
														
 
															+    "CenterNet-DLA-34",
														
 
															+    "CenterNet-ResNet50",
														
 
															+    "PicoDet_LCNet_x2_5_face",
														
 
															 ]
														
--- a/paddlex/modules/object_detection/predictor/utils.py
+++ b/paddlex/modules/object_detection/predictor/utils.py
@@ -74,6 +74,11 @@ class InnerConfig(object):
 
															                 fill_value = cfg.get("fill_value", [114.0, 114.0, 114.0])
														
 
															                 size = cfg.get("size", [640, 640])
														
 
															                 tf = Pad(size=size, fill_value=fill_value)
														
 
															+            elif cfg['type'] == 'WarpAffine':
														
 
															+                input_h = cfg.get('input_h', 512)
														
 
															+                input_w = cfg.get('input_w', 512)
														
 
															+                keep_res = cfg.get('keep_res', True)
														
 
															+                tf = image_common.WarpAffine(input_h=input_h, input_w=input_w, keep_res=keep_res)
														
 
															             else:
														
 
															                 raise RuntimeError(f"Unsupported type: {cfg['type']}")
														
 
															             tfs.append(tf)
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-DLA-34.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-DLA-34.yaml
@@ -0,0 +1,119 @@
 
															+# Runtime
														
 
															+use_ema: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 1
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+TrainReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 512, 512]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
														
 
															+    - CenterRandColor: {}
														
 
															+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
														
 
															+    - Permute: {}
														
 
															+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
														
 
															+  batch_size: 16
														
 
															+  shuffle: True
														
 
															+  drop_last: True
														
 
															+  use_shared_memory: True
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 512, 512]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: CenterNet
														
 
															+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
														
 
															+
														
 
															+CenterNet:
														
 
															+  backbone: DLA
														
 
															+  neck: CenterNetDLAFPN
														
 
															+  head: CenterNetHead
														
 
															+  post_process: CenterNetPostProcess
														
 
															+
														
 
															+DLA:
														
 
															+  depth: 34
														
 
															+
														
 
															+CenterNetDLAFPN:
														
 
															+  down_ratio: 4
														
 
															+
														
 
															+CenterNetHead:
														
 
															+  head_planes: 256
														
 
															+  regress_ltrb: False
														
 
															+
														
 
															+CenterNetPostProcess:
														
 
															+  max_per_img: 100
														
 
															+  regress_ltrb: False
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 140
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.0005
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 0.1
														
 
															+    milestones: [90, 120]
														
 
															+    use_warmup: False
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    type: Adam
														
 
															+  regularizer: NULL
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when export model.
														
 
															+  nms: True           # Whether NMS is included in the network when export model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-process and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-ResNet50.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/CenterNet-ResNet50.yaml
@@ -0,0 +1,130 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 1
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+use_ema: true
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+TrainReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 512, 512]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
														
 
															+    - CenterRandColor: {}
														
 
															+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
														
 
															+    - Permute: {}
														
 
															+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
														
 
															+  batch_size: 16
														
 
															+  shuffle: True
														
 
															+  drop_last: True
														
 
															+  use_shared_memory: True
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 512, 512]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
														
 
															+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: CenterNet
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+
														
 
															+CenterNet:
														
 
															+  backbone: ResNet
														
 
															+  neck: CenterNetDLAFPN
														
 
															+  head: CenterNetHead
														
 
															+  post_process: CenterNetPostProcess
														
 
															+
														
 
															+ResNet:
														
 
															+  depth: 50
														
 
															+  variant: d
														
 
															+  return_idx: [0, 1, 2, 3]
														
 
															+  freeze_at: -1
														
 
															+  norm_decay: 0.
														
 
															+  dcn_v2_stages: [3]
														
 
															+
														
 
															+
														
 
															+CenterNetDLAFPN:
														
 
															+  first_level: 0
														
 
															+  last_level: 4
														
 
															+  down_ratio: 4
														
 
															+  dcn_v2: False
														
 
															+
														
 
															+CenterNetHead:
														
 
															+  head_planes: 256
														
 
															+  regress_ltrb: False
														
 
															+
														
 
															+CenterNetPostProcess:
														
 
															+  max_per_img: 100
														
 
															+  regress_ltrb: False
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 140
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.0005
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 0.1
														
 
															+    milestones: [90, 120]
														
 
															+    use_warmup: False
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    type: Adam
														
 
															+  regularizer: NULL
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when export model.
														
 
															+  nms: True           # Whether NMS is included in the network when export model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-process and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/DETR-R50.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/DETR-R50.yaml
@@ -0,0 +1,153 @@
 
															+# Runtime
														
 
															+use_ema: true
														
 
															+find_unused_parameters: True
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 1
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 0
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
														
 
															+                    transforms2: [
														
 
															+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
														
 
															+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
														
 
															+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
														
 
															+  }
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - NormalizeBox: {}
														
 
															+  - BboxXYXY2XYWH: {}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
														
 
															+  batch_size: 2
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  collate_batch: false
														
 
															+  use_shared_memory: false
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {target_size: [800, 1333], keep_ratio: True}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {target_size: [800, 1333], keep_ratio: True}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+# Model
														
 
															+architecture: DETR
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
														
 
															+hidden_dim: 256
														
 
															+
														
 
															+
														
 
															+DETR:
														
 
															+  backbone: ResNet
														
 
															+  transformer: DETRTransformer
														
 
															+  detr_head: DETRHead
														
 
															+  post_process: DETRPostProcess
														
 
															+
														
 
															+
														
 
															+ResNet:
														
 
															+  # index 0 stands for res2
														
 
															+  depth: 50
														
 
															+  norm_type: bn
														
 
															+  freeze_at: 0
														
 
															+  return_idx: [3]
														
 
															+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
														
 
															+  num_stages: 4
														
 
															+
														
 
															+
														
 
															+DETRTransformer:
														
 
															+  num_queries: 100
														
 
															+  position_embed_type: sine
														
 
															+  nhead: 8
														
 
															+  num_encoder_layers: 6
														
 
															+  num_decoder_layers: 6
														
 
															+  dim_feedforward: 2048
														
 
															+  dropout: 0.1
														
 
															+  activation: relu
														
 
															+
														
 
															+
														
 
															+DETRHead:
														
 
															+  num_mlp_layers: 3
														
 
															+
														
 
															+
														
 
															+DETRLoss:
														
 
															+  loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
														
 
															+  aux_loss: True
														
 
															+
														
 
															+
														
 
															+HungarianMatcher:
														
 
															+  matcher_coeff: {class: 1, bbox: 5, giou: 2}
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 500
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.0001
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 0.1
														
 
															+    milestones: [400]
														
 
															+    use_warmup: false
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  clip_grad_by_norm: 0.1
														
 
															+  regularizer: false
														
 
															+  optimizer:
														
 
															+    type: AdamW
														
 
															+    weight_decay: 0.0001
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when export model.
														
 
															+  nms: True           # Whether NMS is included in the network when export model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-process and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/FCOS-ResNet50.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/FCOS-ResNet50.yaml
@@ -0,0 +1,157 @@
 
															+# Runtime
														
 
															+use_ema: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 1
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 2
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
														
 
															+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+  - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+  - Permute: {}
														
 
															+  - PadBatch: {pad_to_stride: 128}
														
 
															+  - Gt2FCOSTarget:
														
 
															+      object_sizes_boundary: [64, 128, 256, 512]
														
 
															+      center_sampling_radius: 1.5
														
 
															+      downsample_ratios: [8, 16, 32, 64, 128]
														
 
															+      norm_reg_targets: True
														
 
															+  batch_size: 2
														
 
															+  shuffle: True
														
 
															+  drop_last: True
														
 
															+  use_shared_memory: True
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
														
 
															+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 128}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
														
 
															+  - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 128}
														
 
															+  batch_size: 1
														
 
															+  fuse_normalize: True
														
 
															+
														
 
															+# Model
														
 
															+architecture: FCOS
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
														
 
															+
														
 
															+FCOS:
														
 
															+  backbone: ResNet
														
 
															+  neck: FPN
														
 
															+  fcos_head: FCOSHead
														
 
															+
														
 
															+ResNet:
														
 
															+  depth: 50
														
 
															+  variant: 'b'
														
 
															+  norm_type: bn
														
 
															+  freeze_at: 0 # res2
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  num_stages: 4
														
 
															+
														
 
															+FPN:
														
 
															+  out_channel: 256
														
 
															+  spatial_scales: [0.125, 0.0625, 0.03125]
														
 
															+  extra_stage: 2
														
 
															+  has_extra_convs: True
														
 
															+  use_c5: False
														
 
															+
														
 
															+FCOSHead:
														
 
															+  fcos_feat:
														
 
															+    name: FCOSFeat
														
 
															+    feat_in: 256
														
 
															+    feat_out: 256
														
 
															+    num_convs: 4
														
 
															+    norm_type: "gn"
														
 
															+    use_dcn: False
														
 
															+  fpn_stride: [8, 16, 32, 64, 128]
														
 
															+  prior_prob: 0.01
														
 
															+  norm_reg_targets: True
														
 
															+  centerness_on_reg: True
														
 
															+  num_shift: 0.5
														
 
															+  fcos_loss:
														
 
															+    name: FCOSLoss
														
 
															+    loss_alpha: 0.25
														
 
															+    loss_gamma: 2.0
														
 
															+    iou_loss_type: "giou"
														
 
															+    reg_weights: 1.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 12
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.01
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 0.1
														
 
															+    milestones: [8, 11]
														
 
															+  - !LinearWarmup
														
 
															+    start_factor: 0.3333333333333333
														
 
															+    steps: 500
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0001
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-ShiTuV2_det.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-ShiTuV2_det.yaml
@@ -0,0 +1,169 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+find_unused_parameters: True
														
 
															+use_ema: true
														
 
															+cycle_epoch: 20
														
 
															+snapshot_epoch: 2
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 6
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomCrop: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomDistort: {}
														
 
															+  batch_transforms:
														
 
															+  - BatchRandomResize: {target_size: [576, 608, 640, 672, 704], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 56
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  collate_batch: false
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 32}
														
 
															+  batch_size: 8
														
 
															+  shuffle: false
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [1, 3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: PicoDet
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x2_5_ssld_pretrained.pdparams
														
 
															+
														
 
															+PicoDet:
														
 
															+  backbone: LCNet
														
 
															+  neck: CSPPAN
														
 
															+  head: PicoHead
														
 
															+
														
 
															+LCNet:
														
 
															+  scale: 2.5
														
 
															+  feature_maps: [3, 4, 5]
														
 
															+
														
 
															+ESNet:
														
 
															+  scale: 1.0
														
 
															+  feature_maps: [4, 11, 14]
														
 
															+  act: hard_swish
														
 
															+  channel_ratio: [0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5, 0.625, 1.0, 0.625, 0.75]
														
 
															+
														
 
															+CSPPAN:
														
 
															+  out_channels: 128
														
 
															+  use_depthwise: True
														
 
															+  num_csp_blocks: 1
														
 
															+  num_features: 4
														
 
															+
														
 
															+PicoHead:
														
 
															+  conv_feat:
														
 
															+    name: PicoFeat
														
 
															+    feat_in: 128
														
 
															+    feat_out: 128
														
 
															+    num_convs: 4
														
 
															+    num_fpn_stride: 4
														
 
															+    norm_type: bn
														
 
															+    share_cls_reg: True
														
 
															+  fpn_stride: [8, 16, 32, 64]
														
 
															+  feat_in_chan: 128
														
 
															+  prior_prob: 0.01
														
 
															+  reg_max: 7
														
 
															+  cell_offset: 0.5
														
 
															+  loss_class:
														
 
															+    name: VarifocalLoss
														
 
															+    use_sigmoid: True
														
 
															+    iou_weighted: True
														
 
															+    loss_weight: 1.0
														
 
															+  loss_dfl:
														
 
															+    name: DistributionFocalLoss
														
 
															+    loss_weight: 0.25
														
 
															+  loss_bbox:
														
 
															+    name: GIoULoss
														
 
															+    loss_weight: 2.0
														
 
															+  assigner:
														
 
															+    name: SimOTAAssigner
														
 
															+    candidate_topk: 10
														
 
															+    iou_weight: 6
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 100
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.4
														
 
															+  schedulers:
														
 
															+  - name: CosineDecay
														
 
															+    max_epochs: 100
														
 
															+  - name: LinearWarmup
														
 
															+    start_factor: 0.1
														
 
															+    steps: 300
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.00004
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-L.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-L.yaml
@@ -0,0 +1,164 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+log_iter: 100
														
 
															+snapshot_epoch: 10
														
 
															+use_ema: true
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 10
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-train
														
 
															+  anno_path: train.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-val
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  fuse_normalize: True
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams
														
 
															+depth_mult: 1.0
														
 
															+width_mult: 1.0
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+  use_alpha: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+  num_layers: 4
														
 
															+  use_trans: True
														
 
															+
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  reg_range: [-2,8]
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  static_assigner_epoch: -1
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner_CR
														
 
															+    center_radius: 1
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 500
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+# Optimizer
														
 
															+
														
 
															+epoch: 80
														
 
															+LearningRate:
														
 
															+  base_lr: 0.01
														
 
															+  schedulers:
														
 
															+    - !CosineDecay
														
 
															+      max_epochs: 96
														
 
															+    - !LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-S.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-S.yaml
@@ -0,0 +1,164 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+log_iter: 100
														
 
															+snapshot_epoch: 10
														
 
															+use_ema: true
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 10
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-train
														
 
															+  anno_path: train.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-val
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  fuse_normalize: True
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_s_80e_coco.pdparams
														
 
															+depth_mult: 0.33
														
 
															+width_mult: 0.50
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+  use_alpha: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+  num_layers: 4
														
 
															+  use_trans: True
														
 
															+
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  reg_range: [-2,8]
														
 
															+  static_assigner_epoch: -1
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner_CR
														
 
															+    center_radius: 1
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 500
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+# Optimizer
														
 
															+
														
 
															+epoch: 80
														
 
															+LearningRate:
														
 
															+  base_lr: 0.01
														
 
															+  schedulers:
														
 
															+    - !CosineDecay
														
 
															+      max_epochs: 96
														
 
															+    - !LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-largesize-L.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE+_SOD-largesize-L.yaml
@@ -0,0 +1,166 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+log_iter: 5
														
 
															+snapshot_epoch: 10
														
 
															+use_ema: true
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 10
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-train
														
 
															+  anno_path: train.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: VisDrone2019-DET-val
														
 
															+  anno_path: val.json
														
 
															+  # image_dir: test_dev
														
 
															+  # anno_path: test_dev.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: dataset/visdrone
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 2
														
 
															+eval_height: &eval_height 1920
														
 
															+eval_width: &eval_width 1920
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [1024, 1088, 1152, 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 1792, 1856, 1920], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 1
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  fuse_normalize: True
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams
														
 
															+depth_mult: 1.0
														
 
															+width_mult: 1.0
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+  use_alpha: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+  num_layers: 4
														
 
															+  use_trans: True
														
 
															+
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  reg_range: [-2,20]
														
 
															+  static_assigner_epoch: -1
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner_CR
														
 
															+    center_radius: 1
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 500
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+# Optimizer
														
 
															+
														
 
															+epoch: 80
														
 
															+LearningRate:
														
 
															+  base_lr: 0.00125
														
 
															+  schedulers:
														
 
															+    - !CosineDecay
														
 
															+      max_epochs: 96
														
 
															+    - !LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_human.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_human.yaml
@@ -0,0 +1,159 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+use_ema: true
														
 
															+log_iter: 100
														
 
															+snapshot_epoch: 4
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+
														
 
															+num_classes: 1
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/train.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/val.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/val.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 2
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
														
 
															+depth_mult: 1.0
														
 
															+width_mult: 1.0
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  static_assigner_epoch: -1
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+
														
 
															+epoch: 36
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.001
														
 
															+  schedulers:
														
 
															+    - name: CosineDecay
														
 
															+      max_epochs: 43
														
 
															+    - name: LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_vehicle.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-L_vehicle.yaml
@@ -0,0 +1,156 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+use_ema: true
														
 
															+log_iter: 1
														
 
															+snapshot_epoch: 1
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 1
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/train_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+  allow_empty: true
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/val_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/val_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 2
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams
														
 
															+depth_mult: 1.0
														
 
															+width_mult: 1.0
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  static_assigner_epoch: -1
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 36
														
 
															+LearningRate:
														
 
															+  base_lr: 0.001
														
 
															+  schedulers:
														
 
															+    - !CosineDecay
														
 
															+      max_epochs: 43
														
 
															+    - !LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_human.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_human.yaml
@@ -0,0 +1,159 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+use_ema: true
														
 
															+log_iter: 100
														
 
															+snapshot_epoch: 4
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+
														
 
															+num_classes: 1
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/train.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/val.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/val.json
														
 
															+  dataset_dir: dataset/crowdhuman
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 2
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
														
 
															+depth_mult: 0.33
														
 
															+width_mult: 0.50
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  static_assigner_epoch: -1
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+
														
 
															+epoch: 36
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.001
														
 
															+  schedulers:
														
 
															+    - name: CosineDecay
														
 
															+      max_epochs: 43
														
 
															+    - name: LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_vehicle.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PP-YOLOE-S_vehicle.yaml
@@ -0,0 +1,156 @@
 
															+# Runtime
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+save_dir: output
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+use_ema: true
														
 
															+log_iter: 100
														
 
															+snapshot_epoch: 4
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 1
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/train_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+  allow_empty: true
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: ""
														
 
															+  anno_path: annotations/val_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/val_all.json
														
 
															+  dataset_dir: dataset/ppvehicle
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 4
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+    - PadGT: {}
														
 
															+  batch_size: 8
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  use_shared_memory: true
														
 
															+  collate_batch: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 2
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
														
 
															+depth_mult: 0.33
														
 
															+width_mult: 0.50
														
 
															+
														
 
															+architecture: YOLOv3
														
 
															+norm_type: sync_bn
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9998
														
 
															+ema_black_list: ['proj_conv.weight']
														
 
															+custom_black_list: ['reduce_mean']
														
 
															+
														
 
															+YOLOv3:
														
 
															+  backbone: CSPResNet
														
 
															+  neck: CustomCSPPAN
														
 
															+  yolo_head: PPYOLOEHead
														
 
															+  post_process: ~
														
 
															+
														
 
															+CSPResNet:
														
 
															+  layers: [3, 6, 6, 3]
														
 
															+  channels: [64, 128, 256, 512, 1024]
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  use_large_stem: True
														
 
															+
														
 
															+CustomCSPPAN:
														
 
															+  out_channels: [768, 384, 192]
														
 
															+  stage_num: 1
														
 
															+  block_num: 3
														
 
															+  act: 'swish'
														
 
															+  spp: true
														
 
															+
														
 
															+PPYOLOEHead:
														
 
															+  fpn_strides: [32, 16, 8]
														
 
															+  grid_cell_scale: 5.0
														
 
															+  grid_cell_offset: 0.5
														
 
															+  static_assigner_epoch: -1
														
 
															+  use_varifocal_loss: True
														
 
															+  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.01
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 36
														
 
															+LearningRate:
														
 
															+  base_lr: 0.001
														
 
															+  schedulers:
														
 
															+    - !CosineDecay
														
 
															+      max_epochs: 43
														
 
															+    - !LinearWarmup
														
 
															+      start_factor: 0.
														
 
															+      epochs: 1
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.0005
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-L_layout.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-L_layout.yaml
@@ -0,0 +1,165 @@
 
															+# Runtime
														
 
															+epoch: 100
														
 
															+log_iter: 10
														
 
															+find_unused_parameters: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+use_ema: true
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 10
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+worker_num: 6
														
 
															+eval_height: &eval_height 640
														
 
															+eval_width: &eval_width 640
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_train.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomCrop: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomDistort: {}
														
 
															+  batch_transforms:
														
 
															+  - BatchRandomResize: {target_size: [576, 608, 640, 672, 704], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  - PadGT: {}
														
 
															+  batch_size: 16
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 32}
														
 
															+  batch_size: 8
														
 
															+  shuffle: false
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [1, 3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: PicoDet
														
 
															+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/PicoDet-L_layout_pretrained_v1.pdparams
														
 
															+
														
 
															+PicoDet:
														
 
															+  backbone: LCNet
														
 
															+  neck: LCPAN
														
 
															+  head: PicoHeadV2
														
 
															+
														
 
															+LCNet:
														
 
															+  scale: 2.0
														
 
															+  feature_maps: [3, 4, 5]
														
 
															+
														
 
															+LCPAN:
														
 
															+  out_channels: 160
														
 
															+  use_depthwise: true
														
 
															+  num_features: 4
														
 
															+
														
 
															+PicoHeadV2:
														
 
															+  conv_feat:
														
 
															+    name: PicoFeat
														
 
															+    feat_in: 160
														
 
															+    feat_out: 160
														
 
															+    num_convs: 4
														
 
															+    num_fpn_stride: 4
														
 
															+    norm_type: bn
														
 
															+    share_cls_reg: true
														
 
															+    use_se: true
														
 
															+  fpn_stride: [8, 16, 32, 64]
														
 
															+  feat_in_chan: 160
														
 
															+  prior_prob: 0.01
														
 
															+  reg_max: 7
														
 
															+  cell_offset: 0.5
														
 
															+  grid_cell_scale: 5.0
														
 
															+  static_assigner_epoch: 100
														
 
															+  use_align_head: true
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+    force_gt_matching: false
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  loss_class:
														
 
															+    name: VarifocalLoss
														
 
															+    use_sigmoid: false
														
 
															+    iou_weighted: true
														
 
															+    loss_weight: 1.0
														
 
															+  loss_dfl:
														
 
															+    name: DistributionFocalLoss
														
 
															+    loss_weight: 0.5
														
 
															+  loss_bbox:
														
 
															+    name: GIoULoss
														
 
															+    loss_weight: 2.5
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+LearningRate:
														
 
															+  base_lr: 0.06
														
 
															+  schedulers:
														
 
															+  - name: CosineDecay
														
 
															+    max_epochs: 150
														
 
															+  - name: LinearWarmup
														
 
															+    start_factor: 0.1
														
 
															+    steps: 300
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.00004
														
 
															+    type: L2
														
 
															+
														
 
															+# Export
														
 
															+export:
														
 
															+  post_process: true
														
 
															+  nms: true
														
 
															+  benchmark: false
														
 
															+  fuse_conv_bn: false
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-M.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-M.yaml
@@ -0,0 +1,170 @@
 
															+# Runtime
														
 
															+find_unused_parameters: True
														
 
															+use_ema: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 10
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 6
														
 
															+eval_height: &eval_height 416
														
 
															+eval_width: &eval_width 416
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomCrop: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomDistort: {}
														
 
															+  batch_transforms:
														
 
															+  - BatchRandomResize: {target_size: [352, 384, 416, 448, 480], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  - PadGT: {}
														
 
															+  batch_size: 48
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 32}
														
 
															+  batch_size: 8
														
 
															+  shuffle: false
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [1, 3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: PicoDet
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x1_5_pretrained.pdparams
														
 
															+
														
 
															+PicoDet:
														
 
															+  backbone: LCNet
														
 
															+  neck: LCPAN
														
 
															+  head: PicoHeadV2
														
 
															+
														
 
															+LCNet:
														
 
															+  scale: 1.5
														
 
															+  feature_maps: [3, 4, 5]
														
 
															+
														
 
															+LCPAN:
														
 
															+  out_channels: 128
														
 
															+  use_depthwise: True
														
 
															+  num_features: 4
														
 
															+
														
 
															+PicoHeadV2:
														
 
															+  conv_feat:
														
 
															+    name: PicoFeat
														
 
															+    feat_in: 128
														
 
															+    feat_out: 128
														
 
															+    num_convs: 4
														
 
															+    num_fpn_stride: 4
														
 
															+    norm_type: bn
														
 
															+    share_cls_reg: True
														
 
															+    use_se: True
														
 
															+  fpn_stride: [8, 16, 32, 64]
														
 
															+  feat_in_chan: 128
														
 
															+  prior_prob: 0.01
														
 
															+  reg_max: 7
														
 
															+  cell_offset: 0.5
														
 
															+  grid_cell_scale: 5.0
														
 
															+  static_assigner_epoch: 100
														
 
															+  use_align_head: True
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+    force_gt_matching: False
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  loss_class:
														
 
															+    name: VarifocalLoss
														
 
															+    use_sigmoid: False
														
 
															+    iou_weighted: True
														
 
															+    loss_weight: 1.0
														
 
															+  loss_dfl:
														
 
															+    name: DistributionFocalLoss
														
 
															+    loss_weight: 0.5
														
 
															+  loss_bbox:
														
 
															+    name: GIoULoss
														
 
															+    loss_weight: 2.5
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 250
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.24
														
 
															+  schedulers:
														
 
															+  - name: CosineDecay
														
 
															+    max_epochs: 300
														
 
															+  - name: LinearWarmup
														
 
															+    start_factor: 0.1
														
 
															+    steps: 300
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.00004
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-XS.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet-XS.yaml
@@ -0,0 +1,170 @@
 
															+# Runtime
														
 
															+find_unused_parameters: True
														
 
															+use_ema: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 10
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: train2017
														
 
															+  anno_path: annotations/instances_train2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: val2017
														
 
															+  anno_path: annotations/instances_val2017.json
														
 
															+  dataset_dir: dataset/coco
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
														
 
															+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
														
 
															+
														
 
															+
														
 
															+# Reader
														
 
															+worker_num: 6
														
 
															+eval_height: &eval_height 416
														
 
															+eval_width: &eval_width 416
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomCrop: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomDistort: {}
														
 
															+  batch_transforms:
														
 
															+  - BatchRandomResize: {target_size: [352, 384, 416, 448, 480], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  - PadGT: {}
														
 
															+  batch_size: 56
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 32}
														
 
															+  batch_size: 8
														
 
															+  shuffle: false
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [1, 3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+# Model
														
 
															+architecture: PicoDet
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x0_35_pretrained.pdparams
														
 
															+
														
 
															+PicoDet:
														
 
															+  backbone: LCNet
														
 
															+  neck: LCPAN
														
 
															+  head: PicoHeadV2
														
 
															+
														
 
															+LCNet:
														
 
															+  scale: 0.35
														
 
															+  feature_maps: [3, 4, 5]
														
 
															+
														
 
															+LCPAN:
														
 
															+  out_channels: 96
														
 
															+  use_depthwise: True
														
 
															+  num_features: 4
														
 
															+
														
 
															+PicoHeadV2:
														
 
															+  conv_feat:
														
 
															+    name: PicoFeat
														
 
															+    feat_in: 96
														
 
															+    feat_out: 96
														
 
															+    num_convs: 2
														
 
															+    num_fpn_stride: 4
														
 
															+    norm_type: bn
														
 
															+    share_cls_reg: True
														
 
															+    use_se: True
														
 
															+  fpn_stride: [8, 16, 32, 64]
														
 
															+  feat_in_chan: 96
														
 
															+  prior_prob: 0.01
														
 
															+  reg_max: 7
														
 
															+  cell_offset: 0.5
														
 
															+  grid_cell_scale: 5.0
														
 
															+  static_assigner_epoch: 100
														
 
															+  use_align_head: True
														
 
															+  static_assigner:
														
 
															+    name: ATSSAssigner
														
 
															+    topk: 9
														
 
															+    force_gt_matching: False
														
 
															+  assigner:
														
 
															+    name: TaskAlignedAssigner
														
 
															+    topk: 13
														
 
															+    alpha: 1.0
														
 
															+    beta: 6.0
														
 
															+  loss_class:
														
 
															+    name: VarifocalLoss
														
 
															+    use_sigmoid: False
														
 
															+    iou_weighted: True
														
 
															+    loss_weight: 1.0
														
 
															+  loss_dfl:
														
 
															+    name: DistributionFocalLoss
														
 
															+    loss_weight: 0.5
														
 
															+  loss_bbox:
														
 
															+    name: GIoULoss
														
 
															+    loss_weight: 2.5
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+# Optimizer
														
 
															+epoch: 300
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.28
														
 
															+  schedulers:
														
 
															+  - name: CosineDecay
														
 
															+    max_epochs: 300
														
 
															+  - name: LinearWarmup
														
 
															+    start_factor: 0.1
														
 
															+    steps: 300
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.00004
														
 
															+    type: L2
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet_LCNet_x2_5_face.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/PicoDet_LCNet_x2_5_face.yaml
@@ -0,0 +1,167 @@
 
															+architecture: PicoDet
														
 
															+pretrain_weights: https://paddledet.bj.bcebos.com/models/PicoDet_LCNet_x2_5_face_pretrain.pdparams
														
 
															+weights: output/picodet_lcnet_x2_5_640_mainbody/model_final
														
 
															+epoch: 300
														
 
															+find_unused_parameters: True
														
 
															+use_ema: true
														
 
															+cycle_epoch: 20
														
 
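															+snapshot_epoch: 2  # NOTE(review): duplicate key - `snapshot_epoch` is set again further down in this file (to 1); confirm which value is intended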
														
 
															+
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+log_iter: 20
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 1
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+
														
 
															+PicoDet:
														
 
															+  backbone: LCNet
														
 
															+  neck: CSPPAN
														
 
															+  head: PicoHead
														
 
															+
														
 
															+LCNet:
														
 
															+  scale: 2.5
														
 
															+  feature_maps: [3, 4, 5]
														
 
															+
														
 
															+ESNet:
														
 
															+  scale: 1.0
														
 
															+  feature_maps: [4, 11, 14]
														
 
															+  act: hard_swish
														
 
															+  channel_ratio: [0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5, 0.625, 1.0, 0.625, 0.75]
														
 
															+
														
 
															+CSPPAN:
														
 
															+  out_channels: 128
														
 
															+  use_depthwise: True
														
 
															+  num_csp_blocks: 1
														
 
															+  num_features: 4
														
 
															+
														
 
															+PicoHead:
														
 
															+  conv_feat:
														
 
															+    name: PicoFeat
														
 
															+    feat_in: 128
														
 
															+    feat_out: 128
														
 
															+    num_convs: 4
														
 
															+    num_fpn_stride: 4
														
 
															+    norm_type: bn
														
 
															+    share_cls_reg: True
														
 
															+  fpn_stride: [8, 16, 32, 64]
														
 
															+  feat_in_chan: 128
														
 
															+  prior_prob: 0.01
														
 
															+  reg_max: 7
														
 
															+  cell_offset: 0.5
														
 
															+  loss_class:
														
 
															+    name: VarifocalLoss
														
 
															+    use_sigmoid: True
														
 
															+    iou_weighted: True
														
 
															+    loss_weight: 1.0
														
 
															+  loss_dfl:
														
 
															+    name: DistributionFocalLoss
														
 
															+    loss_weight: 0.25
														
 
															+  loss_bbox:
														
 
															+    name: GIoULoss
														
 
															+    loss_weight: 2.0
														
 
															+  assigner:
														
 
															+    name: SimOTAAssigner
														
 
															+    candidate_topk: 10
														
 
															+    iou_weight: 6
														
 
															+  nms:
														
 
															+    name: MultiClassNMS
														
 
															+    nms_top_k: 1000
														
 
															+    keep_top_k: 100
														
 
															+    score_threshold: 0.025
														
 
															+    nms_threshold: 0.6
														
 
															+
														
 
															+
														
 
															+LearningRate:
														
 
															+  base_lr: 0.08
														
 
															+  schedulers:
														
 
															+  - name: CosineDecay
														
 
															+    max_epochs: 300
														
 
															+  - name: LinearWarmup
														
 
															+    start_factor: 0.1
														
 
															+    steps: 300
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  optimizer:
														
 
															+    momentum: 0.9
														
 
															+    type: Momentum
														
 
															+  regularizer:
														
 
															+    factor: 0.00004
														
 
															+    type: L2
														
 
															+
														
 
															+worker_num: 6
														
 
															+eval_height: &eval_height 1088
														
 
															+eval_width: &eval_width 1088
														
 
															+eval_size: &eval_size [*eval_height, *eval_width]
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - RandomCrop: {}
														
 
															+  - RandomFlip: {prob: 0.5}
														
 
															+  - RandomDistort: {}
														
 
															+  batch_transforms:
														
 
															+  - BatchRandomResize: {target_size:  [704, 768, 896, 960, 1088, 1152, 1280], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 16
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  collate_batch: false
														
 
															+
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_transforms:
														
 
															+  - PadBatch: {pad_to_stride: 32}
														
 
															+  batch_size: 2
														
 
															+  shuffle: false
														
 
															+
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [1, 3, *eval_height, *eval_width]
														
 
															+  sample_transforms:
														
 
															+  - Decode: {}
														
 
															+  - Resize: {interp: 2, target_size: *eval_size, keep_ratio: False}
														
 
															+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
														
 
															+  - Permute: {}
														
 
															+  batch_size: 1
														
 
															+
														
 
															+
														
 
															+metric: WiderFace
														
 
															+num_classes: 1
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: WIDER_train/images
														
 
															+  anno_path: train.json
														
 
															+  dataset_dir: data_face
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: WIDER_val/images
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: data_face
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: COCODataSet
														
 
															+  image_dir: WIDER_val/images
														
 
															+  anno_path: val.json
														
 
															+  dataset_dir: data_face
														
 
															+
														
 
															+# Exporting the model
														
 
															+export:
														
 
															+  post_process: True  # Whether post-processing is included in the network when exporting the model.
														
 
															+  nms: True           # Whether NMS is included in the network when exporting the model.
														
 
															+  benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
														
 
															+  fuse_conv_bn: False
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_17cls.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_17cls.yaml
@@ -0,0 +1,172 @@
 
															+# Runtime
														
 
															+epoch: 100
														
 
															+log_iter: 10
														
 
															+find_unused_parameters: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9999
														
 
															+ema_decay_type: "exponential"
														
 
															+ema_filter_no_grad: true
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 10
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+eval_size: [640, 640]
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+worker_num: 4
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_train.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {prob: 0.8}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {prob: 0.8}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - NormalizeBox: {}
														
 
															+    - BboxXYXY2XYWH: {}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 4
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  collate_batch: false
														
 
															+  use_shared_memory: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 4
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 640, 640]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+# Model
														
 
															+architecture: DETR
														
 
															+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
														
 
															+norm_type: sync_bn
														
 
															+hidden_dim: 256
														
 
															+use_focal_loss: True
														
 
															+
														
 
															+DETR:
														
 
															+  backbone: PPHGNetV2
														
 
															+  neck: HybridEncoder
														
 
															+  transformer: RTDETRTransformer
														
 
															+  detr_head: DINOHead
														
 
															+  post_process: DETRPostProcess
														
 
															+
														
 
															+PPHGNetV2:
														
 
															+  arch: 'H'
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  freeze_stem_only: true
														
 
															+  freeze_at: 0
														
 
															+  freeze_norm: true
														
 
															+  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
														
 
															+
														
 
															+HybridEncoder:
														
 
															+  hidden_dim: 512
														
 
															+  use_encoder_idx: [2]
														
 
															+  num_encoder_layers: 2
														
 
															+  encoder_layer:
														
 
															+    name: TransformerLayer
														
 
															+    d_model: 512
														
 
															+    nhead: 8
														
 
															+    dim_feedforward: 2048
														
 
															+    dropout: 0.
														
 
															+    activation: 'gelu'
														
 
															+  expansion: 1.0
														
 
															+
														
 
															+RTDETRTransformer:
														
 
															+  num_queries: 300
														
 
															+  position_embed_type: sine
														
 
															+  feat_strides: [8, 16, 32]
														
 
															+  num_levels: 3
														
 
															+  nhead: 8
														
 
															+  num_decoder_layers: 6
														
 
															+  dim_feedforward: 1024
														
 
															+  dropout: 0.0
														
 
															+  activation: relu
														
 
															+  num_denoising: 100
														
 
															+  label_noise_ratio: 0.5
														
 
															+  box_noise_scale: 1.0
														
 
															+  learnt_init_query: false
														
 
															+
														
 
															+DINOHead:
														
 
															+  loss:
														
 
															+    name: DINOLoss
														
 
															+    loss_coeff: {class: 1, bbox: 5, giou: 2}
														
 
															+    aux_loss: true
														
 
															+    use_vfl: true
														
 
															+    matcher:
														
 
															+      name: HungarianMatcher
														
 
															+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
														
 
															+
														
 
															+DETRPostProcess:
														
 
															+  num_top_queries: 300
														
 
															+
														
 
															+# Optimizer
														
 
															+LearningRate:
														
 
															+  base_lr: 0.0001
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 1.0
														
 
															+    milestones: [100]
														
 
															+    use_warmup: true
														
 
															+  - !LinearWarmup
														
 
															+    start_factor: 0.001
														
 
															+    steps: 100
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  clip_grad_by_norm: 0.1
														
 
															+  regularizer: false
														
 
															+  optimizer:
														
 
															+    type: AdamW
														
 
															+    weight_decay: 0.0001
														
 
															+
														
 
															+# Export
														
 
															+export:
														
 
															+  post_process: true
														
 
															+  nms: true
														
 
															+  benchmark: false
														
 
															+  fuse_conv_bn: false
														
--- a/paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_3cls.yaml
+++ b/paddlex/repo_apis/PaddleDetection_api/configs/RT-DETR-H_layout_3cls.yaml
@@ -0,0 +1,172 @@
 
															+# Runtime
														
 
															+epoch: 100
														
 
															+log_iter: 10
														
 
															+find_unused_parameters: true
														
 
															+use_gpu: true
														
 
															+use_xpu: false
														
 
															+use_mlu: false
														
 
															+use_npu: false
														
 
															+use_ema: true
														
 
															+ema_decay: 0.9999
														
 
															+ema_decay_type: "exponential"
														
 
															+ema_filter_no_grad: true
														
 
															+save_dir: output
														
 
															+snapshot_epoch: 10
														
 
															+print_flops: false
														
 
															+print_params: false
														
 
															+eval_size: [640, 640]
														
 
															+
														
 
															+# Dataset
														
 
															+metric: COCO
														
 
															+num_classes: 80
														
 
															+
														
 
															+worker_num: 4
														
 
															+
														
 
															+TrainDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_train.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
														
 
															+
														
 
															+EvalDataset:
														
 
															+  name: COCODetDataset
														
 
															+  image_dir: images
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+  allow_empty: true
														
 
															+
														
 
															+TestDataset:
														
 
															+  name: ImageFolder
														
 
															+  anno_path: annotations/instance_val.json
														
 
															+  dataset_dir: datasets/COCO
														
 
															+
														
 
															+TrainReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - RandomDistort: {prob: 0.8}
														
 
															+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
														
 
															+    - RandomCrop: {prob: 0.8}
														
 
															+    - RandomFlip: {}
														
 
															+  batch_transforms:
														
 
															+    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - NormalizeBox: {}
														
 
															+    - BboxXYXY2XYWH: {}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 4
														
 
															+  shuffle: true
														
 
															+  drop_last: true
														
 
															+  collate_batch: false
														
 
															+  use_shared_memory: true
														
 
															+
														
 
															+EvalReader:
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 4
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+TestReader:
														
 
															+  inputs_def:
														
 
															+    image_shape: [3, 640, 640]
														
 
															+  sample_transforms:
														
 
															+    - Decode: {}
														
 
															+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
														
 
															+    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
														
 
															+    - Permute: {}
														
 
															+  batch_size: 1
														
 
															+  shuffle: false
														
 
															+  drop_last: false
														
 
															+
														
 
															+# Model
														
 
															+architecture: DETR
														
 
															+pretrain_weights: https://paddle-model-ecology.bj.bcebos.com/paddlex/pretrained/RT-DETR-H_layout_pretrained_v1.pdparams
														
 
															+norm_type: sync_bn
														
 
															+hidden_dim: 256
														
 
															+use_focal_loss: True
														
 
															+
														
 
															+DETR:
														
 
															+  backbone: PPHGNetV2
														
 
															+  neck: HybridEncoder
														
 
															+  transformer: RTDETRTransformer
														
 
															+  detr_head: DINOHead
														
 
															+  post_process: DETRPostProcess
														
 
															+
														
 
															+PPHGNetV2:
														
 
															+  arch: 'H'
														
 
															+  return_idx: [1, 2, 3]
														
 
															+  freeze_stem_only: true
														
 
															+  freeze_at: 0
														
 
															+  freeze_norm: true
														
 
															+  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
														
 
															+
														
 
															+HybridEncoder:
														
 
															+  hidden_dim: 512
														
 
															+  use_encoder_idx: [2]
														
 
															+  num_encoder_layers: 2
														
 
															+  encoder_layer:
														
 
															+    name: TransformerLayer
														
 
															+    d_model: 512
														
 
															+    nhead: 8
														
 
															+    dim_feedforward: 2048
														
 
															+    dropout: 0.
														
 
															+    activation: 'gelu'
														
 
															+  expansion: 1.0
														
 
															+
														
 
															+RTDETRTransformer:
														
 
															+  num_queries: 300
														
 
															+  position_embed_type: sine
														
 
															+  feat_strides: [8, 16, 32]
														
 
															+  num_levels: 3
														
 
															+  nhead: 8
														
 
															+  num_decoder_layers: 6
														
 
															+  dim_feedforward: 1024
														
 
															+  dropout: 0.0
														
 
															+  activation: relu
														
 
															+  num_denoising: 100
														
 
															+  label_noise_ratio: 0.5
														
 
															+  box_noise_scale: 1.0
														
 
															+  learnt_init_query: false
														
 
															+
														
 
															+DINOHead:
														
 
															+  loss:
														
 
															+    name: DINOLoss
														
 
															+    loss_coeff: {class: 1, bbox: 5, giou: 2}
														
 
															+    aux_loss: true
														
 
															+    use_vfl: true
														
 
															+    matcher:
														
 
															+      name: HungarianMatcher
														
 
															+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
														
 
															+
														
 
															+DETRPostProcess:
														
 
															+  num_top_queries: 300
														
 
															+
														
 
															+# Optimizer
														
 
															+LearningRate:
														
 
															+  base_lr: 0.0001
														
 
															+  schedulers:
														
 
															+  - !PiecewiseDecay
														
 
															+    gamma: 1.0
														
 
															+    milestones: [100]
														
 
															+    use_warmup: true
														
 
															+  - !LinearWarmup
														
 
															+    start_factor: 0.001
														
 
															+    steps: 100
														
 
															+
														
 
															+OptimizerBuilder:
														
 
															+  clip_grad_by_norm: 0.1
														
 
															+  regularizer: false
														
 
															+  optimizer:
														
 
															+    type: AdamW
														
 
															+    weight_decay: 0.0001
														
 
															+
														
 
															+# Export
														
 
															+export:
														
 
															+  post_process: true
														
 
															+  nms: true
														
 
															+  benchmark: false
														
 
															+  fuse_conv_bn: false
														
--- a/paddlex/repo_apis/PaddleDetection_api/object_det/config.py
+++ b/paddlex/repo_apis/PaddleDetection_api/object_det/config.py
@@ -367,6 +367,11 @@ class DetConfig(BaseConfig, PPDetConfigMixin):
 
															             num_classes (int): the classes number value to set.
														
 
															         """
														
 
															         self["num_classes"] = num_classes
														
 
															+        if 'CenterNet' in self.model_name:
														
 
															+            for i in range(len(self['TrainReader']['sample_transforms'])):
														
 
															+                if 'Gt2CenterNetTarget' in self['TrainReader']['sample_transforms'][i].keys():
														
 
															+                     self['TrainReader']['sample_transforms'][i]['Gt2CenterNetTarget']['num_classes'] = num_classes
														
 
															+        
														
 
															     def update_random_size(self, randomsize: list[list[int, int]]):
														
 
															         """update `target_size` of `BatchRandomResize` op in TestReader
														
--- a/paddlex/repo_apis/PaddleDetection_api/object_det/register.py
+++ b/paddlex/repo_apis/PaddleDetection_api/object_det/register.py
@@ -551,3 +551,289 @@ register_model_info(
 
															 )
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PicoDet-XS',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PicoDet-XS.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PicoDet-M',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PicoDet-M.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'FCOS-ResNet50',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'FCOS-ResNet50.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'DETR-R50',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'DETR-R50.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+        {
														
 
															+        'model_name': 'PP-YOLOE-L_vehicle',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-L_vehicle.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE-S_vehicle',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-S_vehicle.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-ShiTuV2_det',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-ShiTuV2_det.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE-L_human',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-L_human.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE-S_human',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE-S_human.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+) 
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'CenterNet-DLA-34',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'CenterNet-DLA-34.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'CenterNet-ResNet50',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'CenterNet-ResNet50.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE+_SOD-L',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-L.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE+_SOD-S',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-S.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        'model_name': 'PP-YOLOE+_SOD-largesize-L',
														
 
															+        'suite': 'Det',
														
 
															+        'config_path': osp.join(PDX_CONFIG_DIR, 'PP-YOLOE+_SOD-largesize-L.yaml'),
														
 
															+        'supported_apis': ['train', 'evaluate', 'predict', 'export', 'infer'],
														
 
															+        'supported_dataset_types': ['COCODetDataset'],
														
 
															+        'supported_train_opts': {
														
 
															+            'device': ['cpu', 'gpu_nxcx', 'xpu', 'npu', 'mlu'],
														
 
															+            'dy2st': False,
														
 
															+            'amp': ['OFF']
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        "model_name": "RT-DETR-H_layout_3cls",
														
 
															+        "suite": "Det",
														
 
															+        "config_path": osp.join(PDX_CONFIG_DIR, "RT-DETR-H_layout_3cls.yaml"),
														
 
															+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
														
 
															+        "supported_dataset_types": ["COCODetDataset"],
														
 
															+        "supported_train_opts": {
														
 
															+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
														
 
															+            "dy2st": False,
														
 
															+            "amp": ["OFF"],
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        "model_name": "PicoDet-L_layout",
														
 
															+        "suite": "Det",
														
 
															+        "config_path": osp.join(PDX_CONFIG_DIR, "PicoDet-L_layout.yaml"),
														
 
															+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
														
 
															+        "supported_dataset_types": ["COCODetDataset"],
														
 
															+        "supported_train_opts": {
														
 
															+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
														
 
															+            "dy2st": False,
														
 
															+            "amp": ["OFF"],
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        "model_name": "RT-DETR-H_layout_17cls",
														
 
															+        "suite": "Det",
														
 
															+        "config_path": osp.join(PDX_CONFIG_DIR, "RT-DETR-H_layout_17cls.yaml"),
														
 
															+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
														
 
															+        "supported_dataset_types": ["COCODetDataset"],
														
 
															+        "supported_train_opts": {
														
 
															+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
														
 
															+            "dy2st": False,
														
 
															+            "amp": ["OFF"],
														
 
															+        },
														
 
															+    }
														
 
															+)
														
 
															+
														
 
															+
														
 
															+register_model_info(
														
 
															+    {
														
 
															+        "model_name": "PicoDet_LCNet_x2_5_face",
														
 
															+        "suite": "Det",
														
 
															+        "config_path": osp.join(PDX_CONFIG_DIR, "PicoDet_LCNet_x2_5_face.yaml"),
														
 
															+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
														
 
															+        "supported_dataset_types": ["COCODetDataset"],
														
 
															+        "supported_train_opts": {
														
 
															+            "device": ["cpu", "gpu_nxcx", "xpu", "npu", "mlu"],
														
 
															+            "dy2st": False,
														
 
															+            "amp": ["OFF"],
														
 
															+        },
														
 
															+    }
														
 
															+)