
support multi-label image classification

zhouchangda, 1 year ago
commit 24bfe4cfa1
28 changed files with 1690 additions and 33 deletions
  1. docs/tutorials/models/support_model_list.md (+11 -0)
  2. paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml (+38 -0)
  3. paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml (+38 -0)
  4. paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml (+38 -0)
  5. paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml (+38 -0)
  6. paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml (+38 -0)
  7. paddlex/configs/multilabel_classification/ResNet50_ML.yaml (+38 -0)
  8. paddlex/modules/image_classification/__init__.py (+1 -0)
  9. paddlex/modules/image_classification/dataset_checker/__init__.py (+36 -2)
  10. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py (+11 -5)
  11. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py (+29 -12)
  12. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py (+44 -0)
  13. paddlex/modules/image_classification/evaluator.py (+15 -2)
  14. paddlex/modules/image_classification/model_list.py (+9 -0)
  15. paddlex/modules/image_classification/predictor/__init__.py (+1 -1)
  16. paddlex/modules/image_classification/predictor/predictor_ml.py (+40 -0)
  17. paddlex/modules/image_classification/predictor/transforms.py (+106 -2)
  18. paddlex/modules/image_classification/predictor/utils.py (+14 -2)
  19. paddlex/modules/image_classification/trainer_ml.py (+64 -0)
  20. paddlex/repo_apis/PaddleClas_api/cls/config.py (+31 -2)
  21. paddlex/repo_apis/PaddleClas_api/cls/register.py (+60 -0)
  22. paddlex/repo_apis/PaddleClas_api/cls/runner.py (+0 -5)
  23. paddlex/repo_apis/PaddleClas_api/configs/CLIP_vit_base_patch16_448_ML.yaml (+168 -0)
  24. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B0_ML.yaml (+164 -0)
  25. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B4_ML.yaml (+164 -0)
  26. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B6_ML.yaml (+164 -0)
  27. paddlex/repo_apis/PaddleClas_api/configs/PP-LCNet_x1_0_ML.yaml (+166 -0)
  28. paddlex/repo_apis/PaddleClas_api/configs/ResNet50_ML.yaml (+164 -0)

+ 11 - 0
docs/tutorials/models/support_model_list.md

@@ -96,6 +96,17 @@
 | SwinTransformer_base_patch4_window12_384 | [SwinTransformer_base_patch4_window12_384.yaml](../../../paddlex/configs/image_classification/SwinTransformer_base_patch4_window12_384.yaml)|
 | SwinTransformer_large_patch4_window7_224 | [SwinTransformer_large_patch4_window7_224.yaml](../../../paddlex/configs/image_classification/SwinTransformer_large_patch4_window7_224.yaml)|
 | SwinTransformer_large_patch4_window12_384 | [SwinTransformer_large_patch4_window12_384.yaml](../../../paddlex/configs/image_classification/SwinTransformer_large_patch4_window12_384.yaml)|
+
+### 11. Multi-Label Classification Series
+| Model Name | config |
+| :--- | :---: |
+| ResNet50_ML | [ResNet50_ML.yaml](../../../paddlex/configs/multilabel_classification/ResNet50_ML.yaml)|
+| PP-LCNet_x1_0_ML | [PP-LCNet_x1_0_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml)|
+| PP-HGNetV2-B0_ML | [PP-HGNetV2-B0_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml)|
+| PP-HGNetV2-B4_ML | [PP-HGNetV2-B4_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml)|
+| PP-HGNetV2-B6_ML | [PP-HGNetV2-B6_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml)|
+| CLIP_vit_base_patch16_448_ML | [CLIP_vit_base_patch16_448_ML.yaml](../../../paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml)|
+
 ## 2. Object Detection
 ### 1. PP-YOLOE_plus Series
 | Model Name | config |

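The table above maps each new multi-label model to its PaddleX config. A minimal sketch of inspecting one of these files, assuming PyYAML is installed and the repo root is the working directory:

import yaml  # assumes PyYAML is available

# Read one of the configs added in this commit and show the fields PaddleX keys on.
with open("paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["Global"]["model"])       # PP-LCNet_x1_0_ML
print(cfg["Global"]["mode"])        # check_dataset (also accepts train/evaluate/predict)
print(cfg["Train"]["num_classes"])  # 33, matching the mlcls_nus_examples dataset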
+ 38 - 0
paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: CLIP_vit_base_patch16_448_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 16
+  learning_rate: 0.0003
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B0_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.0008
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B4_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.05
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B6_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 16
+  learning_rate: 0.05
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-LCNet_x1_0_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 32
+  learning_rate: 0.1
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/ResNet50_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: ResNet50_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.1
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 1 - 0
paddlex/modules/image_classification/__init__.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from .trainer import ClsTrainer
+from .trainer_ml import MLClsTrainer
 from .dataset_checker import ClsDatasetChecker
 from .evaluator import ClsEvaluator
 from .exportor import ClsExportor

+ 36 - 2
paddlex/modules/image_classification/dataset_checker/__init__.py

@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from pathlib import Path
 
 from ...base import BaseDatasetChecker
 from .dataset_src import check, split_dataset, deep_analyse
-from ..model_list import MODELS
+from ..model_list import MODELS, ML_MODELS
 
 
 class ClsDatasetChecker(BaseDatasetChecker):
@@ -103,3 +102,38 @@ class ClsDatasetChecker(BaseDatasetChecker):
             str: dataset type
         """
         return "ClsDataset"
+
+
+class MLClsDatasetChecker(ClsDatasetChecker):
+    entities = ML_MODELS
+    sample_num = 10
+
+    def check_dataset(self, dataset_dir: str, sample_num: int = sample_num) -> dict:
+        """check if the dataset meets the specifications and get dataset summary
+
+        Args:
+            dataset_dir (str): the root directory of dataset.
+            sample_num (int): the number to be sampled.
+        Returns:
+            dict: dataset summary.
+        """
+        return check(dataset_dir, self.output, dataset_type="MLCls")
+
+    def analyse(self, dataset_dir: str) -> dict:
+        """deep analyse dataset
+
+        Args:
+            dataset_dir (str): the root directory of dataset.
+
+        Returns:
+            dict: the deep analysis results.
+        """
+        return deep_analyse(dataset_dir, self.output, dataset_type="MLCls")
+
+    def get_dataset_type(self) -> str:
+        """return the dataset type
+
+        Returns:
+            str: dataset type
+        """
+        return "MLClsDataset"

+ 11 - 5
paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import json
 import math
@@ -30,7 +29,7 @@ from .....utils.file_interface import custom_open
 from .....utils.fonts import PINGFANG_FONT_FILE_PATH
 
 
-def deep_analyse(dataset_path, output):
+def deep_analyse(dataset_path, output, dataset_type="Cls"):
     """class analysis for dataset"""
     tags = ["train", "val"]
     labels_cnt = defaultdict(str)
@@ -41,15 +40,22 @@ def deep_analyse(dataset_path, output):
         line = line.strip().split()
         labels_cnt[line[0]] = " ".join(line[1:])
     for tag in tags:
-        image_path = os.path.join(dataset_path, f"{tag}.txt")
+        anno_path = os.path.join(dataset_path, f"{tag}.txt")
         classes_num = defaultdict(int)
         for i in range(len(labels_cnt)):
             classes_num[labels_cnt[str(i)]] = 0
-        with custom_open(image_path, "r") as f:
+        with custom_open(anno_path, "r") as f:
             lines = f.readlines()
         for line in lines:
             line = line.strip().split()
-            classes_num[labels_cnt[line[1]]] += 1
+            if dataset_type == "Cls":
+                classes_num[labels_cnt[line[1]]] += 1
+            elif dataset_type == "MLCls":
+                for i, label in enumerate(line[1].split(",")):
+                    if label == "1":
+                        classes_num[labels_cnt[str(i)]] += 1
+            else:
+                raise ValueError(f"dataset_type {dataset_type} is not supported")
         if tag == "train":
             cnts_train = [cat_ids for cat_name, cat_ids in classes_num.items()]
         elif tag == "val":
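Standalone illustration of the new "MLCls" counting branch above (label names are invented for the example): each annotation line carries a comma-separated 0/1 vector, and every "1" increments the counter of the class at that position.

from collections import defaultdict

labels_cnt = {"0": "cat", "1": "dog", "2": "person"}   # id -> name, as read from label.txt
classes_num = defaultdict(int)

line = "images/0001.jpg\t0,1,1".strip().split()        # -> [path, "0,1,1"]
for i, label in enumerate(line[1].split(",")):
    if label == "1":
        classes_num[labels_cnt[str(i)]] += 1

print(dict(classes_num))                               # {'dog': 1, 'person': 1}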

+ 29 - 12
paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import os.path as osp
 import random
@@ -20,10 +19,10 @@ from PIL import Image, ImageOps
 from collections import defaultdict
 
 from .....utils.errors import DatasetFileNotFoundError, CheckFailedError
-from .utils.visualizer import draw_label
+from .utils.visualizer import draw_label, draw_multi_label
 
 
-def check(dataset_dir, output, sample_num=10):
+def check(dataset_dir, output, sample_num=10, dataset_type="Cls"):
     """check dataset"""
     dataset_dir = osp.abspath(dataset_dir)
     # Custom dataset
@@ -31,7 +30,10 @@ def check(dataset_dir, output, sample_num=10):
         raise DatasetFileNotFoundError(file_path=dataset_dir)
 
     tags = ["train", "val"]
-    delim = " "
+    if dataset_type == "MLCls":
+        delim = "\t"
+    else:
+        delim = " "
     valid_num_parts = 2
 
     sample_cnts = dict()
@@ -49,7 +51,7 @@ def check(dataset_dir, output, sample_num=10):
     with open(label_file, "r", encoding="utf-8") as f:
         all_lines = f.readlines()
         for line in all_lines:
-            substr = line.strip("\n").split(delim, 1)
+            substr = line.strip("\n").split(" ", 1)
             try:
                 label_idx = int(substr[0])
                 labels.append(label_idx)
@@ -103,7 +105,14 @@ def check(dataset_dir, output, sample_num=10):
                     if len(sample_paths[tag]) < sample_num:
                         img = Image.open(img_path)
                         img = ImageOps.exif_transpose(img)
-                        vis_im = draw_label(img, label, label_map_dict)
+                        if dataset_type == "Cls":
+                            vis_im = draw_label(img, label, label_map_dict)
+                        elif dataset_type == "MLCls":
+                            vis_im = draw_multi_label(img, label, label_map_dict)
+                        else:
+                            raise CheckFailedError(
+                                f"Do not support dataset type '{dataset_type}', only support 'Cls' and 'MLCls'."
+                            )
                         vis_path = osp.join(vis_save_dir, osp.basename(file_name))
                         vis_im.save(vis_path)
                         sample_path = osp.join(
@@ -111,12 +120,20 @@ def check(dataset_dir, output, sample_num=10):
                         )
                         sample_paths[tag].append(sample_path)
 
-                    try:
-                        label = int(label)
-                    except (ValueError, TypeError) as e:
-                        raise CheckFailedError(
-                            f"Ensure that the second number in each line in {label_file} should be int."
-                        ) from e
+                    if dataset_type == "Cls":
+                        try:
+                            label = int(label)
+                        except (ValueError, TypeError) as e:
+                            raise CheckFailedError(
+                                f"Ensure that the second number in each line in {label_file} should be int."
+                            ) from e
+                    elif dataset_type == "MLCls":
+                        try:
+                            label = list(map(int, label.split(",")))
+                        except (ValueError, TypeError) as e:
+                            raise CheckFailedError(
+                                f"Ensure that the second field in each line in {label_file} is a comma-separated list of ints."
+                            ) from e
 
     num_classes = max(labels) + 1
 
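For reference, a sketch of the annotation format the "MLCls" branch accepts, derived from the parsing above (paths and class names are made up): label.txt maps integer ids to names with a space, while train.txt/val.txt separate the image path from a comma-separated 0/1 vector with a tab.

label_txt_line = "1 dog"                     # label.txt: "<id> <name>", split on " "
train_txt_line = "images/0001.jpg\t0,1,1"    # train.txt: "<path>\t<0,1,...>"

delim = "\t"                                 # delimiter chosen when dataset_type == "MLCls"
img_path, label = train_txt_line.split(delim, 1)
label = list(map(int, label.split(",")))     # mirrors the MLCls int check above
print(img_path, label)                       # images/0001.jpg [0, 1, 1]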

+ 44 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py

@@ -154,3 +154,47 @@ def draw_label(image, label, label_map_dict):
     draw.text((text_x, text_y), label_map_dict[int(label)], fill=font_color, font=font)
 
     return image
+
+
+def draw_multi_label(image, label, label_map_dict):
+    labels = label.split(",")
+    label_names = [
+        label_map_dict[i] for i, label in enumerate(labels) if int(label) == 1
+    ]
+    image = image.convert("RGB")
+    image_width, image_height = image.size
+    font_size = int(image_width * 0.06)
+
+    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
+    text_lines = []
+    row_width = 0
+    row_height = 0
+    row_text = "\t"
+    for label_name in label_names:
+        text = f"{label_name}\t"
+        text_width, row_height = font.getsize(text)
+        if row_width + text_width <= image_width:
+            row_text += text
+            row_width += text_width
+        else:
+            text_lines.append(row_text)
+            row_text = "\t" + text
+            row_width = text_width
+    text_lines.append(row_text)
+    color_list = colormap(rgb=True)
+    color = tuple(color_list[0])
+    new_image_height = image_height + len(text_lines) * int(row_height * 1.2)
+    new_image = Image.new("RGB", (image_width, new_image_height), color)
+    new_image.paste(image, (0, 0))
+
+    draw = ImageDraw.Draw(new_image)
+    font_color = tuple(font_colormap(3))
+    for i, text in enumerate(text_lines):
+        text_width, _ = font.getsize(text)
+        draw.text(
+            (0, image_height + i * int(row_height * 1.2)),
+            text,
+            fill=font_color,
+            font=font,
+        )
+    return new_image
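A hedged usage sketch for draw_multi_label; the dotted import path is inferred from the file location above, and font.getsize requires a Pillow version that still provides it.

from PIL import Image

# Import path assumed from the file location shown in this diff.
from paddlex.modules.image_classification.dataset_checker.dataset_src.utils.visualizer import (
    draw_multi_label,
)

img = Image.new("RGB", (448, 448), "white")
label_map_dict = {0: "cat", 1: "dog", 2: "person"}    # illustrative names only
vis = draw_multi_label(img, "0,1,1", label_map_dict)  # label is a comma-separated 0/1 string
vis.save("draw_multi_label_example.jpg")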

+ 15 - 2
paddlex/modules/image_classification/evaluator.py

@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from ..base import BaseEvaluator
-from .model_list import MODELS
+from .model_list import MODELS, ML_MODELS
 
 
 class ClsEvaluator(BaseEvaluator):
@@ -42,3 +41,17 @@ class ClsEvaluator(BaseEvaluator):
             "weight_path": self.eval_config.weight_path,
             "device": self.get_device(using_device_number=1),
         }
+
+
+class MLClsEvaluator(ClsEvaluator):
+    entities = ML_MODELS
+
+    def update_config(self):
+        """update evaluation config"""
+        if self.eval_config.log_interval:
+            self.pdx_config.update_log_interval(self.eval_config.log_interval)
+        if self.pdx_config["Arch"]["name"] == "DistillationModel":
+            self.pdx_config.update_teacher_model(pretrained=False)
+            self.pdx_config.update_student_model(pretrained=False)
+        self.pdx_config.update_dataset(self.global_config.dataset_dir, "MLClsDataset")
+        self.pdx_config.update_pretrained_weights(self.eval_config.weight_path)

+ 9 - 0
paddlex/modules/image_classification/model_list.py

@@ -79,3 +79,12 @@ MODELS = [
     "SwinTransformer_large_patch4_window7_224",
     "SwinTransformer_large_patch4_window12_384",
 ]
+
+ML_MODELS = [
+    "ResNet50_ML",
+    "PP-LCNet_x1_0_ML",
+    "PP-HGNetV2-B0_ML",
+    "PP-HGNetV2-B4_ML",
+    "PP-HGNetV2-B6_ML",
+    "CLIP_vit_base_patch16_448_ML",
+]

+ 1 - 1
paddlex/modules/image_classification/predictor/__init__.py

@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from .predictor import ClsPredictor
+from .predictor_ml import MLClsPredictor
 from . import transforms

+ 40 - 0
paddlex/modules/image_classification/predictor/predictor_ml.py

@@ -0,0 +1,40 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+from pathlib import Path
+
+from ...base import BasePredictor
+from ...base.predictor.transforms import image_common
+from .keys import ClsKeys as K
+from .utils import InnerConfig
+from ....utils import logging
+from . import transforms as T
+from .predictor import ClsPredictor
+from ..model_list import ML_MODELS
+
+
+class MLClsPredictor(ClsPredictor, BasePredictor):
+    """ Multi-Label Classification Predictor """
+    entities = ML_MODELS
+
+    def _get_post_transforms_from_config(self):
+        """ get postprocess transforms """
+        post_transforms = self.other_src.post_transforms
+        post_transforms.extend([
+            T.PrintResult(), T.SaveMLClsResults(self.output,
+                                                self.other_src.labels)
+        ])
+        return post_transforms

+ 106 - 2
paddlex/modules/image_classification/predictor/transforms.py

@@ -17,7 +17,7 @@ import json
 from pathlib import Path
 import numpy as np
 import PIL
-from PIL import ImageDraw, ImageFont
+from PIL import ImageDraw, ImageFont, Image
 
 from .keys import ClsKeys as K
 from ...base import BaseTransform
@@ -25,7 +25,13 @@ from ...base.predictor.io import ImageWriter, ImageReader
 from ....utils.fonts import PINGFANG_FONT_FILE_PATH
 from ....utils import logging
 
-__all__ = ["Topk", "NormalizeFeatures", "PrintResult", "SaveClsResults"]
+__all__ = [
+    "Topk",
+    "NormalizeFeatures",
+    "PrintResult",
+    "SaveClsResults",
+    "MultiLabelThreshOutput",
+]
 
 
 def _parse_class_id_map(class_ids):
@@ -282,3 +288,101 @@ class SaveClsResults(BaseTransform):
     def get_output_keys(cls):
         """get output keys"""
         return []
+
+
+class MultiLabelThreshOutput(BaseTransform):
+    def __init__(self, threshold=0.5, class_ids=None, delimiter=None):
+        super().__init__()
+        assert isinstance(threshold, (float,))
+        self.threshold = threshold
+        self.delimiter = delimiter if delimiter is not None else " "
+        self.class_id_map = _parse_class_id_map(class_ids)
+
+    def apply(self, data):
+        """apply"""
+        y = []
+        x = data[K.CLS_PRED]
+        pred_index = np.where(x >= self.threshold)[0].astype("int32")
+        index = pred_index[np.argsort(x[pred_index])][::-1]
+        clas_id_list = []
+        score_list = []
+        label_name_list = []
+        for i in index:
+            clas_id_list.append(i.item())
+            score_list.append(x[i].item())
+            if self.class_id_map is not None:
+                label_name_list.append(self.class_id_map[i.item()])
+        result = {
+            "class_ids": clas_id_list,
+            "scores": np.around(score_list, decimals=5).tolist(),
+            "label_names": label_name_list,
+        }
+        y.append(result)
+        data[K.CLS_RESULT] = y
+        return data
+
+    @classmethod
+    def get_input_keys(cls):
+        """get input keys"""
+        return [K.IM_PATH, K.CLS_PRED]
+
+    @classmethod
+    def get_output_keys(cls):
+        """get output keys"""
+        return [K.CLS_RESULT]
+
+
+class SaveMLClsResults(SaveClsResults, BaseTransform):
+    def __init__(self, save_dir, class_ids=None):
+        super().__init__(save_dir=save_dir)
+        self.save_dir = save_dir
+        self.class_id_map = _parse_class_id_map(class_ids)
+        self._writer = ImageWriter(backend="pillow")
+
+    def apply(self, data):
+        """Draw label on image"""
+        ori_path = data[K.IM_PATH]
+        results = data[K.CLS_RESULT]
+        scores = results[0]["scores"]
+        label_names = results[0]["label_names"]
+        file_name = os.path.basename(ori_path)
+        save_path = os.path.join(self.save_dir, file_name)
+        image = ImageReader(backend="pil").read(ori_path)
+        image = image.convert("RGB")
+        image_width, image_height = image.size
+        font_size = int(image_width * 0.06)
+
+        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
+        text_lines = []
+        row_width = 0
+        row_height = 0
+        row_text = "\t"
+        for label_name, score in zip(label_names, scores):
+            text = f"{label_name}({score})\t"
+            text_width, row_height = font.getsize(text)
+            if row_width + text_width <= image_width:
+                row_text += text
+                row_width += text_width
+            else:
+                text_lines.append(row_text)
+                row_text = "\t" + text
+                row_width = text_width
+        text_lines.append(row_text)
+        color_list = self._get_colormap(rgb=True)
+        color = tuple(color_list[0])
+        new_image_height = image_height + len(text_lines) * int(row_height * 1.2)
+        new_image = Image.new("RGB", (image_width, new_image_height), color)
+        new_image.paste(image, (0, 0))
+
+        draw = ImageDraw.Draw(new_image)
+        font_color = tuple(self._get_font_colormap(3))
+        for i, text in enumerate(text_lines):
+            text_width, _ = font.getsize(text)
+            draw.text(
+                (0, image_height + i * int(row_height * 1.2)),
+                text,
+                fill=font_color,
+                font=font,
+            )
+        self._write_image(save_path, new_image)
+        return data
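Standalone replay of the selection logic in MultiLabelThreshOutput.apply: keep class indices whose score reaches the threshold, then report them in descending score order.

import numpy as np

x = np.array([0.91, 0.12, 0.55, 0.73])           # per-class scores (illustrative values)
threshold = 0.5

pred_index = np.where(x >= threshold)[0].astype("int32")
index = pred_index[np.argsort(x[pred_index])][::-1]

print(index.tolist())                            # [0, 3, 2]
print(np.around(x[index], decimals=5).tolist())  # [0.91, 0.73, 0.55]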

+ 14 - 2
paddlex/modules/image_classification/predictor/utils.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import codecs
 
 import yaml
@@ -80,6 +79,11 @@ class InnerConfig(object):
                     topk=tfs_cfg["Topk"]["topk"],
                     class_ids=tfs_cfg["Topk"].get("label_list", None),
                 )
+            elif tf_key == "MultiLabelThreshOutput":
+                tf = T.MultiLabelThreshOutput(
+                    threshold=tfs_cfg["MultiLabelThreshOutput"].get("threshold", 0.5),
+                    class_ids=tfs_cfg["MultiLabelThreshOutput"].get("label_list", None),
+                )
             elif tf_key in IGNORE_OPS:
                 continue
             else:
@@ -90,4 +94,12 @@ class InnerConfig(object):
     @property
     def labels(self):
         """the labels in inner config"""
-        return self.inner_cfg["PostProcess"]["Topk"].get("label_list", None)
+        postprocess_name = self.inner_cfg["PostProcess"].keys()
+        if "Topk" in postprocess_name:
+            return self.inner_cfg["PostProcess"]["Topk"].get("label_list", None)
+        elif "MultiLabelThreshOutput" in postprocess_name:
+            return self.inner_cfg["PostProcess"]["MultiLabelThreshOutput"].get(
+                "label_list", None
+            )
+        else:
+            return None
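Illustration of the updated labels property: it now returns label_list from whichever PostProcess section is present, Topk for single-label models or MultiLabelThreshOutput for the *_ML models (label names below are placeholders).

inner_cfg = {
    "PostProcess": {
        "MultiLabelThreshOutput": {"threshold": 0.5, "label_list": ["cat", "dog", "person"]},
    }
}

postprocess = inner_cfg["PostProcess"]
if "Topk" in postprocess:
    labels = postprocess["Topk"].get("label_list", None)
elif "MultiLabelThreshOutput" in postprocess:
    labels = postprocess["MultiLabelThreshOutput"].get("label_list", None)
else:
    labels = None

print(labels)  # ['cat', 'dog', 'person']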

+ 64 - 0
paddlex/modules/image_classification/trainer_ml.py

@@ -0,0 +1,64 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import shutil
+import paddle
+from pathlib import Path
+
+from ..base import BaseTrainer, BaseTrainDeamon
+from .trainer import ClsTrainer, ClsTrainDeamon
+from .model_list import ML_MODELS
+from ...utils.config import AttrDict
+
+
+class MLClsTrainer(ClsTrainer, BaseTrainer):
+    """ Multi Label Image Classification Model Trainer """
+    entities = ML_MODELS
+
+    def update_config(self):
+        """update training config
+        """
+        if self.train_config.log_interval:
+            self.pdx_config.update_log_interval(self.train_config.log_interval)
+        if self.train_config.eval_interval:
+            self.pdx_config.update_eval_interval(
+                self.train_config.eval_interval)
+        if self.train_config.save_interval:
+            self.pdx_config.update_save_interval(
+                self.train_config.save_interval)
+
+        self.pdx_config.update_dataset(self.global_config.dataset_dir,
+                                       "MLClsDataset")
+        if self.train_config.num_classes is not None:
+            self.pdx_config.update_num_classes(self.train_config.num_classes)
+        if self.train_config.pretrain_weight_path and self.train_config.pretrain_weight_path != "":
+            self.pdx_config.update_pretrained_weights(
+                self.train_config.pretrain_weight_path)
+
+        label_dict_path = Path(self.global_config.dataset_dir).joinpath(
+            "label.txt")
+        if label_dict_path.exists():
+            self.dump_label_dict(label_dict_path)
+        if self.train_config.batch_size is not None:
+            self.pdx_config.update_batch_size(self.train_config.batch_size)
+        if self.train_config.learning_rate is not None:
+            self.pdx_config.update_learning_rate(
+                self.train_config.learning_rate)
+        if self.train_config.epochs_iters is not None:
+            self.pdx_config._update_epochs(self.train_config.epochs_iters)
+        if self.train_config.warmup_steps is not None:
+            self.pdx_config.update_warmup_epochs(self.train_config.warmup_steps)
+        if self.global_config.output is not None:
+            self.pdx_config._update_output_dir(self.global_config.output)

+ 31 - 2
paddlex/repo_apis/PaddleClas_api/cls/config.py

@@ -84,7 +84,7 @@ class ClsConfig(BaseConfig):
         else:
             train_list_path = f"{dataset_path}/train.txt"
 
-        if dataset_type in ["ClsDataset"]:
+        if dataset_type in ["ClsDataset", "MLClsDataset"]:
             ds_cfg = [
                 f"DataLoader.Train.dataset.name={dataset_type}",
                 f"DataLoader.Train.dataset.image_root={dataset_path}",
@@ -129,7 +129,12 @@ class ClsConfig(BaseConfig):
         Args:
             learning_rate (float): the learning rate value to set.
         """
-        _cfg = [f"Optimizer.lr.learning_rate={learning_rate}"]
+        if self._dict["Optimizer"]["lr"].get("learning_rate", None) is not None:
+            _cfg = [f"Optimizer.lr.learning_rate={learning_rate}"]
+        elif self._dict["Optimizer"]["lr"].get("max_learning_rate", None) is not None:
+            _cfg = [f"Optimizer.lr.max_learning_rate={learning_rate}"]
+        else:
+            raise ValueError("unsupported lr format")
         self.update(_cfg)
 
     def update_warmup_epochs(self, warmup_epochs: int):
@@ -176,8 +181,32 @@ indicating that no pretrained model to be used."
         if self._get_arch_name() == "DistillationModel":
             update_str_list.append(f"Arch.models.0.Teacher.class_num={num_classes}")
             update_str_list.append(f"Arch.models.1.Student.class_num={num_classes}")
+        ml_decoder = self.dict.get("MLDecoder", None)
+        if ml_decoder is not None:
+            self.update_ml_query_num(num_classes)
+            self.update_ml_class_num(num_classes)
         self.update(update_str_list)
 
+    def update_ml_query_num(self, query_num: int):
+        """update MLDecoder query number
+        Args:
+            query_num (int): the query number value to set; query_num should be less than or equal to num_classes.
+        """
+        base_query_num = self.dict.get("MLDecoder", {}).get("query_num", None)
+        if base_query_num is not None:
+            _cfg = [f"MLDecoder.query_num={query_num}"]
+            self.update(_cfg)
+
+    def update_ml_class_num(self, class_num: int):
+        """update MLDecoder class number
+        Args:
+            class_num (int): the class number value to set.
+        """
+        base_class_num = self.dict.get("MLDecoder", {}).get("class_num", None)
+        if base_class_num is not None:
+            _cfg = [f"MLDecoder.class_num={class_num}"]
+            self.update(_cfg)
+
     def _update_slim_config(self, slim_config_path: str):
         """update slim settings
 

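Standalone illustration of the two changes to ClsConfig, using a plain dict in place of the real config object: update_learning_rate now writes to whichever key Optimizer.lr actually defines, and update_num_classes mirrors num_classes into the MLDecoder section when one exists.

cfg = {
    "Optimizer": {"lr": {"name": "Cosine", "learning_rate": 1e-4}},
    "MLDecoder": {"query_num": 80, "in_channels": 2048},
}

# Pick the override key the same way update_learning_rate does.
lr_cfg = cfg["Optimizer"]["lr"]
if lr_cfg.get("learning_rate", None) is not None:
    override = "Optimizer.lr.learning_rate=0.05"
elif lr_cfg.get("max_learning_rate", None) is not None:
    override = "Optimizer.lr.max_learning_rate=0.05"
else:
    raise ValueError("unsupported lr format")

# Mirror num_classes into MLDecoder, as update_num_classes now does.
num_classes = 33
if cfg.get("MLDecoder", None) is not None:
    if cfg["MLDecoder"].get("query_num", None) is not None:
        cfg["MLDecoder"]["query_num"] = num_classes   # query_num <= num_classes
    if cfg["MLDecoder"].get("class_num", None) is not None:
        cfg["MLDecoder"]["class_num"] = num_classes

print(override)          # Optimizer.lr.learning_rate=0.05
print(cfg["MLDecoder"])  # {'query_num': 33, 'in_channels': 2048}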
+ 60 - 0
paddlex/repo_apis/PaddleClas_api/cls/register.py

@@ -639,3 +639,63 @@ register_model_info(
         "supported_apis": ["train", "evaluate", "predict", "export"],
     }
 )
+
+register_model_info(
+    {
+        "model_name": "PP-LCNet_x1_0_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-LCNet_x1_0_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "ResNet50_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "ResNet50_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B0_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B0_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B4_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B4_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B6_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B6_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "CLIP_vit_base_patch16_448_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "CLIP_vit_base_patch16_448_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)

+ 0 - 5
paddlex/repo_apis/PaddleClas_api/cls/runner.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import tempfile
 
@@ -23,8 +22,6 @@ from ...base.utils.subprocess import CompletedProcess
 class ClsRunner(BaseRunner):
     """Cls Runner"""
 
-    _INFER_CONFIG_REL_PATH = os.path.join("deploy", "configs", "inference_cls.yaml")
-
     def train(
         self,
         config_path: str,
@@ -125,8 +122,6 @@ class ClsRunner(BaseRunner):
             *cli_args,
             "-o",
             "Global.export_for_fd=True",
-            "-o",
-            f"Global.infer_config_path={os.path.join(self.runner_root_path, self._INFER_CONFIG_REL_PATH)}",
         ]
 
         cp = self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)

+ 168 - 0
paddlex/repo_apis/PaddleClas_api/configs/CLIP_vit_base_patch16_448_ML.yaml

@@ -0,0 +1,168 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: False
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O2
+
+# model architecture
+Arch:
+  name: CLIP_vit_base_patch16_224
+  class_num: 80
+  return_embed: False
+  use_fused_attn: False # fused attn can be used in AMP O2 mode only
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 768
+  remove_layers: []
+  replace_layer: 'head'
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 5e-5
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 4
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B0_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B0
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 32
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B4_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B4
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B6_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B6
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 8
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 4
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 166 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-LCNet_x1_0_ML.yaml

@@ -0,0 +1,166 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPLCNet_x1_0
+  class_num: 80
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  class_num: 80
+  in_channels: 1280
+
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/ResNet50_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 80
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral