
support multi-label image classification

zhouchangda, 1 year ago
commit 24bfe4cfa1
28 changed files with 1690 additions and 33 deletions
  1. docs/tutorials/models/support_model_list.md (+11 -0)
  2. paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml (+38 -0)
  3. paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml (+38 -0)
  4. paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml (+38 -0)
  5. paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml (+38 -0)
  6. paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml (+38 -0)
  7. paddlex/configs/multilabel_classification/ResNet50_ML.yaml (+38 -0)
  8. paddlex/modules/image_classification/__init__.py (+1 -0)
  9. paddlex/modules/image_classification/dataset_checker/__init__.py (+36 -2)
  10. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py (+11 -5)
  11. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py (+29 -12)
  12. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py (+44 -0)
  13. paddlex/modules/image_classification/evaluator.py (+15 -2)
  14. paddlex/modules/image_classification/model_list.py (+9 -0)
  15. paddlex/modules/image_classification/predictor/__init__.py (+1 -1)
  16. paddlex/modules/image_classification/predictor/predictor_ml.py (+40 -0)
  17. paddlex/modules/image_classification/predictor/transforms.py (+106 -2)
  18. paddlex/modules/image_classification/predictor/utils.py (+14 -2)
  19. paddlex/modules/image_classification/trainer_ml.py (+64 -0)
  20. paddlex/repo_apis/PaddleClas_api/cls/config.py (+31 -2)
  21. paddlex/repo_apis/PaddleClas_api/cls/register.py (+60 -0)
  22. paddlex/repo_apis/PaddleClas_api/cls/runner.py (+0 -5)
  23. paddlex/repo_apis/PaddleClas_api/configs/CLIP_vit_base_patch16_448_ML.yaml (+168 -0)
  24. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B0_ML.yaml (+164 -0)
  25. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B4_ML.yaml (+164 -0)
  26. paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B6_ML.yaml (+164 -0)
  27. paddlex/repo_apis/PaddleClas_api/configs/PP-LCNet_x1_0_ML.yaml (+166 -0)
  28. paddlex/repo_apis/PaddleClas_api/configs/ResNet50_ML.yaml (+164 -0)

+ 11 - 0
docs/tutorials/models/support_model_list.md

@@ -96,6 +96,17 @@
 | SwinTransformer_base_patch4_window12_384 | [SwinTransformer_base_patch4_window12_384.yaml](../../../paddlex/configs/image_classification/SwinTransformer_base_patch4_window12_384.yaml)|
 | SwinTransformer_large_patch4_window7_224 | [SwinTransformer_large_patch4_window7_224.yaml](../../../paddlex/configs/image_classification/SwinTransformer_large_patch4_window7_224.yaml)|
 | SwinTransformer_large_patch4_window12_384 | [SwinTransformer_large_patch4_window12_384.yaml](../../../paddlex/configs/image_classification/SwinTransformer_large_patch4_window12_384.yaml)|
+
+### 11. Multi-Label Classification Series
+| Model Name | config |
+| :--- | :---: |
+| ResNet50_ML | [ResNet50_ML.yaml](../../../paddlex/configs/multilabel_classification/ResNet50_ML.yaml)|
+| PP-LCNet_x1_0_ML | [PP-LCNet_x1_0_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml)|
+| PP-HGNetV2-B0_ML | [PP-HGNetV2-B0_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml)|
+| PP-HGNetV2-B4_ML | [PP-HGNetV2-B4_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml)|
+| PP-HGNetV2-B6_ML | [PP-HGNetV2-B6_ML.yaml](../../../paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml)|
+| CLIP_vit_base_patch16_448_ML | [CLIP_vit_base_patch16_448_ML.yaml](../../../paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml)|
+
 ## 2. Object Detection
 ### 1. PP-YOLOE_plus Series
 | Model Name | config |

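The table above maps each new multi-label model to its PaddleX config. A minimal sketch of inspecting one of these files, assuming PyYAML is installed and the repo root is the working directory:

import yaml  # assumes PyYAML is available

# Read one of the configs added in this commit and show the fields PaddleX keys on.
with open("paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["Global"]["model"])       # PP-LCNet_x1_0_ML
print(cfg["Global"]["mode"])        # check_dataset (also accepts train/evaluate/predict)
print(cfg["Train"]["num_classes"])  # 33, matching the mlcls_nus_examples dataset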
+ 38 - 0
paddlex/configs/multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: CLIP_vit_base_patch16_448_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 16
+  learning_rate: 0.0003
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B0_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B0_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.0008
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B4_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B4_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.05
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-HGNetV2-B6_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-HGNetV2-B6_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 16
+  learning_rate: 0.05
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/PP-LCNet_x1_0_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: PP-LCNet_x1_0_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 32
+  learning_rate: 0.1
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 38 - 0
paddlex/configs/multilabel_classification/ResNet50_ML.yaml

@@ -0,0 +1,38 @@
+Global:
+  model: ResNet50_ML
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: dataset/mlcls_nus_examples
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: null
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  num_classes: 33
+  epochs_iters: 20
+  batch_size: 64
+  learning_rate: 0.1
+  pretrain_weight_path: null
+  warmup_steps: 5
+  resume_path: null
+  log_interval: 1
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  weight_path: "output/best_model.pdparams"
+  log_interval: 1
+
+Predict:
+  model_dir: "output/best_model"
+  input_path: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg"
+  kernel_option:
+    run_mode: paddle
+    batch_size: 1

+ 1 - 0
paddlex/modules/image_classification/__init__.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from .trainer import ClsTrainer
+from .trainer_ml import MLClsTrainer
 from .dataset_checker import ClsDatasetChecker
 from .evaluator import ClsEvaluator
 from .exportor import ClsExportor

+ 36 - 2
paddlex/modules/image_classification/dataset_checker/__init__.py

@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from pathlib import Path
 
 from ...base import BaseDatasetChecker
 from .dataset_src import check, split_dataset, deep_analyse
-from ..model_list import MODELS
+from ..model_list import MODELS, ML_MODELS
 
 
 class ClsDatasetChecker(BaseDatasetChecker):
@@ -103,3 +102,38 @@ class ClsDatasetChecker(BaseDatasetChecker):
             str: dataset type
         """
         return "ClsDataset"
+
+
+class MLClsDatasetChecker(ClsDatasetChecker):
+    entities = ML_MODELS
+    sample_num = 10
+
+    def check_dataset(self, dataset_dir: str, sample_num: int = sample_num) -> dict:
+        """check if the dataset meets the specifications and get dataset summary
+
+        Args:
+            dataset_dir (str): the root directory of dataset.
+            sample_num (int): the number to be sampled.
+        Returns:
+            dict: dataset summary.
+        """
+        return check(dataset_dir, self.output, dataset_type="MLCls")
+
+    def analyse(self, dataset_dir: str) -> dict:
+        """deep analyse dataset
+
+        Args:
+            dataset_dir (str): the root directory of dataset.
+
+        Returns:
+            dict: the deep analysis results.
+        """
+        return deep_analyse(dataset_dir, self.output, dataset_type="MLCls")
+
+    def get_dataset_type(self) -> str:
+        """return the dataset type
+
+        Returns:
+            str: dataset type
+        """
+        return "MLClsDataset"

+ 11 - 5
paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import json
 import math
@@ -30,7 +29,7 @@ from .....utils.file_interface import custom_open
 from .....utils.fonts import PINGFANG_FONT_FILE_PATH
 
 
-def deep_analyse(dataset_path, output):
+def deep_analyse(dataset_path, output, dataset_type="Cls"):
     """class analysis for dataset"""
     tags = ["train", "val"]
     labels_cnt = defaultdict(str)
@@ -41,15 +40,22 @@ def deep_analyse(dataset_path, output):
         line = line.strip().split()
         labels_cnt[line[0]] = " ".join(line[1:])
     for tag in tags:
-        image_path = os.path.join(dataset_path, f"{tag}.txt")
+        anno_path = os.path.join(dataset_path, f"{tag}.txt")
         classes_num = defaultdict(int)
         for i in range(len(labels_cnt)):
             classes_num[labels_cnt[str(i)]] = 0
-        with custom_open(image_path, "r") as f:
+        with custom_open(anno_path, "r") as f:
             lines = f.readlines()
         for line in lines:
             line = line.strip().split()
-            classes_num[labels_cnt[line[1]]] += 1
+            if dataset_type == "Cls":
+                classes_num[labels_cnt[line[1]]] += 1
+            elif dataset_type == "MLCls":
+                for i, label in enumerate(line[1].split(",")):
+                    if label == "1":
+                        classes_num[labels_cnt[str(i)]] += 1
+            else:
+                raise ValueError(f"dataset_type {dataset_type} is not supported")
         if tag == "train":
             cnts_train = [cat_ids for cat_name, cat_ids in classes_num.items()]
         elif tag == "val":
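Standalone illustration of the new "MLCls" counting branch above (label names are invented for the example): each annotation line carries a comma-separated 0/1 vector, and every "1" increments the counter of the class at that position.

from collections import defaultdict

labels_cnt = {"0": "cat", "1": "dog", "2": "person"}   # id -> name, as read from label.txt
classes_num = defaultdict(int)

line = "images/0001.jpg\t0,1,1".strip().split()        # -> [path, "0,1,1"]
for i, label in enumerate(line[1].split(",")):
    if label == "1":
        classes_num[labels_cnt[str(i)]] += 1

print(dict(classes_num))                               # {'dog': 1, 'person': 1}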

+ 29 - 12
paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import os.path as osp
 import random
@@ -20,10 +19,10 @@ from PIL import Image, ImageOps
 from collections import defaultdict
 
 from .....utils.errors import DatasetFileNotFoundError, CheckFailedError
-from .utils.visualizer import draw_label
+from .utils.visualizer import draw_label, draw_multi_label
 
 
-def check(dataset_dir, output, sample_num=10):
+def check(dataset_dir, output, sample_num=10, dataset_type="Cls"):
     """check dataset"""
     dataset_dir = osp.abspath(dataset_dir)
     # Custom dataset
@@ -31,7 +30,10 @@ def check(dataset_dir, output, sample_num=10):
         raise DatasetFileNotFoundError(file_path=dataset_dir)
 
     tags = ["train", "val"]
-    delim = " "
+    if dataset_type == "MLCls":
+        delim = "\t"
+    else:
+        delim = " "
     valid_num_parts = 2
 
     sample_cnts = dict()
@@ -49,7 +51,7 @@ def check(dataset_dir, output, sample_num=10):
     with open(label_file, "r", encoding="utf-8") as f:
         all_lines = f.readlines()
         for line in all_lines:
-            substr = line.strip("\n").split(delim, 1)
+            substr = line.strip("\n").split(" ", 1)
             try:
                 label_idx = int(substr[0])
                 labels.append(label_idx)
@@ -103,7 +105,14 @@ def check(dataset_dir, output, sample_num=10):
                     if len(sample_paths[tag]) < sample_num:
                         img = Image.open(img_path)
                         img = ImageOps.exif_transpose(img)
-                        vis_im = draw_label(img, label, label_map_dict)
+                        if dataset_type == "Cls":
+                            vis_im = draw_label(img, label, label_map_dict)
+                        elif dataset_type == "MLCls":
+                            vis_im = draw_multi_label(img, label, label_map_dict)
+                        else:
+                            raise CheckFailedError(
+                                f"Do not support dataset type '{dataset_type}', only support 'Cls' and 'MLCls'."
+                            )
                         vis_path = osp.join(vis_save_dir, osp.basename(file_name))
                         vis_im.save(vis_path)
                         sample_path = osp.join(
@@ -111,12 +120,20 @@ def check(dataset_dir, output, sample_num=10):
                         )
                         sample_paths[tag].append(sample_path)
 
-                    try:
-                        label = int(label)
-                    except (ValueError, TypeError) as e:
-                        raise CheckFailedError(
-                            f"Ensure that the second number in each line in {label_file} should be int."
-                        ) from e
+                    if dataset_type == "Cls":
+                        try:
+                            label = int(label)
+                        except (ValueError, TypeError) as e:
+                            raise CheckFailedError(
+                                f"Ensure that the second number in each line in {label_file} should be int."
+                            ) from e
+                    elif dataset_type == "MLCls":
+                        try:
+                            label = list(map(int, label.split(",")))
+                        except (ValueError, TypeError) as e:
+                            raise CheckFailedError(
+                                f"Ensure that the second field in each line in {label_file} is a comma-separated list of ints."
+                            ) from e
 
     num_classes = max(labels) + 1
 
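For reference, a sketch of the annotation format the "MLCls" branch accepts, derived from the parsing above (paths and class names are made up): label.txt maps integer ids to names with a space, while train.txt/val.txt separate the image path from a comma-separated 0/1 vector with a tab.

label_txt_line = "1 dog"                     # label.txt: "<id> <name>", split on " "
train_txt_line = "images/0001.jpg\t0,1,1"    # train.txt: "<path>\t<0,1,...>"

delim = "\t"                                 # delimiter chosen when dataset_type == "MLCls"
img_path, label = train_txt_line.split(delim, 1)
label = list(map(int, label.split(",")))     # mirrors the MLCls int check above
print(img_path, label)                       # images/0001.jpg [0, 1, 1]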

+ 44 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py

@@ -154,3 +154,47 @@ def draw_label(image, label, label_map_dict):
     draw.text((text_x, text_y), label_map_dict[int(label)], fill=font_color, font=font)
 
     return image
+
+
+def draw_multi_label(image, label, label_map_dict):
+    labels = label.split(",")
+    label_names = [
+        label_map_dict[i] for i, label in enumerate(labels) if int(label) == 1
+    ]
+    image = image.convert("RGB")
+    image_width, image_height = image.size
+    font_size = int(image_width * 0.06)
+
+    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
+    text_lines = []
+    row_width = 0
+    row_height = 0
+    row_text = "\t"
+    for label_name in label_names:
+        text = f"{label_name}\t"
+        text_width, row_height = font.getsize(text)
+        if row_width + text_width <= image_width:
+            row_text += text
+            row_width += text_width
+        else:
+            text_lines.append(row_text)
+            row_text = "\t" + text
+            row_width = text_width
+    text_lines.append(row_text)
+    color_list = colormap(rgb=True)
+    color = tuple(color_list[0])
+    new_image_height = image_height + len(text_lines) * int(row_height * 1.2)
+    new_image = Image.new("RGB", (image_width, new_image_height), color)
+    new_image.paste(image, (0, 0))
+
+    draw = ImageDraw.Draw(new_image)
+    font_color = tuple(font_colormap(3))
+    for i, text in enumerate(text_lines):
+        text_width, _ = font.getsize(text)
+        draw.text(
+            (0, image_height + i * int(row_height * 1.2)),
+            text,
+            fill=font_color,
+            font=font,
+        )
+    return new_image
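A hedged usage sketch for draw_multi_label; the dotted import path is inferred from the file location above, and font.getsize requires a Pillow version that still provides it.

from PIL import Image

# Import path assumed from the file location shown in this diff.
from paddlex.modules.image_classification.dataset_checker.dataset_src.utils.visualizer import (
    draw_multi_label,
)

img = Image.new("RGB", (448, 448), "white")
label_map_dict = {0: "cat", 1: "dog", 2: "person"}    # illustrative names only
vis = draw_multi_label(img, "0,1,1", label_map_dict)  # label is a comma-separated 0/1 string
vis.save("draw_multi_label_example.jpg")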

+ 15 - 2
paddlex/modules/image_classification/evaluator.py

@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from ..base import BaseEvaluator
-from .model_list import MODELS
+from .model_list import MODELS, ML_MODELS
 
 
 class ClsEvaluator(BaseEvaluator):
@@ -42,3 +41,17 @@ class ClsEvaluator(BaseEvaluator):
             "weight_path": self.eval_config.weight_path,
             "device": self.get_device(using_device_number=1),
         }
+
+
+class MLClsEvaluator(ClsEvaluator):
+    entities = ML_MODELS
+
+    def update_config(self):
+        """update evaluation config"""
+        if self.eval_config.log_interval:
+            self.pdx_config.update_log_interval(self.eval_config.log_interval)
+        if self.pdx_config["Arch"]["name"] == "DistillationModel":
+            self.pdx_config.update_teacher_model(pretrained=False)
+            self.pdx_config.update_student_model(pretrained=False)
+        self.pdx_config.update_dataset(self.global_config.dataset_dir, "MLClsDataset")
+        self.pdx_config.update_pretrained_weights(self.eval_config.weight_path)

+ 9 - 0
paddlex/modules/image_classification/model_list.py

@@ -79,3 +79,12 @@ MODELS = [
     "SwinTransformer_large_patch4_window7_224",
     "SwinTransformer_large_patch4_window12_384",
 ]
+
+ML_MODELS = [
+    "ResNet50_ML",
+    "PP-LCNet_x1_0_ML",
+    "PP-HGNetV2-B0_ML",
+    "PP-HGNetV2-B4_ML",
+    "PP-HGNetV2-B6_ML",
+    "CLIP_vit_base_patch16_448_ML",
+]

+ 1 - 1
paddlex/modules/image_classification/predictor/__init__.py

@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from .predictor import ClsPredictor
+from .predictor_ml import MLClsPredictor
 from . import transforms

+ 40 - 0
paddlex/modules/image_classification/predictor/predictor_ml.py

@@ -0,0 +1,40 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+from pathlib import Path
+
+from ...base import BasePredictor
+from ...base.predictor.transforms import image_common
+from .keys import ClsKeys as K
+from .utils import InnerConfig
+from ....utils import logging
+from . import transforms as T
+from .predictor import ClsPredictor
+from ..model_list import ML_MODELS
+
+
+class MLClsPredictor(ClsPredictor, BasePredictor):
+    """ Multi-Label Classification Predictor """
+    entities = ML_MODELS
+
+    def _get_post_transforms_from_config(self):
+        """ get postprocess transforms """
+        post_transforms = self.other_src.post_transforms
+        post_transforms.extend([
+            T.PrintResult(), T.SaveMLClsResults(self.output,
+                                                self.other_src.labels)
+        ])
+        return post_transforms

+ 106 - 2
paddlex/modules/image_classification/predictor/transforms.py

@@ -17,7 +17,7 @@ import json
 from pathlib import Path
 import numpy as np
 import PIL
-from PIL import ImageDraw, ImageFont
+from PIL import ImageDraw, ImageFont, Image
 
 from .keys import ClsKeys as K
 from ...base import BaseTransform
@@ -25,7 +25,13 @@ from ...base.predictor.io import ImageWriter, ImageReader
 from ....utils.fonts import PINGFANG_FONT_FILE_PATH
 from ....utils import logging
 
-__all__ = ["Topk", "NormalizeFeatures", "PrintResult", "SaveClsResults"]
+__all__ = [
+    "Topk",
+    "NormalizeFeatures",
+    "PrintResult",
+    "SaveClsResults",
+    "MultiLabelThreshOutput",
+]
 
 
 def _parse_class_id_map(class_ids):
@@ -282,3 +288,101 @@ class SaveClsResults(BaseTransform):
     def get_output_keys(cls):
         """get output keys"""
         return []
+
+
+class MultiLabelThreshOutput(BaseTransform):
+    def __init__(self, threshold=0.5, class_ids=None, delimiter=None):
+        super().__init__()
+        assert isinstance(threshold, (float,))
+        self.threshold = threshold
+        self.delimiter = delimiter if delimiter is not None else " "
+        self.class_id_map = _parse_class_id_map(class_ids)
+
+    def apply(self, data):
+        """apply"""
+        y = []
+        x = data[K.CLS_PRED]
+        pred_index = np.where(x >= self.threshold)[0].astype("int32")
+        index = pred_index[np.argsort(x[pred_index])][::-1]
+        clas_id_list = []
+        score_list = []
+        label_name_list = []
+        for i in index:
+            clas_id_list.append(i.item())
+            score_list.append(x[i].item())
+            if self.class_id_map is not None:
+                label_name_list.append(self.class_id_map[i.item()])
+        result = {
+            "class_ids": clas_id_list,
+            "scores": np.around(score_list, decimals=5).tolist(),
+            "label_names": label_name_list,
+        }
+        y.append(result)
+        data[K.CLS_RESULT] = y
+        return data
+
+    @classmethod
+    def get_input_keys(cls):
+        """get input keys"""
+        return [K.IM_PATH, K.CLS_PRED]
+
+    @classmethod
+    def get_output_keys(cls):
+        """get output keys"""
+        return [K.CLS_RESULT]
+
+
+class SaveMLClsResults(SaveClsResults, BaseTransform):
+    def __init__(self, save_dir, class_ids=None):
+        super().__init__(save_dir=save_dir)
+        self.save_dir = save_dir
+        self.class_id_map = _parse_class_id_map(class_ids)
+        self._writer = ImageWriter(backend="pillow")
+
+    def apply(self, data):
+        """Draw label on image"""
+        ori_path = data[K.IM_PATH]
+        results = data[K.CLS_RESULT]
+        scores = results[0]["scores"]
+        label_names = results[0]["label_names"]
+        file_name = os.path.basename(ori_path)
+        save_path = os.path.join(self.save_dir, file_name)
+        image = ImageReader(backend="pil").read(ori_path)
+        image = image.convert("RGB")
+        image_width, image_height = image.size
+        font_size = int(image_width * 0.06)
+
+        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
+        text_lines = []
+        row_width = 0
+        row_height = 0
+        row_text = "\t"
+        for label_name, score in zip(label_names, scores):
+            text = f"{label_name}({score})\t"
+            text_width, row_height = font.getsize(text)
+            if row_width + text_width <= image_width:
+                row_text += text
+                row_width += text_width
+            else:
+                text_lines.append(row_text)
+                row_text = "\t" + text
+                row_width = text_width
+        text_lines.append(row_text)
+        color_list = self._get_colormap(rgb=True)
+        color = tuple(color_list[0])
+        new_image_height = image_height + len(text_lines) * int(row_height * 1.2)
+        new_image = Image.new("RGB", (image_width, new_image_height), color)
+        new_image.paste(image, (0, 0))
+
+        draw = ImageDraw.Draw(new_image)
+        font_color = tuple(self._get_font_colormap(3))
+        for i, text in enumerate(text_lines):
+            text_width, _ = font.getsize(text)
+            draw.text(
+                (0, image_height + i * int(row_height * 1.2)),
+                text,
+                fill=font_color,
+                font=font,
+            )
+        self._write_image(save_path, new_image)
+        return data
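Standalone replay of the selection logic in MultiLabelThreshOutput.apply: keep class indices whose score reaches the threshold, then report them in descending score order.

import numpy as np

x = np.array([0.91, 0.12, 0.55, 0.73])           # per-class scores (illustrative values)
threshold = 0.5

pred_index = np.where(x >= threshold)[0].astype("int32")
index = pred_index[np.argsort(x[pred_index])][::-1]

print(index.tolist())                            # [0, 3, 2]
print(np.around(x[index], decimals=5).tolist())  # [0.91, 0.73, 0.55]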

+ 14 - 2
paddlex/modules/image_classification/predictor/utils.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import codecs
 
 import yaml
@@ -80,6 +79,11 @@ class InnerConfig(object):
                     topk=tfs_cfg["Topk"]["topk"],
                     class_ids=tfs_cfg["Topk"].get("label_list", None),
                 )
+            elif tf_key == "MultiLabelThreshOutput":
+                tf = T.MultiLabelThreshOutput(
+                    threshold=tfs_cfg["MultiLabelThreshOutput"].get("threshold", 0.5),
+                    class_ids=tfs_cfg["MultiLabelThreshOutput"].get("label_list", None),
+                )
             elif tf_key in IGNORE_OPS:
                 continue
             else:
@@ -90,4 +94,12 @@ class InnerConfig(object):
     @property
     def labels(self):
         """the labels in inner config"""
-        return self.inner_cfg["PostProcess"]["Topk"].get("label_list", None)
+        postprocess_name = self.inner_cfg["PostProcess"].keys()
+        if "Topk" in postprocess_name:
+            return self.inner_cfg["PostProcess"]["Topk"].get("label_list", None)
+        elif "MultiLabelThreshOutput" in postprocess_name:
+            return self.inner_cfg["PostProcess"]["MultiLabelThreshOutput"].get(
+                "label_list", None
+            )
+        else:
+            return None
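Illustration of the updated labels property: it now returns label_list from whichever PostProcess section is present, Topk for single-label models or MultiLabelThreshOutput for the *_ML models (label names below are placeholders).

inner_cfg = {
    "PostProcess": {
        "MultiLabelThreshOutput": {"threshold": 0.5, "label_list": ["cat", "dog", "person"]},
    }
}

postprocess = inner_cfg["PostProcess"]
if "Topk" in postprocess:
    labels = postprocess["Topk"].get("label_list", None)
elif "MultiLabelThreshOutput" in postprocess:
    labels = postprocess["MultiLabelThreshOutput"].get("label_list", None)
else:
    labels = None

print(labels)  # ['cat', 'dog', 'person']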

+ 64 - 0
paddlex/modules/image_classification/trainer_ml.py

@@ -0,0 +1,64 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import shutil
+import paddle
+from pathlib import Path
+
+from ..base import BaseTrainer, BaseTrainDeamon
+from .trainer import ClsTrainer, ClsTrainDeamon
+from .model_list import ML_MODELS
+from ...utils.config import AttrDict
+
+
+class MLClsTrainer(ClsTrainer, BaseTrainer):
+    """ Multi Label Image Classification Model Trainer """
+    entities = ML_MODELS
+
+    def update_config(self):
+        """update training config
+        """
+        if self.train_config.log_interval:
+            self.pdx_config.update_log_interval(self.train_config.log_interval)
+        if self.train_config.eval_interval:
+            self.pdx_config.update_eval_interval(
+                self.train_config.eval_interval)
+        if self.train_config.save_interval:
+            self.pdx_config.update_save_interval(
+                self.train_config.save_interval)
+
+        self.pdx_config.update_dataset(self.global_config.dataset_dir,
+                                       "MLClsDataset")
+        if self.train_config.num_classes is not None:
+            self.pdx_config.update_num_classes(self.train_config.num_classes)
+        if self.train_config.pretrain_weight_path and self.train_config.pretrain_weight_path != "":
+            self.pdx_config.update_pretrained_weights(
+                self.train_config.pretrain_weight_path)
+
+        label_dict_path = Path(self.global_config.dataset_dir).joinpath(
+            "label.txt")
+        if label_dict_path.exists():
+            self.dump_label_dict(label_dict_path)
+        if self.train_config.batch_size is not None:
+            self.pdx_config.update_batch_size(self.train_config.batch_size)
+        if self.train_config.learning_rate is not None:
+            self.pdx_config.update_learning_rate(
+                self.train_config.learning_rate)
+        if self.train_config.epochs_iters is not None:
+            self.pdx_config._update_epochs(self.train_config.epochs_iters)
+        if self.train_config.warmup_steps is not None:
+            self.pdx_config.update_warmup_epochs(self.train_config.warmup_steps)
+        if self.global_config.output is not None:
+            self.pdx_config._update_output_dir(self.global_config.output)

+ 31 - 2
paddlex/repo_apis/PaddleClas_api/cls/config.py

@@ -84,7 +84,7 @@ class ClsConfig(BaseConfig):
         else:
             train_list_path = f"{dataset_path}/train.txt"
 
-        if dataset_type in ["ClsDataset"]:
+        if dataset_type in ["ClsDataset", "MLClsDataset"]:
             ds_cfg = [
                 f"DataLoader.Train.dataset.name={dataset_type}",
                 f"DataLoader.Train.dataset.image_root={dataset_path}",
@@ -129,7 +129,12 @@ class ClsConfig(BaseConfig):
         Args:
             learning_rate (float): the learning rate value to set.
         """
-        _cfg = [f"Optimizer.lr.learning_rate={learning_rate}"]
+        if self._dict["Optimizer"]["lr"].get("learning_rate", None) is not None:
+            _cfg = [f"Optimizer.lr.learning_rate={learning_rate}"]
+        elif self._dict["Optimizer"]["lr"].get("max_learning_rate", None) is not None:
+            _cfg = [f"Optimizer.lr.max_learning_rate={learning_rate}"]
+        else:
+            raise ValueError("unsupported lr format")
         self.update(_cfg)
 
     def update_warmup_epochs(self, warmup_epochs: int):
@@ -176,8 +181,32 @@ indicating that no pretrained model to be used."
         if self._get_arch_name() == "DistillationModel":
             update_str_list.append(f"Arch.models.0.Teacher.class_num={num_classes}")
             update_str_list.append(f"Arch.models.1.Student.class_num={num_classes}")
+        ml_decoder = self.dict.get("MLDecoder", None)
+        if ml_decoder is not None:
+            self.update_ml_query_num(num_classes)
+            self.update_ml_class_num(num_classes)
         self.update(update_str_list)
 
+    def update_ml_query_num(self, query_num: int):
+        """update MLDecoder query number
+        Args:
+            query_num (int): the query number value to set; query_num should be less than or equal to num_classes.
+        """
+        base_query_num = self.dict.get("MLDecoder", {}).get("query_num", None)
+        if base_query_num is not None:
+            _cfg = [f"MLDecoder.query_num={query_num}"]
+            self.update(_cfg)
+
+    def update_ml_class_num(self, class_num: int):
+        """update MLDecoder class number
+        Args:
+            class_num (int): the class number value to set.
+        """
+        base_class_num = self.dict.get("MLDecoder", {}).get("class_num", None)
+        if base_class_num is not None:
+            _cfg = [f"MLDecoder.class_num={class_num}"]
+            self.update(_cfg)
+
     def _update_slim_config(self, slim_config_path: str):
         """update slim settings
 

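Standalone illustration of the two changes to ClsConfig, using a plain dict in place of the real config object: update_learning_rate now writes to whichever key Optimizer.lr actually defines, and update_num_classes mirrors num_classes into the MLDecoder section when one exists.

cfg = {
    "Optimizer": {"lr": {"name": "Cosine", "learning_rate": 1e-4}},
    "MLDecoder": {"query_num": 80, "in_channels": 2048},
}

# Pick the override key the same way update_learning_rate does.
lr_cfg = cfg["Optimizer"]["lr"]
if lr_cfg.get("learning_rate", None) is not None:
    override = "Optimizer.lr.learning_rate=0.05"
elif lr_cfg.get("max_learning_rate", None) is not None:
    override = "Optimizer.lr.max_learning_rate=0.05"
else:
    raise ValueError("unsupported lr format")

# Mirror num_classes into MLDecoder, as update_num_classes now does.
num_classes = 33
if cfg.get("MLDecoder", None) is not None:
    if cfg["MLDecoder"].get("query_num", None) is not None:
        cfg["MLDecoder"]["query_num"] = num_classes   # query_num <= num_classes
    if cfg["MLDecoder"].get("class_num", None) is not None:
        cfg["MLDecoder"]["class_num"] = num_classes

print(override)          # Optimizer.lr.learning_rate=0.05
print(cfg["MLDecoder"])  # {'query_num': 33, 'in_channels': 2048}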
+ 60 - 0
paddlex/repo_apis/PaddleClas_api/cls/register.py

@@ -639,3 +639,63 @@ register_model_info(
         "supported_apis": ["train", "evaluate", "predict", "export"],
     }
 )
+
+register_model_info(
+    {
+        "model_name": "PP-LCNet_x1_0_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-LCNet_x1_0_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "ResNet50_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "ResNet50_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B0_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B0_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B4_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B4_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-HGNetV2-B6_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-HGNetV2-B6_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "CLIP_vit_base_patch16_448_ML",
+        "suite": "Cls",
+        "config_path": osp.join(PDX_CONFIG_DIR, "CLIP_vit_base_patch16_448_ML.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+        "infer_config": "deploy/configs/inference_cls.yaml",
+    }
+)

+ 0 - 5
paddlex/repo_apis/PaddleClas_api/cls/runner.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import os
 import tempfile
 
@@ -23,8 +22,6 @@ from ...base.utils.subprocess import CompletedProcess
 class ClsRunner(BaseRunner):
     """Cls Runner"""
 
-    _INFER_CONFIG_REL_PATH = os.path.join("deploy", "configs", "inference_cls.yaml")
-
     def train(
         self,
         config_path: str,
@@ -125,8 +122,6 @@ class ClsRunner(BaseRunner):
             *cli_args,
             "-o",
             "Global.export_for_fd=True",
-            "-o",
-            f"Global.infer_config_path={os.path.join(self.runner_root_path, self._INFER_CONFIG_REL_PATH)}",
         ]
 
         cp = self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)

+ 168 - 0
paddlex/repo_apis/PaddleClas_api/configs/CLIP_vit_base_patch16_448_ML.yaml

@@ -0,0 +1,168 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: False
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O2
+
+# model architecture
+Arch:
+  name: CLIP_vit_base_patch16_224
+  class_num: 80
+  return_embed: False
+  use_fused_attn: False # fused attn can be used in AMP O2 mode only
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 768
+  remove_layers: []
+  replace_layer: 'head'
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 5e-5
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 4
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B0_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B0
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 32
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B4_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B4
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-HGNetV2-B6_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPHGNetV2_B6
+  class_num: 80
+  pretrained: True # ssld pretrained
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 8
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 4
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 166 - 0
paddlex/repo_apis/PaddleClas_api/configs/PP-LCNet_x1_0_ML.yaml

@@ -0,0 +1,166 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: PPLCNet_x1_0
+  class_num: 80
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  class_num: 80
+  in_channels: 1280
+
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral

+ 164 - 0
paddlex/repo_apis/PaddleClas_api/configs/ResNet50_ML.yaml

@@ -0,0 +1,164 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 10
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 40
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 448, 448]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  use_multilabel: True
+
+# mixed precision
+AMP:
+  use_amp: True
+  use_fp16_test: False
+  scale_loss: 128.0
+  use_dynamic_loss_scaling: True
+  use_promote: False
+  # O1: mixed fp16, O2: pure fp16
+  level: O1
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 80
+  pretrained: True
+  use_ml_decoder: True
+
+# ml-decoder head
+MLDecoder:
+  query_num: 80 # default: 80, query_num <= class_num
+  in_channels: 2048
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+  Eval:
+    - MultiLabelAsymmetricLoss:
+        weight: 1.0
+        gamma_pos: 0
+        gamma_neg: 4
+        clip: 0.05
+        disable_focal_loss_grad: True
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 1e-4 
+  one_dim_param_no_weight_decay: True
+  lr:
+    name: Cosine
+    learning_rate: 1e-4
+    eta_min: 1e-10
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/train.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - Cutout:
+            length: 224
+            fill_value: none
+        - RandAugmentV4:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: MultiLabelDataset
+      image_root: dataset/coco_ml/images
+      cls_label_path: dataset/coco_ml/val.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            size: 448
+            interpolation: bilinear
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 16
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 8
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: deploy/images/coco_000000570688.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        size: 448
+        interpolation: bilinear
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: MultiLabelThreshOutput
+    threshold: 0.5
+    class_id_map_file: dataset/coco_ml/label.txt
+
+Metric:
+  Train:
+  Eval:
+    - MultiLabelMAP:
+        # support list: integral, 11point
+        # default: integral
+        map_type: integral