
add_convert_for_clas (#2031)

Liu Jiaxuan 1 year ago
parent
commit
bff9f55b15

+ 2 - 2
paddlex/modules/image_classification/dataset_checker/__init__.py

@@ -15,7 +15,7 @@
 from pathlib import Path
 
 from ...base import BaseDatasetChecker
-from .dataset_src import check, split_dataset, deep_analyse
+from .dataset_src import check, convert, split_dataset, deep_analyse
 from ..model_list import MODELS
 
 
@@ -48,7 +48,7 @@ class ClsDatasetChecker(BaseDatasetChecker):
         Returns:
             str: the root directory of converted dataset.
         """
-        return src_dataset_dir
+        return convert(src_dataset_dir)
 
     def split_dataset(self, src_dataset_dir: str) -> str:
         """repartition the train and validation dataset

+ 1 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py

@@ -14,5 +14,6 @@
 
 
 from .check_dataset import check
+from .convert_dataset import convert
 from .split_dataset import split_dataset
 from .analyse_dataset import deep_analyse

+ 50 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py

@@ -0,0 +1,50 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+
+
+def convert(input_dir):
+    """
+    Convert LabelMe-style JSON annotations under `input_dir` into ImageNet-format list files.
+    """
+    # Build a label-name -> index mapping from flags.txt (one class name per line).
+    label_path = os.path.join(input_dir, "flags.txt")
+    label_dict = {}
+    label_content = []
+    with open(label_path, "r", encoding='utf-8') as f:
+        lines = f.readlines()
+        for idx, line in enumerate(lines):
+            line = line.strip()
+            label_dict[line] = str(idx)
+            label_content.append(f"{str(idx)} {line}\n")
+    with open(os.path.join(input_dir, "label.txt"), "w", encoding='utf-8') as f:
+        f.write("".join(label_content))
+    anno_path = os.path.join(input_dir, "annotations")
+    json_files = os.listdir(anno_path)
+    train_list = os.path.join(input_dir, "train.txt")
+    val_list = os.path.join(input_dir, "val.txt")
+    label_info = []
+    # Read each LabelMe JSON and record one "images/<file> <label_index>" line
+    # per flag that is set to true.
+    for json_file in json_files:
+        with open(os.path.join(anno_path, json_file), "r", encoding='utf-8') as f:
+            data = json.load(f)
+            # imagePath is assumed to look like "../images/<file>", so the third
+            # path component is the image file name.
+            file_name = os.path.join("images", data["imagePath"].strip().split("/")[2])
+            for label, value in data["flags"].items():
+                if value:
+                    label_info.append(f"{file_name} {label_dict[label]}\n")
+    # All samples go to train.txt; val.txt is created empty for a later split step.
+    with open(train_list, "w", encoding='utf-8') as file:
+        file.write("".join(label_info))
+    with open(val_list, "w", encoding='utf-8') as file:
+        pass
+    # Return the dataset root, as ClsDatasetChecker.convert_dataset expects a str.
+    return input_dir
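
Taken together, the new `convert` helper assumes a LabelMe-style export: a `flags.txt` with one class name per line, an `annotations/` directory of JSON files whose `flags` mark the classes present in each image, and an `images/` directory referenced via each `imagePath`. A minimal usage sketch (the dataset directory name below is hypothetical, not part of this change):

# Hypothetical layout assumed by convert():
#
#   my_cls_dataset/
#   ├── flags.txt       # one class name per line
#   ├── images/         # image files
#   └── annotations/    # LabelMe JSONs with "imagePath" and "flags"
#
# convert() writes label.txt and train.txt next to them and creates an empty val.txt.

from paddlex.modules.image_classification.dataset_checker.dataset_src import convert

convert("my_cls_dataset")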