
add_convert_for_clas (#2031)

Liu Jiaxuan 1 year ago
parent
commit
bff9f55b15

+ 2 - 2
paddlex/modules/image_classification/dataset_checker/__init__.py

@@ -15,7 +15,7 @@
 from pathlib import Path
 
 from ...base import BaseDatasetChecker
-from .dataset_src import check, split_dataset, deep_analyse
+from .dataset_src import check, convert, split_dataset, deep_analyse
 from ..model_list import MODELS
 
 
@@ -48,7 +48,7 @@ class ClsDatasetChecker(BaseDatasetChecker):
         Returns:
             str: the root directory of converted dataset.
         """
-        return src_dataset_dir
+        return convert(src_dataset_dir)
 
     def split_dataset(self, src_dataset_dir: str) -> str:
         """repartition the train and validation dataset

+ 1 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py

@@ -14,5 +14,6 @@
 
 
 from .check_dataset import check
+from .convert_dataset import convert
 from .split_dataset import split_dataset
 from .analyse_dataset import deep_analyse

+ 50 - 0
paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py

@@ -0,0 +1,50 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+
+
+def convert(input_dir):
+    """
+    Convert LabelMe-style JSON annotations under `input_dir` into ImageNet-format list files.
+    """
+    # Build a label-name -> index mapping from flags.txt (one class name per line).
+    label_path = os.path.join(input_dir, "flags.txt")
+    label_dict = {}
+    label_content = []
+    with open(label_path, "r", encoding='utf-8') as f:
+        lines = f.readlines()
+        for idx, line in enumerate(lines):
+            line = line.strip()
+            label_dict[line] = str(idx)
+            label_content.append(f"{str(idx)} {line}\n")
+    with open(os.path.join(input_dir, "label.txt"), "w", encoding='utf-8') as f:
+        f.write("".join(label_content))
+    anno_path = os.path.join(input_dir, "annotations")
+    json_files = os.listdir(anno_path)
+    train_list = os.path.join(input_dir, "train.txt")
+    val_list = os.path.join(input_dir, "val.txt")
+    label_info = []
+    # Read each LabelMe JSON and record one "images/<file> <label_index>" line
+    # per flag that is set to true.
+    for json_file in json_files:
+        with open(os.path.join(anno_path, json_file), "r", encoding='utf-8') as f:
+            data = json.load(f)
+            # imagePath is assumed to look like "../images/<file>", so the third
+            # path component is the image file name.
+            file_name = os.path.join("images", data["imagePath"].strip().split("/")[2])
+            for label, value in data["flags"].items():
+                if value:
+                    label_info.append(f"{file_name} {label_dict[label]}\n")
+    # All samples go to train.txt; val.txt is created empty for a later split step.
+    with open(train_list, "w", encoding='utf-8') as file:
+        file.write("".join(label_info))
+    with open(val_list, "w", encoding='utf-8') as file:
+        pass
+    # Return the dataset root, as ClsDatasetChecker.convert_dataset expects a str.
+    return input_dir
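
Taken together, the new `convert` helper assumes a LabelMe-style export: a `flags.txt` with one class name per line, an `annotations/` directory of JSON files whose `flags` mark the classes present in each image, and an `images/` directory referenced via each `imagePath`. A minimal usage sketch (the dataset directory name below is hypothetical, not part of this change):

# Hypothetical layout assumed by convert():
#
#   my_cls_dataset/
#   ├── flags.txt       # one class name per line
#   ├── images/         # image files
#   └── annotations/    # LabelMe JSONs with "imagePath" and "flags"
#
# convert() writes label.txt and train.txt next to them and creates an empty val.txt.

from paddlex.modules.image_classification.dataset_checker.dataset_src import convert

convert("my_cls_dataset")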