convert_dataset.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import argparse
  15. import os
  16. import json
  17. from .....utils.file_interface import custom_open
  18. def convert(input_dir):
  19. """
  20. Convert json in file into imagenet format.
  21. """
  22. label_path = os.path.join(input_dir, "flags.txt")
  23. label_dict = {}
  24. label_content = []
  25. with custom_open(label_path, "r") as f:
  26. lines = f.readlines()
  27. for idx, line in enumerate(lines):
  28. line = line.strip()
  29. label_dict[line] = str(idx)
  30. label_content.append(f"{str(idx)} {line}\n")
  31. with custom_open(os.path.join(input_dir, "label.txt"), "w") as f:
  32. f.write("".join(label_content))
  33. anno_path = os.path.join(input_dir, "annotations")
  34. jsons_path = os.listdir()
  35. train_list = os.path.join(input_dir, "train.txt")
  36. val_list = os.path.join(input_dir, "val.txt")
  37. label_info = []
  38. for json_file in os.listdir(anno_path):
  39. with custom_open(os.path.join(anno_path, json_file), "r") as f:
  40. data = json.load(f)
  41. file_name = os.path.join("images", data["imagePath"].strip().split("/")[2])
  42. for label, value in data["flags"].items():
  43. if value:
  44. label_info.append(f"{file_name} {label_dict[label]}\n")
  45. with custom_open(train_list, "w") as file:
  46. file.write("".join(label_info))
  47. with custom_open(val_list, "w") as file:
  48. pass