convert_dataset.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import argparse
  15. import os
  16. import json
  17. def convert(input_dir):
  18. """
  19. Convert json in file into imagenet format.
  20. """
  21. label_path = os.path.join(input_dir, "flags.txt")
  22. label_dict = {}
  23. label_content = []
  24. with open(label_path, "r") as f:
  25. lines = f.readlines()
  26. for idx, line in enumerate(lines):
  27. line = line.strip()
  28. label_dict[line] = str(idx)
  29. label_content.append(f"{str(idx)} {line}\n")
  30. with open(os.path.join(input_dir, "label.txt"), "w", encoding='utf-8') as f:
  31. f.write("".join(label_content))
  32. anno_path = os.path.join(input_dir, "annotations")
  33. jsons_path = os.listdir()
  34. train_list = os.path.join(input_dir, "train.txt")
  35. val_list = os.path.join(input_dir, "val.txt")
  36. label_info = []
  37. for json_file in os.listdir(anno_path):
  38. with open(os.path.join(anno_path, json_file), "r", encoding='utf-8') as f:
  39. data = json.load(f)
  40. file_name = os.path.join("images", data["imagePath"].strip().split("/")[2])
  41. for label, value in data["flags"].items():
  42. if value:
  43. label_info.append(f"{file_name} {label_dict[label]}\n")
  44. with open(train_list, "w", encoding='utf-8') as file:
  45. file.write("".join(label_info))
  46. with open(val_list, "w", encoding='utf-8') as file:
  47. pass