__init__.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. from pathlib import Path
  16. from ...base import BaseDatasetChecker
  17. from ..model_list import MODELS
  18. from .dataset_src import check_train, check_val
  19. class FaceRecDatasetChecker(BaseDatasetChecker):
  20. """Dataset Checker for Image Classification Model"""
  21. entities = MODELS
  22. sample_num = 10
  23. def get_dataset_root(self, dataset_dir: str) -> str:
  24. """find the dataset root dir
  25. Args:
  26. dataset_dir (str): the directory that contain dataset.
  27. Returns:
  28. str: the root directory of dataset.
  29. """
  30. anno_dirs = list(Path(dataset_dir).glob("**/images"))
  31. assert len(anno_dirs) == 2
  32. dataset_dir = anno_dirs[0].parent.parent.as_posix()
  33. return dataset_dir
  34. def check_dataset(self, dataset_dir: str, sample_num: int = sample_num) -> dict:
  35. """check if the dataset meets the specifications and get dataset summary
  36. Args:
  37. dataset_dir (str): the root directory of dataset.
  38. sample_num (int): the number to be sampled.
  39. Returns:
  40. dict: dataset summary.
  41. """
  42. train_attr = check_train(os.path.join(dataset_dir, "train"), self.output)
  43. val_attr = check_val(os.path.join(dataset_dir, "val"), self.output)
  44. train_attr.update(val_attr)
  45. return train_attr
  46. def get_show_type(self) -> str:
  47. """get the show type of dataset
  48. Returns:
  49. str: show type
  50. """
  51. return "image"
  52. def get_dataset_type(self) -> str:
  53. """return the dataset type
  54. Returns:
  55. str: dataset type
  56. """
  57. return "ClsDataset"