model.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. from ...base import BaseModel
  16. from ...base.utils.arg import CLIArgument
  17. from ...base.utils.subprocess import CompletedProcess
  18. from ....utils.device import parse_device
  19. from ....utils.misc import abspath
  20. from ....utils.errors import raise_unsupported_api_error
  21. class TSModel(BaseModel):
  22. """TS Model"""
  23. def train(
  24. self,
  25. batch_size: int = None,
  26. learning_rate: float = None,
  27. epochs_iters: int = None,
  28. ips: str = None,
  29. device: str = "gpu",
  30. resume_path: str = None,
  31. dy2st: bool = False,
  32. amp: str = "OFF",
  33. num_workers: int = None,
  34. use_vdl: bool = False,
  35. save_dir: str = None,
  36. **kwargs,
  37. ) -> CompletedProcess:
  38. """train self
  39. Args:
  40. batch_size (int, optional): the train batch size value. Defaults to None.
  41. learning_rate (float, optional): the train learning rate value. Defaults to None.
  42. epochs_iters (int, optional): the train epochs value. Defaults to None.
  43. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  44. device (str, optional): the running device. Defaults to 'gpu'.
  45. resume_path (str, optional): the checkpoint file path to resume training. Train from scratch if it is set
  46. to None. Defaults to None.
  47. dy2st (bool, optional): Enable dynamic to static. Defaults to False.
  48. amp (str, optional): the amp settings. Defaults to 'OFF'.
  49. num_workers (int, optional): the workers number. Defaults to None.
  50. use_vdl (bool, optional): enable VisualDL. Defaults to False.
  51. save_dir (str, optional): the directory path to save train output. Defaults to None.
  52. Returns:
  53. CompletedProcess: the result of training subprocess execution.
  54. """
  55. config = self.config.copy()
  56. cli_args = []
  57. if batch_size is not None:
  58. cli_args.append(CLIArgument("--batch_size", batch_size))
  59. if learning_rate is not None:
  60. cli_args.append(CLIArgument("--learning_rate", learning_rate))
  61. if epochs_iters is not None:
  62. cli_args.append(CLIArgument("--epoch", epochs_iters))
  63. if resume_path:
  64. raise ValueError("`resume_path` is not supported.")
  65. # No need to handle `ips`
  66. if amp is not None and amp != "OFF":
  67. raise ValueError(f"`amp`={amp} is not supported.")
  68. if dy2st:
  69. raise ValueError(f"`dy2st`={dy2st} is not supported.")
  70. if use_vdl:
  71. raise ValueError(f"`use_vdl`={use_vdl} is not supported.")
  72. if device is not None:
  73. device_type, _ = parse_device(device)
  74. cli_args.append(CLIArgument("--device", device_type))
  75. if save_dir is not None:
  76. save_dir = abspath(save_dir)
  77. else:
  78. # `save_dir` is None
  79. save_dir = abspath(os.path.join("output", "train"))
  80. cli_args.append(CLIArgument("--save_dir", save_dir))
  81. # Benchmarking mode settings
  82. benchmark = kwargs.pop("benchmark", None)
  83. if benchmark is not None:
  84. envs = benchmark.get("env", None)
  85. num_workers = benchmark.get("num_workers", None)
  86. config.update_log_ranks(device)
  87. config.update_print_mem_info(benchmark.get("print_mem_info", True))
  88. if num_workers is not None:
  89. assert isinstance(num_workers, int), "num_workers must be an integer"
  90. cli_args.append(CLIArgument("--num_workers", num_workers))
  91. if envs is not None:
  92. for env_name, env_value in envs.items():
  93. os.environ[env_name] = str(env_value)
  94. else:
  95. if num_workers is not None:
  96. cli_args.append(CLIArgument("--num_workers", num_workers))
  97. config.update({"uniform_output_enabled": True})
  98. config.update({"pdx_model_name": self.name})
  99. self._assert_empty_kwargs(kwargs)
  100. with self._create_new_config_file() as config_path:
  101. config.dump(config_path)
  102. return self.runner.train(config_path, cli_args, device, ips, save_dir)
  103. def evaluate(
  104. self,
  105. weight_path: str,
  106. batch_size: int = None,
  107. ips: str = None,
  108. device: str = "gpu",
  109. amp: str = "OFF",
  110. num_workers: int = None,
  111. **kwargs,
  112. ) -> CompletedProcess:
  113. """evaluate self using specified weight
  114. Args:
  115. weight_path (str): the path of model weight file to be evaluated.
  116. batch_size (int, optional): the batch size value in evaluating. Defaults to None.
  117. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  118. device (str, optional): the running device. Defaults to 'gpu'.
  119. amp (str, optional): the AMP setting. Defaults to 'OFF'.
  120. num_workers (int, optional): the workers number in evaluating. Defaults to None.
  121. Returns:
  122. CompletedProcess: the result of evaluating subprocess execution.
  123. """
  124. config = self.config.copy()
  125. cli_args = []
  126. weight_path = abspath(weight_path)
  127. cli_args.append(CLIArgument("--checkpoints", weight_path))
  128. if batch_size is not None:
  129. if batch_size != 1:
  130. raise ValueError("Batch size other than 1 is not supported.")
  131. # No need to handle `ips`
  132. if device is not None:
  133. device_type, _ = parse_device(device)
  134. cli_args.append(CLIArgument("--device", device_type))
  135. if amp is not None:
  136. if amp != "OFF":
  137. raise ValueError(f"`amp`={amp} is not supported.")
  138. if num_workers is not None:
  139. cli_args.append(CLIArgument("--num_workers", num_workers))
  140. self._assert_empty_kwargs(kwargs)
  141. with self._create_new_config_file() as config_path:
  142. config.dump(config_path)
  143. cp = self.runner.evaluate(config_path, cli_args, device, ips)
  144. return cp
  145. def predict(
  146. self,
  147. weight_path: str,
  148. input_path: str,
  149. device: str = "gpu",
  150. save_dir: str = None,
  151. **kwargs,
  152. ) -> CompletedProcess:
  153. """predict using specified weight
  154. Args:
  155. weight_path (str): the path of model weight file used to predict.
  156. input_path (str): the path of image file to be predicted.
  157. device (str, optional): the running device. Defaults to 'gpu'.
  158. save_dir (str, optional): the directory path to save predict output. Defaults to None.
  159. Returns:
  160. CompletedProcess: the result of predicting subprocess execution.
  161. """
  162. config = self.config.copy()
  163. cli_args = []
  164. weight_path = abspath(weight_path)
  165. cli_args.append(CLIArgument("--checkpoints", weight_path))
  166. input_path = abspath(input_path)
  167. cli_args.append(CLIArgument("--csv_path", input_path))
  168. if device is not None:
  169. device_type, _ = parse_device(device)
  170. cli_args.append(CLIArgument("--device", device_type))
  171. if save_dir is not None:
  172. save_dir = abspath(save_dir)
  173. else:
  174. # `save_dir` is None
  175. save_dir = abspath(os.path.join("output", "predict"))
  176. cli_args.append(CLIArgument("--save_dir", save_dir))
  177. self._assert_empty_kwargs(kwargs)
  178. with self._create_new_config_file() as config_path:
  179. config.dump(config_path)
  180. return self.runner.predict(config_path, cli_args, device)
  181. def export(
  182. self, weight_path: str, save_dir: str = None, device: str = "gpu", **kwargs
  183. ):
  184. """export"""
  185. weight_path = abspath(weight_path)
  186. save_dir = abspath(save_dir)
  187. cli_args = []
  188. weight_path = abspath(weight_path)
  189. cli_args.append(CLIArgument("--checkpoints", weight_path))
  190. if save_dir is not None:
  191. save_dir = abspath(save_dir)
  192. else:
  193. save_dir = abspath(os.path.join("output", "inference"))
  194. cli_args.append(CLIArgument("--save_dir", save_dir))
  195. if device is not None:
  196. device_type, _ = parse_device(device)
  197. cli_args.append(CLIArgument("--device", device_type))
  198. self._assert_empty_kwargs(kwargs)
  199. with self._create_new_config_file() as config_path:
  200. # Update YAML config file
  201. config = self.config.copy()
  202. config.update_pretrained_weights(weight_path)
  203. config.update({"pdx_model_name": self.name})
  204. config.dump(config_path)
  205. return self.runner.export(config_path, cli_args, device)
  206. def infer(
  207. self,
  208. model_dir: str,
  209. input_path: str,
  210. device: str = "gpu",
  211. save_dir: str = None,
  212. **kwargs,
  213. ):
  214. """infer"""
  215. raise_unsupported_api_error("infer", self.__class__)
  216. def compression(
  217. self,
  218. weight_path: str,
  219. batch_size=None,
  220. learning_rate=None,
  221. epochs_iters=None,
  222. device: str = "gpu",
  223. use_vdl=True,
  224. save_dir=None,
  225. **kwargs,
  226. ):
  227. """compression"""
  228. raise_unsupported_api_error("compression", self.__class__)