model.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. from ...base import BaseModel
  16. from ...base.utils.arg import CLIArgument
  17. from ...base.utils.subprocess import CompletedProcess
  18. from ....utils.device import parse_device
  19. from ....utils.misc import abspath
  20. from ....utils.download import download
  21. from ....utils.cache import DEFAULT_CACHE_DIR
  22. class SegModel(BaseModel):
  23. """Semantic Segmentation Model"""
  24. def train(
  25. self,
  26. batch_size: int = None,
  27. learning_rate: float = None,
  28. epochs_iters: int = None,
  29. ips: str = None,
  30. device: str = "gpu",
  31. resume_path: str = None,
  32. dy2st: bool = False,
  33. amp: str = "OFF",
  34. num_workers: int = None,
  35. use_vdl: bool = True,
  36. save_dir: str = None,
  37. **kwargs,
  38. ) -> CompletedProcess:
  39. """train self
  40. Args:
  41. batch_size (int, optional): the train batch size value. Defaults to None.
  42. learning_rate (float, optional): the train learning rate value. Defaults to None.
  43. epochs_iters (int, optional): the train epochs value. Defaults to None.
  44. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  45. device (str, optional): the running device. Defaults to 'gpu'.
  46. resume_path (str, optional): the checkpoint file path to resume training. Train from scratch if it is set
  47. to None. Defaults to None.
  48. dy2st (bool, optional): Enable dynamic to static. Defaults to False.
  49. amp (str, optional): the amp settings. Defaults to 'OFF'.
  50. num_workers (int, optional): the workers number. Defaults to None.
  51. use_vdl (bool, optional): enable VisualDL. Defaults to True.
  52. save_dir (str, optional): the directory path to save train output. Defaults to None.
  53. Returns:
  54. CompletedProcess: the result of training subprocess execution.
  55. """
  56. config = self.config.copy()
  57. cli_args = []
  58. if batch_size is not None:
  59. cli_args.append(CLIArgument("--batch_size", batch_size))
  60. if learning_rate is not None:
  61. cli_args.append(CLIArgument("--learning_rate", learning_rate))
  62. if epochs_iters is not None:
  63. cli_args.append(CLIArgument("--iters", epochs_iters))
  64. # No need to handle `ips`
  65. if device is not None:
  66. device_type, _ = parse_device(device)
  67. cli_args.append(CLIArgument("--device", device_type))
  68. # For compatibility
  69. resume_dir = kwargs.pop("resume_dir", None)
  70. if resume_path is None and resume_dir is not None:
  71. resume_path = os.path.join(resume_dir, "model.pdparams")
  72. if resume_path is not None:
  73. # NOTE: We must use an absolute path here,
  74. # so we can run the scripts either inside or outside the repo dir.
  75. resume_path = abspath(resume_path)
  76. if os.path.basename(resume_path) != "model.pdparams":
  77. raise ValueError(f"{resume_path} has an incorrect file name.")
  78. if not os.path.exists(resume_path):
  79. raise FileNotFoundError(f"{resume_path} does not exist.")
  80. resume_dir = os.path.dirname(resume_path)
  81. opts_path = os.path.join(resume_dir, "model.pdopt")
  82. if not os.path.exists(opts_path):
  83. raise FileNotFoundError(f"{opts_path} must exist.")
  84. cli_args.append(CLIArgument("--resume_model", resume_dir))
  85. if dy2st:
  86. config.update_dy2st(dy2st)
  87. if use_vdl:
  88. cli_args.append(CLIArgument("--use_vdl"))
  89. if save_dir is not None:
  90. save_dir = abspath(save_dir)
  91. else:
  92. # `save_dir` is None
  93. save_dir = abspath(os.path.join("output", "train"))
  94. cli_args.append(CLIArgument("--save_dir", save_dir))
  95. save_interval = kwargs.pop("save_interval", None)
  96. if save_interval is not None:
  97. cli_args.append(CLIArgument("--save_interval", save_interval))
  98. do_eval = kwargs.pop("do_eval", True)
  99. repeats = kwargs.pop("repeats", None)
  100. seed = kwargs.pop("seed", None)
  101. profile = kwargs.pop("profile", None)
  102. if profile is not None:
  103. cli_args.append(CLIArgument("--profiler_options", profile))
  104. log_iters = kwargs.pop("log_iters", None)
  105. if log_iters is not None:
  106. cli_args.append(CLIArgument("--log_iters", log_iters))
  107. input_shape = kwargs.pop("input_shape", None)
  108. if input_shape is not None:
  109. cli_args.append(CLIArgument("--input_shape", *input_shape))
  110. # Benchmarking mode settings
  111. benchmark = kwargs.pop("benchmark", None)
  112. if benchmark is not None:
  113. envs = benchmark.get("env", None)
  114. seed = benchmark.get("seed", None)
  115. repeats = benchmark.get("repeats", None)
  116. do_eval = benchmark.get("do_eval", False)
  117. num_workers = benchmark.get("num_workers", None)
  118. config.update_log_ranks(device)
  119. amp = benchmark.get("amp", None)
  120. config.update_print_mem_info(benchmark.get("print_mem_info", True))
  121. config.update_shuffle(benchmark.get("shuffle", False))
  122. if repeats is not None:
  123. assert isinstance(repeats, int), "repeats must be an integer."
  124. cli_args.append(CLIArgument("--repeats", repeats))
  125. if num_workers is not None:
  126. assert isinstance(num_workers, int), "num_workers must be an integer."
  127. cli_args.append(CLIArgument("--num_workers", num_workers))
  128. if seed is not None:
  129. assert isinstance(seed, int), "seed must be an integer."
  130. cli_args.append(CLIArgument("--seed", seed))
  131. if amp in ["O1", "O2"]:
  132. cli_args.append(CLIArgument("--precision", "fp16"))
  133. cli_args.append(CLIArgument("--amp_level", amp))
  134. if envs is not None:
  135. for env_name, env_value in envs.items():
  136. os.environ[env_name] = str(env_value)
  137. else:
  138. if amp is not None:
  139. if amp != "OFF":
  140. cli_args.append(CLIArgument("--precision", "fp16"))
  141. cli_args.append(CLIArgument("--amp_level", amp))
  142. if num_workers is not None:
  143. cli_args.append(CLIArgument("--num_workers", num_workers))
  144. if repeats is not None:
  145. cli_args.append(CLIArgument("--repeats", repeats))
  146. if seed is not None:
  147. cli_args.append(CLIArgument("--seed", seed))
  148. # PDX related settings
  149. uniform_output_enabled = kwargs.pop("uniform_output_enabled", True)
  150. export_with_pir = kwargs.pop("export_with_pir", False)
  151. config.set_val("uniform_output_enabled", uniform_output_enabled)
  152. config.set_val("pdx_model_name", self.name)
  153. if export_with_pir:
  154. config.set_val("export_with_pir", export_with_pir)
  155. self._assert_empty_kwargs(kwargs)
  156. with self._create_new_config_file() as config_path:
  157. config.dump(config_path)
  158. return self.runner.train(
  159. config_path, cli_args, device, ips, save_dir, do_eval=do_eval
  160. )
  161. def evaluate(
  162. self,
  163. weight_path: str,
  164. batch_size: int = None,
  165. ips: str = None,
  166. device: str = "gpu",
  167. amp: str = "OFF",
  168. num_workers: int = None,
  169. **kwargs,
  170. ) -> CompletedProcess:
  171. """evaluate self using specified weight
  172. Args:
  173. weight_path (str): the path of model weight file to be evaluated.
  174. batch_size (int, optional): the batch size value in evaluating. Defaults to None.
  175. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  176. device (str, optional): the running device. Defaults to 'gpu'.
  177. amp (str, optional): the AMP setting. Defaults to 'OFF'.
  178. num_workers (int, optional): the workers number in evaluating. Defaults to None.
  179. Returns:
  180. CompletedProcess: the result of evaluating subprocess execution.
  181. """
  182. config = self.config.copy()
  183. cli_args = []
  184. weight_path = abspath(weight_path)
  185. cli_args.append(CLIArgument("--model_path", weight_path))
  186. if batch_size is not None:
  187. if batch_size != 1:
  188. raise ValueError("Batch size other than 1 is not supported.")
  189. # No need to handle `ips`
  190. if device is not None:
  191. device_type, _ = parse_device(device)
  192. cli_args.append(CLIArgument("--device", device_type))
  193. if amp is not None:
  194. if amp != "OFF":
  195. cli_args.append(CLIArgument("--precision", "fp16"))
  196. cli_args.append(CLIArgument("--amp_level", amp))
  197. if num_workers is not None:
  198. cli_args.append(CLIArgument("--num_workers", num_workers))
  199. self._assert_empty_kwargs(kwargs)
  200. with self._create_new_config_file() as config_path:
  201. config.dump(config_path)
  202. cp = self.runner.evaluate(config_path, cli_args, device, ips)
  203. return cp
  204. def predict(
  205. self,
  206. weight_path: str,
  207. input_path: str,
  208. device: str = "gpu",
  209. save_dir: str = None,
  210. **kwargs,
  211. ) -> CompletedProcess:
  212. """predict using specified weight
  213. Args:
  214. weight_path (str): the path of model weight file used to predict.
  215. input_path (str): the path of image file to be predicted.
  216. device (str, optional): the running device. Defaults to 'gpu'.
  217. save_dir (str, optional): the directory path to save predict output. Defaults to None.
  218. Returns:
  219. CompletedProcess: the result of predicting subprocess execution.
  220. """
  221. config = self.config.copy()
  222. cli_args = []
  223. weight_path = abspath(weight_path)
  224. cli_args.append(CLIArgument("--model_path", weight_path))
  225. input_path = abspath(input_path)
  226. cli_args.append(CLIArgument("--image_path", input_path))
  227. if device is not None:
  228. device_type, _ = parse_device(device)
  229. cli_args.append(CLIArgument("--device", device_type))
  230. if save_dir is not None:
  231. save_dir = abspath(save_dir)
  232. else:
  233. # `save_dir` is None
  234. save_dir = abspath(os.path.join("output", "predict"))
  235. cli_args.append(CLIArgument("--save_dir", save_dir))
  236. self._assert_empty_kwargs(kwargs)
  237. with self._create_new_config_file() as config_path:
  238. config.dump(config_path)
  239. return self.runner.predict(config_path, cli_args, device)
  240. def analyse(self, weight_path, ips=None, device="gpu", save_dir=None, **kwargs):
  241. """analyse"""
  242. config = self.config.copy()
  243. cli_args = []
  244. weight_path = abspath(weight_path)
  245. cli_args.append(CLIArgument("--model_path", weight_path))
  246. if device is not None:
  247. device_type, _ = parse_device(device)
  248. cli_args.append(CLIArgument("--device", device_type))
  249. if save_dir is not None:
  250. save_dir = abspath(save_dir)
  251. else:
  252. # `save_dir` is None
  253. save_dir = abspath(os.path.join("output", "analysis"))
  254. cli_args.append(CLIArgument("--save_dir", save_dir))
  255. self._assert_empty_kwargs(kwargs)
  256. with self._create_new_config_file() as config_path:
  257. config.dump(config_path)
  258. cp = self.runner.analyse(config_path, cli_args, device, ips)
  259. return cp
  260. def export(self, weight_path: str, save_dir: str, **kwargs) -> CompletedProcess:
  261. """export the dynamic model to static model
  262. Args:
  263. weight_path (str): the model weight file path that used to export.
  264. save_dir (str): the directory path to save export output.
  265. Returns:
  266. CompletedProcess: the result of exporting subprocess execution.
  267. """
  268. config = self.config.copy()
  269. cli_args = []
  270. if not weight_path.startswith("http"):
  271. weight_path = abspath(weight_path)
  272. else:
  273. filename = os.path.basename(weight_path)
  274. save_path = os.path.join(DEFAULT_CACHE_DIR, filename)
  275. download(weight_path, save_path, print_progress=True, overwrite=True)
  276. weight_path = save_path
  277. cli_args.append(CLIArgument("--model_path", weight_path))
  278. if save_dir is not None:
  279. save_dir = abspath(save_dir)
  280. else:
  281. # `save_dir` is None
  282. save_dir = abspath(os.path.join("output", "export"))
  283. cli_args.append(CLIArgument("--save_dir", save_dir))
  284. input_shape = kwargs.pop("input_shape", None)
  285. if input_shape is not None:
  286. cli_args.append(CLIArgument("--input_shape", *input_shape))
  287. try:
  288. output_op = config["output_op"]
  289. except:
  290. output_op = kwargs.pop("output_op", None)
  291. if output_op is not None:
  292. assert output_op in [
  293. "softmax",
  294. "argmax",
  295. "none",
  296. ], "`output_op` must be 'none', 'softmax' or 'argmax'."
  297. cli_args.append(CLIArgument("--output_op", output_op))
  298. # PDX related settings
  299. uniform_output_enabled = kwargs.pop("uniform_output_enabled", True)
  300. export_with_pir = kwargs.pop("export_with_pir", False)
  301. config.set_val("uniform_output_enabled", uniform_output_enabled)
  302. config.set_val("pdx_model_name", self.name)
  303. if export_with_pir:
  304. config.set_val("export_with_pir", export_with_pir)
  305. self._assert_empty_kwargs(kwargs)
  306. with self._create_new_config_file() as config_path:
  307. config.dump(config_path)
  308. return self.runner.export(config_path, cli_args, None)
  309. def infer(
  310. self,
  311. model_dir: str,
  312. input_path: str,
  313. device: str = "gpu",
  314. save_dir: str = None,
  315. **kwargs,
  316. ) -> CompletedProcess:
  317. """predict image using infernece model
  318. Args:
  319. model_dir (str): the directory path of inference model files that would use to predict.
  320. input_path (str): the path of image that would be predict.
  321. device (str, optional): the running device. Defaults to 'gpu'.
  322. save_dir (str, optional): the directory path to save output. Defaults to None.
  323. Returns:
  324. CompletedProcess: the result of infering subprocess execution.
  325. """
  326. config = self.config.copy()
  327. cli_args = []
  328. model_dir = abspath(model_dir)
  329. input_path = abspath(input_path)
  330. cli_args.append(CLIArgument("--image_path", input_path))
  331. if device is not None:
  332. device_type, _ = parse_device(device)
  333. cli_args.append(CLIArgument("--device", device_type))
  334. if save_dir is not None:
  335. save_dir = abspath(save_dir)
  336. else:
  337. # `save_dir` is None
  338. save_dir = abspath(os.path.join("output", "infer"))
  339. cli_args.append(CLIArgument("--save_dir", save_dir))
  340. self._assert_empty_kwargs(kwargs)
  341. with self._create_new_config_file() as config_path:
  342. config.dump(config_path)
  343. deploy_config_path = os.path.join(model_dir, "inference.yml")
  344. return self.runner.infer(deploy_config_path, cli_args, device)
  345. def compression(
  346. self,
  347. weight_path: str,
  348. batch_size: int = None,
  349. learning_rate: float = None,
  350. epochs_iters: int = None,
  351. device: str = "gpu",
  352. use_vdl: bool = True,
  353. save_dir: str = None,
  354. **kwargs,
  355. ) -> CompletedProcess:
  356. """compression model
  357. Args:
  358. weight_path (str): the path to weight file of model.
  359. batch_size (int, optional): the batch size value of compression training. Defaults to None.
  360. learning_rate (float, optional): the learning rate value of compression training. Defaults to None.
  361. epochs_iters (int, optional): the epochs or iters of compression training. Defaults to None.
  362. device (str, optional): the device to run compression training. Defaults to 'gpu'.
  363. use_vdl (bool, optional): whether or not to use VisualDL. Defaults to True.
  364. save_dir (str, optional): the directory to save output. Defaults to None.
  365. Returns:
  366. CompletedProcess: the result of compression subprocess execution.
  367. """
  368. # Update YAML config file
  369. # NOTE: In PaddleSeg, QAT does not use a different config file than regular training
  370. # Reusing `self.config` preserves the config items modified by the user when
  371. # `SegModel` is initialized with a `SegConfig` object.
  372. config = self.config.copy()
  373. train_cli_args = []
  374. export_cli_args = []
  375. weight_path = abspath(weight_path)
  376. train_cli_args.append(CLIArgument("--model_path", weight_path))
  377. if batch_size is not None:
  378. train_cli_args.append(CLIArgument("--batch_size", batch_size))
  379. if learning_rate is not None:
  380. train_cli_args.append(CLIArgument("--learning_rate", learning_rate))
  381. if epochs_iters is not None:
  382. train_cli_args.append(CLIArgument("--iters", epochs_iters))
  383. if device is not None:
  384. device_type, _ = parse_device(device)
  385. train_cli_args.append(CLIArgument("--device", device_type))
  386. if use_vdl:
  387. train_cli_args.append(CLIArgument("--use_vdl"))
  388. if save_dir is not None:
  389. save_dir = abspath(save_dir)
  390. else:
  391. # `save_dir` is None
  392. save_dir = abspath(os.path.join("output", "compress"))
  393. train_cli_args.append(CLIArgument("--save_dir", save_dir))
  394. # The exported model saved in a subdirectory named `export`
  395. export_cli_args.append(
  396. CLIArgument("--save_dir", os.path.join(save_dir, "export"))
  397. )
  398. input_shape = kwargs.pop("input_shape", None)
  399. if input_shape is not None:
  400. export_cli_args.append(CLIArgument("--input_shape", *input_shape))
  401. self._assert_empty_kwargs(kwargs)
  402. with self._create_new_config_file() as config_path:
  403. config.dump(config_path)
  404. return self.runner.compression(
  405. config_path, train_cli_args, export_cli_args, device, save_dir
  406. )