# runner.py
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. from ...base import BaseRunner
  16. from ...base.utils.arg import gather_opts_args
  17. from ...base.utils.subprocess import CompletedProcess
  18. class SegRunner(BaseRunner):
  19. """Semantic Segmentation Runner"""
  20. def train(
  21. self,
  22. config_path: str,
  23. cli_args: list,
  24. device: str,
  25. ips: str,
  26. save_dir: str,
  27. do_eval=True,
  28. ) -> CompletedProcess:
  29. """train model
  30. Args:
  31. config_path (str): the config file path used to train.
  32. cli_args (list): the additional parameters.
  33. device (str): the training device.
  34. ips (str): the ip addresses of nodes when using distribution.
  35. save_dir (str): the directory path to save training output.
  36. do_eval (bool, optional): whether or not to evaluate model during training. Defaults to True.
  37. Returns:
  38. CompletedProcess: the result of training subprocess execution.
  39. """
  40. args, env = self.distributed(device, ips, log_dir=save_dir)
  41. cli_args = self._gather_opts_args(cli_args)
  42. cmd = [*args, "tools/train.py"]
  43. if do_eval:
  44. cmd.append("--do_eval")
  45. cmd.extend(["--config", config_path, *cli_args])
  46. return self.run_cmd(
  47. cmd,
  48. env=env,
  49. switch_wdir=True,
  50. echo=True,
  51. silent=False,
  52. capture_output=True,
  53. log_path=self._get_train_log_path(save_dir),
  54. )
  55. def evaluate(
  56. self, config_path: str, cli_args: list, device: str, ips: str
  57. ) -> CompletedProcess:
  58. """run model evaluating
  59. Args:
  60. config_path (str): the config file path used to evaluate.
  61. cli_args (list): the additional parameters.
  62. device (str): the evaluating device.
  63. ips (str): the ip addresses of nodes when using distribution.
  64. Returns:
  65. CompletedProcess: the result of evaluating subprocess execution.
  66. """
  67. args, env = self.distributed(device, ips)
  68. cli_args = self._gather_opts_args(cli_args)
  69. cmd = [*args, "tools/val.py", "--config", config_path, *cli_args]
  70. cp = self.run_cmd(
  71. cmd, env=env, switch_wdir=True, echo=True, silent=False, capture_output=True
  72. )
  73. if cp.returncode == 0:
  74. metric_dict = _extract_eval_metrics(cp.stdout)
  75. cp.metrics = metric_dict
  76. return cp
  77. def predict(
  78. self, config_path: str, cli_args: list, device: str
  79. ) -> CompletedProcess:
  80. """run predicting using dynamic mode
  81. Args:
  82. config_path (str): the config file path used to predict.
  83. cli_args (list): the additional parameters.
  84. device (str): unused.
  85. Returns:
  86. CompletedProcess: the result of predicting subprocess execution.
  87. """
  88. # `device` unused
  89. cli_args = self._gather_opts_args(cli_args)
  90. cmd = [self.python, "tools/predict.py", "--config", config_path, *cli_args]
  91. return self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)
  92. def analyse(self, config_path, cli_args, device, ips):
  93. """analyse"""
  94. args, env = self.distributed(device, ips)
  95. cli_args = self._gather_opts_args(cli_args)
  96. cmd = [*args, "tools/analyse.py", "--config", config_path, *cli_args]
  97. cp = self.run_cmd(
  98. cmd, env=env, switch_wdir=True, echo=True, silent=False, capture_output=True
  99. )
  100. return cp
  101. def export(self, config_path: str, cli_args: list, device: str) -> CompletedProcess:
  102. """run exporting
  103. Args:
  104. config_path (str): the path of config file used to export.
  105. cli_args (list): the additional parameters.
  106. device (str): unused.
  107. Returns:
  108. CompletedProcess: the result of exporting subprocess execution.
  109. """
  110. # `device` unused
  111. cli_args = self._gather_opts_args(cli_args)
  112. cmd = [
  113. self.python,
  114. "tools/export.py",
  115. "--for_fd",
  116. "--config",
  117. config_path,
  118. *cli_args,
  119. ]
  120. cp = self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)
  121. return cp
  122. def infer(self, config_path: str, cli_args: list, device: str) -> CompletedProcess:
  123. """run predicting using inference model
  124. Args:
  125. config_path (str): the path of config file used to predict.
  126. cli_args (list): the additional parameters.
  127. device (str): unused.
  128. Returns:
  129. CompletedProcess: the result of infering subprocess execution.
  130. """
  131. # `device` unused
  132. cli_args = self._gather_opts_args(cli_args)
  133. cmd = [
  134. self.python,
  135. "deploy/python/infer.py",
  136. "--config",
  137. config_path,
  138. *cli_args,
  139. ]
  140. return self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)
  141. def compression(
  142. self,
  143. config_path: str,
  144. train_cli_args: list,
  145. export_cli_args: list,
  146. device: str,
  147. train_save_dir: str,
  148. ) -> CompletedProcess:
  149. """run compression model
  150. Args:
  151. config_path (str): the path of config file used to predict.
  152. train_cli_args (list): the additional training parameters.
  153. export_cli_args (list): the additional exporting parameters.
  154. device (str): the running device.
  155. train_save_dir (str): the directory path to save output.
  156. Returns:
  157. CompletedProcess: the result of compression subprocess execution.
  158. """
  159. # Step 1: Train model
  160. args, env = self.distributed(device, log_dir=train_save_dir)
  161. train_cli_args = self._gather_opts_args(train_cli_args)
  162. # Note that we add `--do_eval` here so we can have `train_save_dir/best_model/model.pdparams` saved
  163. cmd = [
  164. *args,
  165. "deploy/slim/quant/qat_train.py",
  166. "--do_eval",
  167. "--config",
  168. config_path,
  169. *train_cli_args,
  170. ]
  171. cp_train = self.run_cmd(
  172. cmd,
  173. env=env,
  174. switch_wdir=True,
  175. echo=True,
  176. silent=False,
  177. capture_output=True,
  178. log_path=self._get_train_log_path(train_save_dir),
  179. )
  180. # Step 2: Export model
  181. export_cli_args = self._gather_opts_args(export_cli_args)
  182. # We export the best model on the validation dataset
  183. weight_path = os.path.join(train_save_dir, "best_model", "model.pdparams")
  184. cmd = [
  185. self.python,
  186. "deploy/slim/quant/qat_export.py",
  187. "--for_fd",
  188. "--config",
  189. config_path,
  190. "--model_path",
  191. weight_path,
  192. *export_cli_args,
  193. ]
  194. cp_export = self.run_cmd(cmd, switch_wdir=True, echo=True, silent=False)
  195. return cp_train, cp_export
  196. def _gather_opts_args(self, args):
  197. # Since `--opts` in PaddleSeg does not use `action='append'`
  198. # We collect and arrange all opts args here
  199. # e.g.: python tools/train.py --config xxx --opts a=1 c=3 --opts b=2
  200. # => python tools/train.py --config xxx c=3 --opts a=1 b=2
  201. return gather_opts_args(args, "--opts")
  202. def _extract_eval_metrics(stdout: str) -> dict:
  203. """extract evaluation metrics from training log
  204. Args:
  205. stdout (str): the training log
  206. Returns:
  207. dict: the training metric
  208. """
  209. import re
  210. _DP = r"[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?"
  211. pattern = r"Images: \d+ mIoU: (_dp) Acc: (_dp) Kappa: (_dp) Dice: (_dp)".replace(
  212. "_dp", _DP
  213. )
  214. keys = ["mIoU", "Acc", "Kappa", "Dice"]
  215. metric_dict = dict()
  216. pattern = re.compile(pattern)
  217. # TODO: Use lazy version to make it more efficient
  218. lines = stdout.splitlines()
  219. for line in lines:
  220. match = pattern.search(line)
  221. if match:
  222. for k, v in zip(keys, map(float, match.groups())):
  223. metric_dict[k] = v
  224. return metric_dict