model.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. from ...base import BaseModel
  16. from ...base.utils.arg import CLIArgument
  17. from ...base.utils.subprocess import CompletedProcess
  18. from ....utils.device import parse_device
  19. from ....utils.misc import abspath
  20. from .config import InstanceSegConfig
  21. class InstanceSegModel(BaseModel):
  22. """Instance Segmentation Model"""
  23. def train(
  24. self,
  25. batch_size: int = None,
  26. learning_rate: float = None,
  27. epochs_iters: int = None,
  28. ips: str = None,
  29. device: str = "gpu",
  30. resume_path: str = None,
  31. dy2st: bool = False,
  32. amp: str = "OFF",
  33. num_workers: int = None,
  34. use_vdl: bool = True,
  35. save_dir: str = None,
  36. **kwargs,
  37. ) -> CompletedProcess:
  38. """train self
  39. Args:
  40. batch_size (int, optional): the train batch size value. Defaults to None.
  41. learning_rate (float, optional): the train learning rate value. Defaults to None.
  42. epochs_iters (int, optional): the train epochs value. Defaults to None.
  43. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  44. device (str, optional): the running device. Defaults to 'gpu'.
  45. resume_path (str, optional): the checkpoint file path to resume training. Train from scratch if it is set
  46. to None. Defaults to None.
  47. dy2st (bool, optional): Enable dynamic to static. Defaults to False.
  48. amp (str, optional): the amp settings. Defaults to 'OFF'.
  49. num_workers (int, optional): the workers number. Defaults to None.
  50. use_vdl (bool, optional): enable VisualDL. Defaults to True.
  51. save_dir (str, optional): the directory path to save train output. Defaults to None.
  52. Returns:
  53. CompletedProcess: the result of training subprocess execution.
  54. """
  55. config = self.config.copy()
  56. cli_args = []
  57. if batch_size is not None:
  58. config.update_batch_size(batch_size, "train")
  59. if learning_rate is not None:
  60. config.update_learning_rate(learning_rate)
  61. if epochs_iters is not None:
  62. config.update_epochs(epochs_iters)
  63. config.update_cossch_epoch(epochs_iters)
  64. device_type, _ = parse_device(device)
  65. config.update_device(device_type)
  66. if resume_path is not None:
  67. assert resume_path.endswith(
  68. ".pdparams"
  69. ), "resume_path should be endswith .pdparam"
  70. resume_dir = resume_path[0:-9]
  71. cli_args.append(CLIArgument("--resume", resume_dir))
  72. if dy2st:
  73. cli_args.append(CLIArgument("--to_static"))
  74. if num_workers is not None:
  75. config.update_num_workers(num_workers)
  76. if save_dir is None:
  77. save_dir = abspath(config.get_train_save_dir())
  78. else:
  79. save_dir = abspath(save_dir)
  80. config.update_save_dir(save_dir)
  81. if use_vdl:
  82. cli_args.append(CLIArgument("--use_vdl", use_vdl))
  83. cli_args.append(CLIArgument("--vdl_log_dir", save_dir))
  84. do_eval = kwargs.pop("do_eval", True)
  85. enable_ce = kwargs.pop("enable_ce", None)
  86. profile = kwargs.pop("profile", None)
  87. if profile is not None:
  88. cli_args.append(CLIArgument("--profiler_options", profile))
  89. # Benchmarking mode settings
  90. benchmark = kwargs.pop("benchmark", None)
  91. if benchmark is not None:
  92. envs = benchmark.get("env", None)
  93. amp = benchmark.get("amp", None)
  94. do_eval = benchmark.get("do_eval", False)
  95. num_workers = benchmark.get("num_workers", None)
  96. config.update_log_ranks(device)
  97. config.update_shuffle(benchmark.get("shuffle", False))
  98. config.update_shared_memory(benchmark.get("shared_memory", True))
  99. config.update_print_mem_info(benchmark.get("print_mem_info", True))
  100. if num_workers is not None:
  101. config.update_num_workers(num_workers)
  102. if amp == "O1":
  103. # TODO: ppdet only support ampO1
  104. cli_args.append(CLIArgument("--amp"))
  105. if envs is not None:
  106. for env_name, env_value in envs.items():
  107. os.environ[env_name] = str(env_value)
  108. # set seed to 0 for benchmark mode by enable_ce
  109. cli_args.append(CLIArgument("--enable_ce", True))
  110. else:
  111. if amp != "OFF" and amp is not None:
  112. # TODO: consider amp is O1 or O2 in ppdet
  113. cli_args.append(CLIArgument("--amp"))
  114. if enable_ce is not None:
  115. cli_args.append(CLIArgument("--enable_ce", enable_ce))
  116. # PDX related settings
  117. uniform_output_enabled = kwargs.pop("uniform_output_enabled", True)
  118. config.update({"uniform_output_enabled": uniform_output_enabled})
  119. config.update({"pdx_model_name": self.name})
  120. self._assert_empty_kwargs(kwargs)
  121. with self._create_new_config_file() as config_path:
  122. config.dump(config_path)
  123. return self.runner.train(
  124. config_path, cli_args, device, ips, save_dir, do_eval=do_eval
  125. )
  126. def evaluate(
  127. self,
  128. weight_path: str,
  129. batch_size: int = None,
  130. ips: bool = None,
  131. device: bool = "gpu",
  132. amp: bool = "OFF",
  133. num_workers: int = None,
  134. **kwargs,
  135. ) -> CompletedProcess:
  136. """evaluate self using specified weight
  137. Args:
  138. weight_path (str): the path of model weight file to be evaluated.
  139. batch_size (int, optional): the batch size value in evaluating. Defaults to None.
  140. ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
  141. device (str, optional): the running device. Defaults to 'gpu'.
  142. amp (str, optional): the AMP setting. Defaults to 'OFF'.
  143. num_workers (int, optional): the workers number in evaluating. Defaults to None.
  144. Returns:
  145. CompletedProcess: the result of evaluating subprocess execution.
  146. """
  147. config = self.config.copy()
  148. cli_args = []
  149. weight_path = abspath(weight_path)
  150. config.update_weights(weight_path)
  151. if batch_size is not None:
  152. config.update_batch_size(batch_size, "eval")
  153. device_type, device_ids = parse_device(device)
  154. if len(device_ids) > 1:
  155. raise ValueError(
  156. f"multi-{device_type} evaluation is not supported. Please use a single {device_type}."
  157. )
  158. config.update_device(device_type)
  159. if amp != "OFF":
  160. # TODO: consider amp is O1 or O2 in ppdet
  161. cli_args.append(CLIArgument("--amp"))
  162. if num_workers is not None:
  163. config.update_num_workers(num_workers)
  164. self._assert_empty_kwargs(kwargs)
  165. with self._create_new_config_file() as config_path:
  166. config.dump(config_path)
  167. cp = self.runner.evaluate(config_path, cli_args, device, ips)
  168. return cp
  169. def predict(
  170. self,
  171. input_path: str,
  172. weight_path: str,
  173. device: str = "gpu",
  174. save_dir: str = None,
  175. **kwargs,
  176. ) -> CompletedProcess:
  177. """predict using specified weight
  178. Args:
  179. weight_path (str): the path of model weight file used to predict.
  180. input_path (str): the path of image file to be predicted.
  181. device (str, optional): the running device. Defaults to 'gpu'.
  182. save_dir (str, optional): the directory path to save predict output. Defaults to None.
  183. Returns:
  184. CompletedProcess: the result of predicting subprocess execution.
  185. """
  186. config = self.config.copy()
  187. cli_args = []
  188. input_path = abspath(input_path)
  189. if os.path.isfile(input_path):
  190. cli_args.append(CLIArgument("--infer_img", input_path))
  191. else:
  192. cli_args.append(CLIArgument("--infer_dir", input_path))
  193. if "infer_list" in kwargs:
  194. infer_list = abspath(kwargs.get("infer_list"))
  195. cli_args.append(CLIArgument("--infer_list", infer_list))
  196. if "visualize" in kwargs:
  197. cli_args.append(CLIArgument("--visualize", kwargs["visualize"]))
  198. if "save_results" in kwargs:
  199. cli_args.append(CLIArgument("--save_results", kwargs["save_results"]))
  200. if "save_threshold" in kwargs:
  201. cli_args.append(CLIArgument("--save_threshold", kwargs["save_threshold"]))
  202. if "rtn_im_file" in kwargs:
  203. cli_args.append(CLIArgument("--rtn_im_file", kwargs["rtn_im_file"]))
  204. weight_path = abspath(weight_path)
  205. config.update_weights(weight_path)
  206. device_type, _ = parse_device(device)
  207. config.update_device(device_type)
  208. if save_dir is not None:
  209. save_dir = abspath(save_dir)
  210. cli_args.append(CLIArgument("--output_dir", save_dir))
  211. self._assert_empty_kwargs(kwargs)
  212. with self._create_new_config_file() as config_path:
  213. config.dump(config_path)
  214. return self.runner.predict(config_path, cli_args, device)
  215. def export(self, weight_path: str, save_dir: str, **kwargs) -> CompletedProcess:
  216. """export the dynamic model to static model
  217. Args:
  218. weight_path (str): the model weight file path that used to export.
  219. save_dir (str): the directory path to save export output.
  220. Returns:
  221. CompletedProcess: the result of exporting subprocess execution.
  222. """
  223. config = self.config.copy()
  224. cli_args = []
  225. device = kwargs.pop("device", None)
  226. if device:
  227. device_type, _ = parse_device(device)
  228. config.update_device(device_type)
  229. if not weight_path.startswith("http"):
  230. weight_path = abspath(weight_path)
  231. config.update_weights(weight_path)
  232. save_dir = abspath(save_dir)
  233. cli_args.append(CLIArgument("--output_dir", save_dir))
  234. input_shape = kwargs.pop("input_shape", None)
  235. if input_shape is not None:
  236. cli_args.append(
  237. CLIArgument("-o", f"TestReader.inputs_def.image_shape={input_shape}")
  238. )
  239. use_trt = kwargs.pop("use_trt", None)
  240. if use_trt is not None:
  241. cli_args.append(CLIArgument("-o", f"trt={bool(use_trt)}"))
  242. exclude_nms = kwargs.pop("exclude_nms", None)
  243. if exclude_nms is not None:
  244. cli_args.append(CLIArgument("-o", f"exclude_nms={bool(exclude_nms)}"))
  245. # PDX related settings
  246. uniform_output_enabled = kwargs.pop("uniform_output_enabled", True)
  247. config.update({"uniform_output_enabled": uniform_output_enabled})
  248. config.update({"pdx_model_name": self.name})
  249. self._assert_empty_kwargs(kwargs)
  250. with self._create_new_config_file() as config_path:
  251. config.dump(config_path)
  252. return self.runner.export(config_path, cli_args, None)
  253. def infer(
  254. self,
  255. model_dir: str,
  256. input_path: str,
  257. device: str = "gpu",
  258. save_dir: str = None,
  259. **kwargs,
  260. ):
  261. """predict image using infernece model
  262. Args:
  263. model_dir (str): the directory path of inference model files that would use to predict.
  264. input_path (str): the path of image that would be predict.
  265. device (str, optional): the running device. Defaults to 'gpu'.
  266. save_dir (str, optional): the directory path to save output. Defaults to None.
  267. Returns:
  268. CompletedProcess: the result of infering subprocess execution.
  269. """
  270. model_dir = abspath(model_dir)
  271. input_path = abspath(input_path)
  272. if save_dir is not None:
  273. save_dir = abspath(save_dir)
  274. cli_args = []
  275. cli_args.append(CLIArgument("--model_dir", model_dir))
  276. cli_args.append(CLIArgument("--image_file", input_path))
  277. if save_dir is not None:
  278. cli_args.append(CLIArgument("--output_dir", save_dir))
  279. device_type, _ = parse_device(device)
  280. cli_args.append(CLIArgument("--device", device_type))
  281. self._assert_empty_kwargs(kwargs)
  282. return self.runner.infer(cli_args, device)
  283. def compression(
  284. self,
  285. weight_path: str,
  286. batch_size: int = None,
  287. learning_rate: float = None,
  288. epochs_iters: int = None,
  289. device: str = None,
  290. use_vdl: bool = True,
  291. save_dir: str = None,
  292. **kwargs,
  293. ) -> CompletedProcess:
  294. """compression model
  295. Args:
  296. weight_path (str): the path to weight file of model.
  297. batch_size (int, optional): the batch size value of compression training. Defaults to None.
  298. learning_rate (float, optional): the learning rate value of compression training. Defaults to None.
  299. epochs_iters (int, optional): the epochs or iters of compression training. Defaults to None.
  300. device (str, optional): the device to run compression training. Defaults to 'gpu'.
  301. use_vdl (bool, optional): whether or not to use VisualDL. Defaults to True.
  302. save_dir (str, optional): the directory to save output. Defaults to None.
  303. Returns:
  304. CompletedProcess: the result of compression subprocess execution.
  305. """
  306. weight_path = abspath(weight_path)
  307. if save_dir is None:
  308. save_dir = self.config["save_dir"]
  309. save_dir = abspath(save_dir)
  310. config = self.config.copy()
  311. cps_config = InstanceSegConfig(
  312. self.name, config_path=self.model_info["auto_compression_config_path"]
  313. )
  314. train_cli_args = []
  315. export_cli_args = []
  316. cps_config.update_pretrained_weights(weight_path)
  317. if batch_size is not None:
  318. cps_config.update_batch_size(batch_size, "train")
  319. if learning_rate is not None:
  320. cps_config.update_learning_rate(learning_rate)
  321. if epochs_iters is not None:
  322. cps_config.update_epochs(epochs_iters)
  323. if device is not None:
  324. device_type, _ = parse_device(device)
  325. config.update_device(device_type)
  326. if save_dir is not None:
  327. save_dir = abspath(config.get_train_save_dir())
  328. else:
  329. save_dir = abspath(save_dir)
  330. cps_config.update_save_dir(save_dir)
  331. if use_vdl:
  332. train_cli_args.append(CLIArgument("--use_vdl", use_vdl))
  333. train_cli_args.append(CLIArgument("--vdl_log_dir", save_dir))
  334. export_cli_args.append(
  335. CLIArgument("--output_dir", os.path.join(save_dir, "export"))
  336. )
  337. with self._create_new_config_file() as config_path:
  338. config.dump(config_path)
  339. # TODO: refactor me
  340. cps_config_path = config_path[0:-4] + "_compression" + config_path[-4:]
  341. cps_config.dump(cps_config_path)
  342. train_cli_args.append(CLIArgument("--slim_config", cps_config_path))
  343. export_cli_args.append(CLIArgument("--slim_config", cps_config_path))
  344. self._assert_empty_kwargs(kwargs)
  345. self.runner.compression(
  346. config_path, train_cli_args, export_cli_args, device, save_dir
  347. )