# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import json

from ...base import BaseModel
from ...base.utils.arg import CLIArgument
from ...base.utils.subprocess import CompletedProcess
from ....utils.device import parse_device
from ....utils.misc import abspath
from ....utils import logging
from .config import DetConfig
from .official_categories import official_categories


class DetModel(BaseModel):
    """Object Detection Model"""

    def train(
        self,
        batch_size: int = None,
        learning_rate: float = None,
        epochs_iters: int = None,
        ips: str = None,
        device: str = "gpu",
        resume_path: str = None,
        dy2st: bool = False,
        amp: str = "OFF",
        num_workers: int = None,
        use_vdl: bool = True,
        save_dir: str = None,
        **kwargs,
    ) -> CompletedProcess:
        """train self

        Args:
            batch_size (int, optional): the train batch size value. Defaults to None.
            learning_rate (float, optional): the train learning rate value. Defaults to None.
            epochs_iters (int, optional): the train epochs value. Defaults to None.
            ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
            device (str, optional): the running device. Defaults to 'gpu'.
            resume_path (str, optional): the checkpoint file path to resume training. Train from scratch if it is set
                to None. Defaults to None.
            dy2st (bool, optional): Enable dynamic to static. Defaults to False.
            amp (str, optional): the AMP settings. Defaults to 'OFF'.
            num_workers (int, optional): the workers number. Defaults to None.
            use_vdl (bool, optional): enable VisualDL. Defaults to True.
            save_dir (str, optional): the directory path to save train output. Defaults to None.

        Returns:
            CompletedProcess: the result of training subprocess execution.
        """
        config = self.config.copy()
        cli_args = []

        if batch_size is not None:
            config.update_batch_size(batch_size, "train")

        if learning_rate is not None:
            config.update_learning_rate(learning_rate)

        if epochs_iters is not None:
            config.update_epochs(epochs_iters)
            config.update_cossch_epoch(epochs_iters)

        device_type, _ = parse_device(device)
        config.update_device(device_type)

        if resume_path is not None:
            assert resume_path.endswith(
                ".pdparams"
            ), "`resume_path` should end with '.pdparams'"
            # drop the ".pdparams" suffix before passing the checkpoint path to --resume
            resume_dir = resume_path[0:-9]
            cli_args.append(CLIArgument("--resume", resume_dir))

        if dy2st:
            cli_args.append(CLIArgument("--to_static"))

        if num_workers is not None:
            config.update_num_workers(num_workers)

        if save_dir is None:
            save_dir = abspath(config.get_train_save_dir())
        else:
            save_dir = abspath(save_dir)
        config.update_save_dir(save_dir)

        if use_vdl:
            cli_args.append(CLIArgument("--use_vdl", use_vdl))
            cli_args.append(CLIArgument("--vdl_log_dir", save_dir))

        do_eval = kwargs.pop("do_eval", True)
        enable_ce = kwargs.pop("enable_ce", None)

        profile = kwargs.pop("profile", None)
        if profile is not None:
            cli_args.append(CLIArgument("--profiler_options", profile))

        # Benchmarking mode settings
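        # When a `benchmark` dict is passed through kwargs, its entries override the
        # `amp`, `do_eval`, and `num_workers` arguments above, and extra logging,
        # shuffle, and shared-memory options are written into the config.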
        benchmark = kwargs.pop("benchmark", None)
        if benchmark is not None:
            envs = benchmark.get("env", None)
            amp = benchmark.get("amp", None)
            do_eval = benchmark.get("do_eval", False)
            num_workers = benchmark.get("num_workers", None)
            config.update_log_ranks(device)
            config.update_shuffle(benchmark.get("shuffle", False))
            config.update_shared_memory(benchmark.get("shared_memory", True))
            config.update_print_mem_info(benchmark.get("print_mem_info", True))
            if num_workers is not None:
                config.update_num_workers(num_workers)
            if amp == "O1":
                # TODO: ppdet only supports AMP O1
                cli_args.append(CLIArgument("--amp"))
            if envs is not None:
                for env_name, env_value in envs.items():
                    os.environ[env_name] = str(env_value)
            # set the seed to 0 for benchmark mode by enabling --enable_ce
            cli_args.append(CLIArgument("--enable_ce", True))
        else:
            if amp != "OFF" and amp is not None:
                # TODO: consider whether amp is O1 or O2 in ppdet
                cli_args.append(CLIArgument("--amp"))
            if enable_ce is not None:
                cli_args.append(CLIArgument("--enable_ce", enable_ce))

        # PDX related settings
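        # Uniform output is disabled on npu/xpu/mlu devices; the PaddleX model name
        # and the optional HPI config path (a pathlib.Path, hence as_posix()) are
        # recorded in the dumped config.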
        if device_type in ["npu", "xpu", "mlu"]:
            uniform_output_enabled = False
        else:
            uniform_output_enabled = True
        config.update({"uniform_output_enabled": uniform_output_enabled})
        config.update({"pdx_model_name": self.name})
        hpi_config_path = self.model_info.get("hpi_config_path", None)
        if hpi_config_path:
            hpi_config_path = hpi_config_path.as_posix()
        config.update({"hpi_config_path": hpi_config_path})

        self._assert_empty_kwargs(kwargs)

        with self._create_new_config_file() as config_path:
            config.dump(config_path)
            return self.runner.train(
                config_path, cli_args, device, ips, save_dir, do_eval=do_eval
            )

    def evaluate(
        self,
        weight_path: str,
        batch_size: int = None,
        ips: str = None,
        device: str = "gpu",
        amp: str = "OFF",
        num_workers: int = None,
        **kwargs,
    ) -> CompletedProcess:
        """evaluate self using specified weight

        Args:
            weight_path (str): the path of model weight file to be evaluated.
            batch_size (int, optional): the batch size value in evaluating. Defaults to None.
            ips (str, optional): the ip addresses of nodes when using distribution. Defaults to None.
            device (str, optional): the running device. Defaults to 'gpu'.
            amp (str, optional): the AMP setting. Defaults to 'OFF'.
            num_workers (int, optional): the workers number in evaluating. Defaults to None.

        Returns:
            CompletedProcess: the result of evaluating subprocess execution.
        """
        config = self.config.copy()
        cli_args = []

        weight_path = abspath(weight_path)
        config.update_weights(weight_path)

        if batch_size is not None:
            config.update_batch_size(batch_size, "eval")

        device_type, device_ids = parse_device(device)
        if len(device_ids) > 1:
            raise ValueError(
                f"multi-{device_type} evaluation is not supported. Please use a single {device_type}."
            )
        config.update_device(device_type)

        if amp != "OFF":
            # TODO: consider whether amp is O1 or O2 in ppdet
            cli_args.append(CLIArgument("--amp"))

        if num_workers is not None:
            config.update_num_workers(num_workers)

        self._assert_empty_kwargs(kwargs)

        with self._create_new_config_file() as config_path:
            config.dump(config_path)
            cp = self.runner.evaluate(config_path, cli_args, device, ips)
            return cp

    def predict(
        self,
        input_path: str,
        weight_path: str,
        device: str = "gpu",
        save_dir: str = None,
        **kwargs,
    ) -> CompletedProcess:
        """predict using specified weight

        Args:
            input_path (str): the path of the image file to be predicted.
            weight_path (str): the path of the model weight file used to predict.
            device (str, optional): the running device. Defaults to 'gpu'.
            save_dir (str, optional): the directory path to save predict output. Defaults to None.

        Returns:
            CompletedProcess: the result of predicting subprocess execution.
        """
        config = self.config.copy()
        cli_args = []

        input_path = abspath(input_path)
        if os.path.isfile(input_path):
            cli_args.append(CLIArgument("--infer_img", input_path))
        else:
            cli_args.append(CLIArgument("--infer_dir", input_path))
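
        # Optional ppdet inference flags are forwarded only when explicitly supplied;
        # they are popped from kwargs so the final empty-kwargs check still passes.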
        if "infer_list" in kwargs:
            infer_list = abspath(kwargs.pop("infer_list"))
            cli_args.append(CLIArgument("--infer_list", infer_list))
        if "visualize" in kwargs:
            cli_args.append(CLIArgument("--visualize", kwargs.pop("visualize")))
        if "save_results" in kwargs:
            cli_args.append(CLIArgument("--save_results", kwargs.pop("save_results")))
        if "save_threshold" in kwargs:
            cli_args.append(CLIArgument("--save_threshold", kwargs.pop("save_threshold")))
        if "rtn_im_file" in kwargs:
            cli_args.append(CLIArgument("--rtn_im_file", kwargs.pop("rtn_im_file")))

        weight_path = abspath(weight_path)
        config.update_weights(weight_path)

        device_type, _ = parse_device(device)
        config.update_device(device_type)

        if save_dir is not None:
            save_dir = abspath(save_dir)
            cli_args.append(CLIArgument("--output_dir", save_dir))

        self._assert_empty_kwargs(kwargs)

        with self._create_new_config_file() as config_path:
            config.dump(config_path)
            return self.runner.predict(config_path, cli_args, device)

    def export(self, weight_path: str, save_dir: str, **kwargs) -> CompletedProcess:
        """export the dynamic model to static model

        Args:
            weight_path (str): the model weight file path that is used to export.
            save_dir (str): the directory path to save export output.

        Returns:
            CompletedProcess: the result of exporting subprocess execution.
        """
        config = self.config.copy()
        cli_args = []

        device = kwargs.pop("device", None)
        if device:
            device_type, _ = parse_device(device)
            config.update_device(device_type)

        if not weight_path.startswith("http"):
            weight_path = abspath(weight_path)
        config.update_weights(weight_path)

        save_dir = abspath(save_dir)
        cli_args.append(CLIArgument("--output_dir", save_dir))

        input_shape = kwargs.pop("input_shape", None)
        if input_shape is not None:
            cli_args.append(
                CLIArgument("-o", f"TestReader.inputs_def.image_shape={input_shape}")
            )

        use_trt = kwargs.pop("use_trt", None)
        if use_trt is not None:
            cli_args.append(CLIArgument("-o", f"trt={bool(use_trt)}"))

        exclude_nms = kwargs.pop("exclude_nms", None)
        if exclude_nms is not None:
            cli_args.append(CLIArgument("-o", f"exclude_nms={bool(exclude_nms)}"))

        # PDX related settings
        config.update({"pdx_model_name": self.name})
        hpi_config_path = self.model_info.get("hpi_config_path", None)
        if hpi_config_path:
            hpi_config_path = hpi_config_path.as_posix()
        config.update({"hpi_config_path": hpi_config_path})
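
        # Models listed in `official_categories` can be exported even without a real
        # validation annotation file: a minimal COCO-style JSON containing only the
        # category list is generated and pointed to by the config.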
        if self.name in official_categories.keys():
            anno_val_file = abspath(
                os.path.join(
                    config.TestDataset["dataset_dir"], config.TestDataset["anno_path"]
                )
            )
            if anno_val_file is None or not os.path.isfile(anno_val_file):
                categories = official_categories[self.name]
                temp_anno = {"images": [], "annotations": [], "categories": categories}
                # the temporary annotation file must exist while the export subprocess
                # runs, so export happens inside this context manager
                with self._create_new_val_json_file() as anno_file:
                    with open(anno_file, "w") as f:
                        json.dump(temp_anno, f)
                    config.update(
                        {"TestDataset": {"dataset_dir": "", "anno_path": anno_file}}
                    )
                    logging.warning(
                        f"{self.name} does not have validation annotations. Using {anno_file} by default instead."
                    )
                    self._assert_empty_kwargs(kwargs)
                    with self._create_new_config_file() as config_path:
                        config.dump(config_path)
                        return self.runner.export(config_path, cli_args, None)

        self._assert_empty_kwargs(kwargs)

        with self._create_new_config_file() as config_path:
            config.dump(config_path)
            return self.runner.export(config_path, cli_args, None)

    def infer(
        self,
        model_dir: str,
        input_path: str,
        device: str = "gpu",
        save_dir: str = None,
        **kwargs,
    ) -> CompletedProcess:
        """predict image using inference model

        Args:
            model_dir (str): the directory path of inference model files that will be used to predict.
            input_path (str): the path of the image to be predicted.
            device (str, optional): the running device. Defaults to 'gpu'.
            save_dir (str, optional): the directory path to save output. Defaults to None.

        Returns:
            CompletedProcess: the result of inferring subprocess execution.
        """
        model_dir = abspath(model_dir)
        input_path = abspath(input_path)
        if save_dir is not None:
            save_dir = abspath(save_dir)

        cli_args = []
        cli_args.append(CLIArgument("--model_dir", model_dir))
        cli_args.append(CLIArgument("--image_file", input_path))
        if save_dir is not None:
            cli_args.append(CLIArgument("--output_dir", save_dir))

        device_type, _ = parse_device(device)
        cli_args.append(CLIArgument("--device", device_type))

        self._assert_empty_kwargs(kwargs)

        return self.runner.infer(cli_args, device)

    def compression(
        self,
        weight_path: str,
        batch_size: int = None,
        learning_rate: float = None,
        epochs_iters: int = None,
        device: str = None,
        use_vdl: bool = True,
        save_dir: str = None,
        **kwargs,
    ) -> CompletedProcess:
        """compression model

        Args:
            weight_path (str): the path to weight file of model.
            batch_size (int, optional): the batch size value of compression training. Defaults to None.
            learning_rate (float, optional): the learning rate value of compression training. Defaults to None.
            epochs_iters (int, optional): the epochs or iters of compression training. Defaults to None.
            device (str, optional): the device to run compression training. Defaults to None.
            use_vdl (bool, optional): whether or not to use VisualDL. Defaults to True.
            save_dir (str, optional): the directory to save output. Defaults to None.

        Returns:
            CompletedProcess: the result of compression subprocess execution.
        """
        weight_path = abspath(weight_path)
        if save_dir is None:
            save_dir = self.config["save_dir"]
        save_dir = abspath(save_dir)

        config = self.config.copy()
        cps_config = DetConfig(
            self.name, config_path=self.model_info["auto_compression_config_path"]
        )
        train_cli_args = []
        export_cli_args = []

        cps_config.update_pretrained_weights(weight_path)

        if batch_size is not None:
            cps_config.update_batch_size(batch_size, "train")

        if learning_rate is not None:
            cps_config.update_learning_rate(learning_rate)

        if epochs_iters is not None:
            cps_config.update_epochs(epochs_iters)

        if device is not None:
            device_type, _ = parse_device(device)
            config.update_device(device_type)

        if save_dir is not None:
            save_dir = abspath(save_dir)
        else:
            save_dir = abspath(config.get_train_save_dir())
        cps_config.update_save_dir(save_dir)

        if use_vdl:
            train_cli_args.append(CLIArgument("--use_vdl", use_vdl))
            train_cli_args.append(CLIArgument("--vdl_log_dir", save_dir))

        export_cli_args.append(
            CLIArgument("--output_dir", os.path.join(save_dir, "export"))
        )

        with self._create_new_config_file() as config_path:
            config.dump(config_path)
            # TODO: refactor me
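            # build the slim (compression) config path by inserting "_compression"
            # before the file extension (assumes a 4-character suffix such as ".yml")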
            cps_config_path = config_path[0:-4] + "_compression" + config_path[-4:]
            cps_config.dump(cps_config_path)
            train_cli_args.append(CLIArgument("--slim_config", cps_config_path))
            export_cli_args.append(CLIArgument("--slim_config", cps_config_path))

            self._assert_empty_kwargs(kwargs)

            return self.runner.compression(
                config_path, train_cli_args, export_cli_args, device, save_dir
            )
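

# A rough usage sketch (hypothetical; how a `DetModel` instance is obtained depends on
# the surrounding PaddleX registry / BaseModel wiring, and paths below are placeholders):
#
#     model = ...  # e.g. a DetModel resolved from the PaddleX model registry by name
#     model.train(batch_size=8, learning_rate=0.001, epochs_iters=50, device="gpu:0")
#     model.evaluate(weight_path="output/best_model.pdparams", device="gpu:0")
#     model.export(weight_path="output/best_model.pdparams", save_dir="inference")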