callbacks.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import os
  18. import sys
  19. import datetime
  20. import six
  21. import paddle.distributed as dist
  22. from paddlex.ppdet.utils.checkpoint import save_model
  23. from paddlex.ppdet.utils.logger import setup_logger
  24. logger = setup_logger('paddlex.ppdet.engine')
  25. __all__ = ['Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer']
  class Callback(object):
      """Base class declaring the four callback hooks used in this module.

      Every hook is a no-op here; subclasses override the ones they need.

      Args:
          model: object kept on ``self.model`` for use by subclasses.
      """

      def __init__(self, model):
          self.model = model

      def on_step_begin(self, status):
          """Step-begin hook; does nothing in the base class."""
          return None

      def on_step_end(self, status):
          """Step-end hook; does nothing in the base class."""
          return None

      def on_epoch_begin(self, status):
          """Epoch-begin hook; does nothing in the base class."""
          return None

      def on_epoch_end(self, status):
          """Epoch-end hook; does nothing in the base class."""
          return None
  class ComposeCallback(object):
      """Forward each hook call to a sequence of callbacks, in order.

      Args:
          callbacks: iterable of ``Callback`` instances. ``None`` entries
              are discarded; any other non-``Callback`` entry trips an
              assertion.
      """

      def __init__(self, callbacks):
          kept = []
          for candidate in callbacks:
              if candidate is not None:
                  kept.append(candidate)
          # Validate only after the None entries have been filtered out.
          for candidate in kept:
              assert isinstance(candidate, Callback), \
                  "callback should be subclass of Callback"
          self._callbacks = kept

      def _fire(self, hook_name, status):
          """Call the hook named ``hook_name`` on every stored callback."""
          for cb in self._callbacks:
              getattr(cb, hook_name)(status)

      def on_step_begin(self, status):
          self._fire('on_step_begin', status)

      def on_step_end(self, status):
          self._fire('on_step_end', status)

      def on_epoch_begin(self, status):
          self._fire('on_epoch_begin', status)

      def on_epoch_end(self, status):
          self._fire('on_epoch_end', status)
  class LogPrinter(Callback):
      """Callback that reports progress through the module-level ``logger``.

      Nothing is logged unless ``dist.get_world_size() < 2`` or the current
      rank is 0.
      """

      def __init__(self, model):
          super(LogPrinter, self).__init__(model)

      @staticmethod
      def _is_silent_rank():
          """Return True for non-zero ranks of a multi-process run."""
          return dist.get_world_size() >= 2 and dist.get_rank() != 0

      def _log_train_step(self, status, mode):
          """Log one training progress line every ``cfg.log_iter`` steps."""
          epoch_id = status['epoch_id']
          step_id = status['step_id']
          steps_per_epoch = status['steps_per_epoch']
          training_staus = status['training_staus']
          batch_time = status['batch_time']
          data_time = status['data_time']

          cfg = self.model.cfg
          total_epochs = cfg.epoch
          reader_key = '{}Reader'.format(mode.capitalize())
          batch_size = cfg[reader_key]['batch_size']
          meters = training_staus.log()
          # Pad the step counter to the width of the largest step number.
          space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'

          if step_id % cfg.log_iter != 0:
              return

          remaining_steps = (total_epochs - epoch_id
                             ) * steps_per_epoch - step_id
          eta_seconds = remaining_steps * batch_time.global_avg
          eta_text = str(datetime.timedelta(seconds=int(eta_seconds)))
          images_per_sec = float(batch_size) / batch_time.avg

          pieces = [
              'Epoch: [{}]',
              '[{' + space_fmt + '}/{}]',
              'learning_rate: {lr:.6f}',
              '{meters}',
              'eta: {eta}',
              'batch_cost: {btime}',
              'data_cost: {dtime}',
              'ips: {ips:.4f} images/s',
          ]
          line = ' '.join(pieces).format(
              epoch_id,
              step_id,
              steps_per_epoch,
              lr=status['learning_rate'],
              meters=meters,
              eta=eta_text,
              btime=str(batch_time),
              dtime=str(data_time),
              ips=images_per_sec)
          logger.info(line)

      def on_step_end(self, status):
          """Log training progress, or every 100th step when evaluating."""
          if self._is_silent_rank():
              return
          mode = status['mode']
          if mode == 'train':
              self._log_train_step(status, mode)
          elif mode == 'eval':
              current_step = status['step_id']
              if current_step % 100 == 0:
                  logger.info("Eval iter: {}".format(current_step))

      def on_epoch_end(self, status):
          """After an 'eval' epoch, log the sample count and samples/second."""
          if self._is_silent_rank():
              return
          if status['mode'] != 'eval':
              return
          sample_num = status['sample_num']
          cost_time = status['cost_time']
          summary = 'Total sample number: {}, averge FPS: {}'.format(
              sample_num, sample_num / cost_time)
          logger.info(summary)
  class Checkpointer(Callback):
      """Callback that saves model weights at the end of an epoch.

      In 'train' mode a snapshot is written every ``cfg.snapshot_epoch``
      epochs and at the final epoch (named "model_final"). In 'eval' mode,
      when ``status['save_best_model']`` is truthy, the weights are written
      as "best_model" whenever the first value of the 'bbox', 'keypoint' or
      'mask' result exceeds the best value seen so far.

      Saving only happens when ``dist.get_world_size() < 2`` or the current
      rank is 0.
      """

      def __init__(self, model):
          super(Checkpointer, self).__init__(model)
          self.best_ap = 0.
          self.save_dir = os.path.join(self.model.cfg.save_dir,
                                       self.model.cfg.filename)
          # Save the student network when the wrapped model exposes one.
          network = self.model.model
          if hasattr(network, 'student_model'):
              self.weight = network.student_model
          else:
              self.weight = network

      def on_epoch_end(self, status):
          """Decide whether to save a checkpoint and, if so, save it.

          Args:
              status: mapping that provides 'mode' and 'epoch_id', and
                  optionally 'save_best_model'.
          """
          mode = status['mode']
          epoch_id = status['epoch_id']
          if dist.get_world_size() >= 2 and dist.get_rank() != 0:
              return

          weight = None
          save_name = None
          if mode == 'train':
              last_epoch = self.model.cfg.epoch - 1
              is_last = epoch_id == last_epoch
              snapshot_due = (
                  epoch_id + 1) % self.model.cfg.snapshot_epoch == 0
              if snapshot_due or is_last:
                  save_name = "model_final" if is_last else str(epoch_id)
                  weight = self.weight
          elif mode == 'eval':
              if 'save_best_model' in status and status['save_best_model']:
                  for metric in self.model._metrics:
                      map_res = metric.get_results()
                      # Result preference: bbox, then keypoint, then mask.
                      if 'bbox' in map_res:
                          key = 'bbox'
                      elif 'keypoint' in map_res:
                          key = 'keypoint'
                      else:
                          key = 'mask'
                      if key not in map_res:
                          logger.warning("Evaluation results empty, this may be due to " \
                                         "training iterations being too few or not " \
                                         "loading the correct weights.")
                          # NOTE(review): this returns before the save below,
                          # so a best model chosen by an earlier metric in
                          # this loop is not written. Confirm that is intended.
                          return
                      if map_res[key][0] > self.best_ap:
                          self.best_ap = map_res[key][0]
                          save_name = 'best_model'
                          weight = self.weight
                      logger.info("Best test {} ap is {:0.3f}.".format(
                          key, self.best_ap))

          # Compare against None explicitly: a weight object that happens to
          # evaluate as falsy must still be saved.
          if weight is not None:
              save_model(weight, self.model.optimizer, self.save_dir,
                         save_name, epoch_id + 1)
  class WiferFaceEval(Callback):
      """Callback that updates every metric with the model, then exits.

      ``on_epoch_begin`` asserts that ``self.model.mode`` is 'eval', calls
      ``update(self.model.model)`` on each entry of ``self.model._metrics``
      and finally calls ``sys.exit()``.
      """

      def __init__(self, model):
          super(WiferFaceEval, self).__init__(model)

      def on_epoch_begin(self, status):
          in_eval_mode = self.model.mode == 'eval'
          assert in_eval_mode, \
              "WiferFaceEval can only be set during evaluation"
          for evaluator in self.model._metrics:
              evaluator.update(self.model.model)
          # Terminate the process once all metrics have been updated.
          sys.exit()
  class VisualDLWriter(Callback):
      """
      Record scalars and images with a VisualDL ``LogWriter``.

      The log directory is ``model.cfg.get('vdl_log_dir')`` and falls back
      to 'vdl_log_dir/scalar'. Writing only happens when
      ``dist.get_world_size() < 2`` or the current rank is 0.
      """

      def __init__(self, model):
          super(VisualDLWriter, self).__init__(model)
          assert six.PY3, "VisualDL requires Python >= 3.5"
          try:
              from visualdl import LogWriter
          except Exception as e:
              logger.error('visualdl not found, plaese install visualdl. '
                           'for example: `pip install visualdl`.')
              raise e
          log_dir = model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar')
          self.vdl_writer = LogWriter(log_dir)
          # Independent step counters for each kind of record.
          self.vdl_loss_step = 0
          self.vdl_mAP_step = 0
          self.vdl_image_step = 0
          self.vdl_image_frame = 0

      @staticmethod
      def _is_silent_rank():
          """Return True for non-zero ranks of a multi-process run."""
          return dist.get_world_size() >= 2 and dist.get_rank() != 0

      def _write_losses(self, training_staus):
          """Write every entry of ``training_staus.get()`` at one step."""
          for loss_name, loss_value in training_staus.get().items():
              self.vdl_writer.add_scalar(loss_name, loss_value,
                                         self.vdl_loss_step)
          self.vdl_loss_step += 1

      def _write_images(self, original, result):
          """Write an original/result image pair into the current frame."""
          frame = self.vdl_image_frame
          slot = self.vdl_image_step
          self.vdl_writer.add_image("original/frame_{}".format(frame),
                                    original, slot)
          self.vdl_writer.add_image("result/frame_{}".format(frame),
                                    result, slot)
          self.vdl_image_step += 1
          # each frame can display ten pictures at most.
          if self.vdl_image_step % 10 == 0:
              self.vdl_image_step = 0
              self.vdl_image_frame += 1

      def on_step_end(self, status):
          """Write losses in 'train' mode and image pairs in 'test' mode."""
          mode = status['mode']
          if self._is_silent_rank():
              return
          if mode == 'train':
              self._write_losses(status['training_staus'])
          elif mode == 'test':
              ori_image = status['original_image']
              result_image = status['result_image']
              self._write_images(ori_image, result_image)

      def on_epoch_end(self, status):
          """After an 'eval' epoch, write the first value of each result."""
          mode = status['mode']
          if self._is_silent_rank():
              return
          if mode != 'eval':
              return
          for metric in self.model._metrics:
              for key, map_value in metric.get_results().items():
                  self.vdl_writer.add_scalar("{}-mAP".format(key),
                                             map_value[0],
                                             self.vdl_mAP_step)
          self.vdl_mAP_step += 1