deploy.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os.path as osp
  15. import numpy as np
  16. from paddle.inference import Config
  17. from paddle.inference import create_predictor
  18. from paddle.inference import PrecisionType
  19. from paddlex.cv.models import load_model
  20. from paddlex.utils import logging, Timer
  21. class Predictor(object):
  22. def __init__(self,
  23. model_dir,
  24. use_gpu=False,
  25. gpu_id=0,
  26. cpu_thread_num=1,
  27. use_mkl=True,
  28. mkl_thread_num=4,
  29. use_trt=False,
  30. use_glog=False,
  31. memory_optimize=True,
  32. max_trt_batch_size=1,
  33. trt_precision_mode='float32'):
  34. """ 创建Paddle Predictor
  35. Args:
  36. model_dir: 模型路径(必须是导出的部署或量化模型)
  37. use_gpu: 是否使用gpu,默认False
  38. gpu_id: 使用gpu的id,默认0
  39. cpu_thread_num:使用cpu进行预测时的线程数,默认为1
  40. use_mkl: 是否使用mkldnn计算库,CPU情况下使用,默认False
  41. mkl_thread_num: mkldnn计算线程数,默认为4
  42. use_trt: 是否使用TensorRT,默认False
  43. use_glog: 是否启用glog日志, 默认False
  44. memory_optimize: 是否启动内存优化,默认True
  45. max_trt_batch_size: 在使用TensorRT时配置的最大batch size,默认1
  46. trt_precision_mode:在使用TensorRT时采用的精度,可选值['float32', 'float16']。默认'float32',
  47. """
  48. self.model_dir = model_dir
  49. self._model = load_model(model_dir, with_net=False)
  50. if trt_precision_mode.lower() == 'float32':
  51. trt_precision_mode = PrecisionType.Float32
  52. elif trt_precision_mode.lower() == 'float16':
  53. trt_precision_mode = PrecisionType.Float16
  54. else:
  55. logging.error(
  56. "TensorRT precision mode {} is invalid. Supported modes are float32 and float16."
  57. .format(trt_precision_mode),
  58. exit=True)
  59. self.predictor = self.create_predictor(
  60. use_gpu=use_gpu,
  61. gpu_id=gpu_id,
  62. cpu_thread_num=cpu_thread_num,
  63. use_mkl=use_mkl,
  64. mkl_thread_num=mkl_thread_num,
  65. use_trt=use_trt,
  66. use_glog=use_glog,
  67. memory_optimize=memory_optimize,
  68. max_trt_batch_size=max_trt_batch_size,
  69. trt_precision_mode=trt_precision_mode)
  70. self.timer = Timer()
  71. def create_predictor(self,
  72. use_gpu=True,
  73. gpu_id=0,
  74. cpu_thread_num=1,
  75. use_mkl=True,
  76. mkl_thread_num=4,
  77. use_trt=False,
  78. use_glog=False,
  79. memory_optimize=True,
  80. max_trt_batch_size=1,
  81. trt_precision_mode=PrecisionType.Float32):
  82. config = Config(
  83. osp.join(self.model_dir, 'model.pdmodel'),
  84. osp.join(self.model_dir, 'model.pdiparams'))
  85. if use_gpu:
  86. # 设置GPU初始显存(单位M)和Device ID
  87. config.enable_use_gpu(100, gpu_id)
  88. config.switch_ir_optim(True)
  89. if use_trt:
  90. config.enable_tensorrt_engine(
  91. workspace_size=1 << 10,
  92. max_batch_size=max_trt_batch_size,
  93. min_subgraph_size=3,
  94. precision_mode=trt_precision_mode,
  95. use_static=False,
  96. use_calib_mode=False)
  97. else:
  98. config.disable_gpu()
  99. config.set_cpu_math_library_num_threads(cpu_thread_num)
  100. if use_mkl:
  101. try:
  102. # cache 10 different shapes for mkldnn to avoid memory leak
  103. config.set_mkldnn_cache_capacity(10)
  104. config.enable_mkldnn()
  105. config.set_cpu_math_library_num_threads(mkl_thread_num)
  106. except Exception as e:
  107. logging.warning(
  108. "The current environment does not support `mkldnn`, so disable mkldnn."
  109. )
  110. pass
  111. if use_glog:
  112. config.enable_glog_info()
  113. else:
  114. config.disable_glog_info()
  115. if memory_optimize:
  116. config.enable_memory_optim()
  117. config.switch_use_feed_fetch_ops(False)
  118. predictor = create_predictor(config)
  119. return predictor
  120. def preprocess(self, images, transforms):
  121. preprocessed_samples = self._model._preprocess(
  122. images, transforms, to_tensor=False)
  123. if self._model.model_type == 'classifier':
  124. preprocessed_samples = {'image': preprocessed_samples[0]}
  125. elif self._model.model_type == 'segmenter':
  126. preprocessed_samples = {
  127. 'image': preprocessed_samples[0],
  128. 'ori_shape': preprocessed_samples[1]
  129. }
  130. elif self._model.model_type == 'detector':
  131. pass
  132. else:
  133. logging.error(
  134. "Invalid model type {}".format(self._model.model_type),
  135. exit=True)
  136. return preprocessed_samples
  137. def postprocess(self, net_outputs, topk=1, ori_shape=None,
  138. transforms=None):
  139. if self._model.model_type == 'classifier':
  140. true_topk = min(self._model.num_classes, topk)
  141. preds = self._model._postprocess(net_outputs[0], true_topk)
  142. if len(preds) == 1:
  143. preds = preds[0]
  144. elif self._model.model_type == 'segmenter':
  145. score_map, label_map = self._model._postprocess(
  146. net_outputs,
  147. batch_origin_shape=ori_shape,
  148. transforms=transforms.transforms)
  149. score_map = np.squeeze(score_map)
  150. label_map = np.squeeze(label_map)
  151. if score_map.ndim == 3:
  152. preds = {'label_map': label_map, 'score_map': score_map}
  153. else:
  154. preds = [{
  155. 'label_map': l,
  156. 'score_map': s
  157. } for l, s in zip(label_map, score_map)]
  158. elif self._model.model_type == 'detector':
  159. if 'RCNN' in self._model.__class__.__name__:
  160. net_outputs = [{
  161. k: v
  162. for k, v in zip(['bbox', 'bbox_num', 'mask'], res)
  163. } for res in net_outputs]
  164. else:
  165. net_outputs = {
  166. k: v
  167. for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
  168. }
  169. preds = self._model._postprocess(net_outputs)
  170. if len(preds) == 1:
  171. preds = preds[0]
  172. else:
  173. logging.error(
  174. "Invalid model type {}.".format(self._model.model_type),
  175. exit=True)
  176. return preds
  177. def raw_predict(self, inputs):
  178. """ 接受预处理过后的数据进行预测
  179. Args:
  180. inputs(dict): 预处理过后的数据
  181. """
  182. input_names = self.predictor.get_input_names()
  183. for name in input_names:
  184. input_tensor = self.predictor.get_input_handle(name)
  185. input_tensor.copy_from_cpu(inputs[name])
  186. self.predictor.run()
  187. output_names = self.predictor.get_output_names()
  188. net_outputs = list()
  189. for name in output_names:
  190. output_tensor = self.predictor.get_output_handle(name)
  191. net_outputs.append(output_tensor.copy_to_cpu())
  192. return net_outputs
  193. def _run(self, images, topk=1, transforms=None):
  194. self.timer.preprocess_time_s.start()
  195. preprocessed_input = self.preprocess(images, transforms)
  196. self.timer.preprocess_time_s.end(iter_num=len(images))
  197. ori_shape = None
  198. self.timer.inference_time_s.start()
  199. if 'RCNN' in self._model.__class__.__name__:
  200. if len(preprocessed_input) > 1:
  201. logging.warning(
  202. "{} only supports inference with batch size equal to 1."
  203. .format(self._model.__class__.__name__))
  204. net_outputs = [
  205. self.raw_predict(sample) for sample in preprocessed_input
  206. ]
  207. self.timer.inference_time_s.end(iter_num=len(images))
  208. else:
  209. net_outputs = self.raw_predict(preprocessed_input)
  210. self.timer.inference_time_s.end(iter_num=1)
  211. ori_shape = preprocessed_input.get('ori_shape', None)
  212. self.timer.postprocess_time_s.start()
  213. results = self.postprocess(
  214. net_outputs, topk, ori_shape=ori_shape, transforms=transforms)
  215. self.timer.postprocess_time_s.end(iter_num=len(images))
  216. return results
  217. def predict(self,
  218. img_file,
  219. topk=1,
  220. transforms=None,
  221. warmup_iters=0,
  222. repeats=1):
  223. """ 图片预测
  224. Args:
  225. img_file(List[np.ndarray or str], str or np.ndarray):
  226. 图像路径;或者是解码后的排列格式为(H, W, C)且类型为float32且为BGR格式的数组。
  227. topk(int): 分类预测时使用,表示预测前topk的结果。默认值为1。
  228. transforms (paddlex.transforms): 数据预处理操作。默认值为None, 即使用`model.yml`中保存的数据预处理操作。
  229. warmup_iters (int): 预热轮数,用于评估模型推理以及前后处理速度。若大于1,会预先重复预测warmup_iters,而后才开始正式的预测及其速度评估。默认为0。
  230. repeats (int): 重复次数,用于评估模型推理以及前后处理速度。若大于1,会预测repeats次取时间平均值。默认值为1。
  231. """
  232. if repeats < 1:
  233. logging.error("`repeats` must be greater than 1.", exit=True)
  234. if transforms is None and not hasattr(self._model, 'test_transforms'):
  235. raise Exception("Transforms need to be defined, now is None.")
  236. if transforms is None:
  237. transforms = self._model.test_transforms
  238. if isinstance(img_file, (str, np.ndarray)):
  239. images = [img_file]
  240. else:
  241. images = img_file
  242. for step in range(warmup_iters):
  243. self._run(images=images, topk=topk, transforms=transforms)
  244. self.timer.reset()
  245. for step in range(repeats):
  246. results = self._run(
  247. images=images, topk=topk, transforms=transforms)
  248. self.timer.repeats = repeats
  249. self.timer.info(average=True)
  250. return results