# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import time
import os
import os.path as osp
import numpy as np
import six
import yaml
import math
from . import logging


def seconds_to_hms(seconds):
    """Convert a duration in seconds into an "hours:minutes:seconds" string."""
    h = math.floor(seconds / 3600)
    m = math.floor((seconds - h * 3600) / 60)
    s = int(seconds - h * 3600 - m * 60)
    hms_str = "{}:{}:{}".format(h, m, s)
    return hms_str


def get_environ_info():
    """Return the running device ('cpu' or 'cuda') and the device count."""
    import paddle.fluid as fluid
    info = dict()
    info['place'] = 'cpu'
    info['num'] = int(os.environ.get('CPU_NUM', 1))
    if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "":
        if hasattr(fluid.core, 'get_cuda_device_count'):
            gpu_num = 0
            try:
                gpu_num = fluid.core.get_cuda_device_count()
            except Exception:
                # No usable GPU was found, fall back to CPU.
                os.environ['CUDA_VISIBLE_DEVICES'] = ''
            if gpu_num > 0:
                info['place'] = 'cuda'
                info['num'] = fluid.core.get_cuda_device_count()
    return info
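

# A minimal usage sketch (illustrative only; fluid.CUDAPlace/CPUPlace/Executor are
# standard paddle.fluid APIs, the variable names below are assumptions):
#
#   env_info = get_environ_info()
#   place = fluid.CUDAPlace(0) if env_info['place'] == 'cuda' else fluid.CPUPlace()
#   exe = fluid.Executor(place)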


def parse_param_file(param_file, return_shape=True):
    """Parse a fluid persistable-variable file; return its shape, or the full
    float32 ndarray when return_shape is False."""
    from paddle.fluid.proto.framework_pb2 import VarType
    f = open(param_file, 'rb')
    version = np.frombuffer(f.read(4), dtype='int32')
    lod_level = np.frombuffer(f.read(8), dtype='int64')
    # Skip the LoD information stored ahead of the tensor data.
    for i in range(int(lod_level)):
        _size = np.frombuffer(f.read(8), dtype='int64')
        _ = f.read(int(_size))
    # Tensor header: version followed by a serialized TensorDesc proto.
    version = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc = VarType.TensorDesc()
    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
    tensor_shape = tuple(tensor_desc.dims)
    if return_shape:
        f.close()
        return tensor_shape
    if tensor_desc.data_type != 5:
        # 5 is VarType.FP32; only float32 weights are supported here.
        raise Exception(
            "Unexpected data type while parsing {}".format(param_file))
    data_size = 4
    for i in range(len(tensor_shape)):
        data_size *= tensor_shape[i]
    weight = np.frombuffer(f.read(data_size), dtype='float32')
    f.close()
    return np.reshape(weight, tensor_shape)
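

# Illustrative sketch of how the parser might be called (the path below is a
# hypothetical weights file, not shipped with this module):
#
#   shape = parse_param_file('pretrain/conv1_weights')
#   array = parse_param_file('pretrain/conv1_weights', return_shape=False)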


def fuse_bn_weights(exe, main_prog, weights_dir):
    """Fold batch_norm statistics into the affine_channel scale/bias weights."""
    import paddle.fluid as fluid
    logging.info("Try to fuse weights of batch_norm...")
    bn_vars = list()
    for block in main_prog.blocks:
        ops = list(block.ops)
        for op in ops:
            if op.type == 'affine_channel':
                scale_name = op.input('Scale')[0]
                bias_name = op.input('Bias')[0]
                # Strip the trailing 'scale' to get the batch_norm variable prefix.
                prefix = scale_name[:-5]
                mean_name = prefix + 'mean'
                variance_name = prefix + 'variance'
                if not osp.exists(osp.join(
                        weights_dir, mean_name)) or not osp.exists(
                            osp.join(weights_dir, variance_name)):
                    logging.info(
                        "There's no batch_norm weight found to fuse, skip fuse_bn."
                    )
                    return
                bias = block.var(bias_name)
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, bias_name))
                actual_shape = tuple(bias.shape)
                if pretrained_shape != actual_shape:
                    continue
                bn_vars.append(
                    [scale_name, bias_name, mean_name, variance_name])
    eps = 1e-5
    for names in bn_vars:
        scale_name, bias_name, mean_name, variance_name = names
        scale = parse_param_file(
            osp.join(weights_dir, scale_name), return_shape=False)
        bias = parse_param_file(
            osp.join(weights_dir, bias_name), return_shape=False)
        mean = parse_param_file(
            osp.join(weights_dir, mean_name), return_shape=False)
        variance = parse_param_file(
            osp.join(weights_dir, variance_name), return_shape=False)
        # Fold the statistics into the affine weights:
        # new_scale = scale / sqrt(variance + eps), new_bias = bias - mean * new_scale.
        bn_std = np.sqrt(np.add(variance, eps))
        new_scale = np.float32(np.divide(scale, bn_std))
        new_bias = bias - mean * new_scale
        scale_tensor = fluid.global_scope().find_var(scale_name).get_tensor()
        bias_tensor = fluid.global_scope().find_var(bias_name).get_tensor()
        scale_tensor.set(new_scale, exe.place)
        bias_tensor.set(new_bias, exe.place)
    if len(bn_vars) == 0:
        logging.info(
            "There's no batch_norm weight found to fuse, skip fuse_bn.")
    else:
        logging.info("{} batch_norm ops have been fused.".format(len(bn_vars)))


def load_pdparams(exe, main_prog, model_dir):
    """Load parameters from a pickled .pdparams file into main_prog."""
    import paddle.fluid as fluid
    import pickle
    vars_to_load = list()
    if osp.isfile(model_dir):
        params_file = model_dir
    else:
        params_file = osp.join(model_dir, 'model.pdparams')
    with open(params_file, 'rb') as f:
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            raise Exception("{} is not in the saved paddlex model".format(
                var.name))
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            logging.warning(
                "[SKIP] Shape of pretrained weight {} doesn't match. (Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    # Drop mismatched entries before pushing the state into the program.
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded, please check your pretrained model!"
        )
    else:
        logging.info("{} variables are loaded from {}.".format(
            len(vars_to_load), model_dir))


def is_persistable(var):
    """Return True if the variable is persistable and not a feed/fetch/reader variable."""
    import paddle.fluid as fluid
    if var.desc.type() == fluid.core.VarDesc.VarType.FEED_MINIBATCH or \
            var.desc.type() == fluid.core.VarDesc.VarType.FETCH_LIST or \
            var.desc.type() == fluid.core.VarDesc.VarType.READER:
        return False
    return var.persistable


def is_belong_to_optimizer(var):
    """Return True for persistable variables that belong to the optimizer rather
    than to the model parameters."""
    import paddle.fluid as fluid
    if not (isinstance(var, fluid.framework.Parameter) or
            var.desc.need_check_feed()):
        return is_persistable(var)
    return False


def load_pdopt(exe, main_prog, model_dir):
    """Load optimizer state from model.pdopt so that training can be resumed."""
    import paddle.fluid as fluid
    import pickle
    optimizer_var_list = list()
    vars_to_load = list()
    with open(osp.join(model_dir, 'model.pdopt'), 'rb') as f:
        opt_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    optimizer_var_list = list(
        filter(is_belong_to_optimizer, main_prog.list_vars()))
    exception_message = "the training process can not be resumed because the optimizer used now differs from the one used last time. Recommend to use `pretrain_weights` instead of `resume_checkpoint`"
    if len(optimizer_var_list) > 0:
        for var in optimizer_var_list:
            if var.name not in opt_dict:
                raise Exception("{} is not in the saved paddlex optimizer, {}".
                                format(var.name, exception_message))
            if var.shape != opt_dict[var.name].shape:
                raise Exception(
                    "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                    .format(var.name, opt_dict[var.name].shape, var.shape,
                            exception_message))
        optimizer_varname_list = [var.name for var in optimizer_var_list]
        for k, v in opt_dict.items():
            if k not in optimizer_varname_list:
                raise Exception(
                    "{} in the saved paddlex optimizer is not in the model, {}".
                    format(k, exception_message))
        fluid.io.set_program_state(main_prog, opt_dict)
    if len(optimizer_var_list) == 0:
        raise Exception(
            "There is no optimizer parameter in the model, please set the optimizer!"
        )
    else:
        logging.info("{} optimizer parameters are loaded from {}.".format(
            len(optimizer_var_list), model_dir))


def load_pretrain_weights(exe,
                          main_prog,
                          weights_dir,
                          fuse_bn=False,
                          resume=False):
    """Load pretrained weights (and, when resume=True, optimizer state) into main_prog."""
    if not osp.exists(weights_dir):
        raise Exception("Path {} does not exist.".format(weights_dir))
    # Case 1: a single .pdparams file.
    if osp.isfile(weights_dir):
        if not weights_dir.endswith('.pdparams'):
            raise Exception("File {} is not a paddle parameter file".format(
                weights_dir))
        load_pdparams(exe, main_prog, weights_dir)
        return
    # Case 2: a directory holding model.pdparams (and optionally model.pdopt).
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        load_pdparams(exe, main_prog, weights_dir)
        if resume:
            if osp.exists(osp.join(weights_dir, "model.pdopt")):
                load_pdopt(exe, main_prog, weights_dir)
            else:
                raise Exception(
                    "Optimizer file {} does not exist. Stop resuming training. Recommend to use `pretrain_weights` instead of `resume_checkpoint`"
                    .format(osp.join(weights_dir, "model.pdopt")))
        return
    # Case 3: a directory with one file per persistable variable.
    import paddle.fluid as fluid
    vars_to_load = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
            logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".
                          format(weights_dir, var.name))
            continue
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
        if pretrained_shape != actual_shape:
            logging.warning(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match. (Pretrained: {}, Actual: {})"
                .format(weights_dir, var.name, pretrained_shape, actual_shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    params_dict = fluid.io.load_program_state(
        weights_dir, var_list=vars_to_load)
    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded, please check your pretrained model!"
        )
    else:
        logging.info("{} variables are loaded from {}.".format(
            len(vars_to_load), weights_dir))
    if fuse_bn:
        fuse_bn_weights(exe, main_prog, weights_dir)
    if resume:
        exception_message = "the training process can not be resumed because the optimizer used now differs from the one used last time. Recommend to use `pretrain_weights` instead of `resume_checkpoint`"
        optimizer_var_list = list(
            filter(is_belong_to_optimizer, main_prog.list_vars()))
        if len(optimizer_var_list) > 0:
            for var in optimizer_var_list:
                if not osp.exists(osp.join(weights_dir, var.name)):
                    raise Exception(
                        "Optimizer parameter {} doesn't exist, {}".format(
                            osp.join(weights_dir, var.name),
                            exception_message))
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, var.name))
                actual_shape = tuple(var.shape)
                if pretrained_shape != actual_shape:
                    raise Exception(
                        "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                        .format(var.name, pretrained_shape, actual_shape,
                                exception_message))
            optimizer_varname_list = [var.name for var in optimizer_var_list]
            if osp.exists(osp.join(weights_dir, 'learning_rate')
                          ) and 'learning_rate' not in optimizer_varname_list:
                raise Exception(
                    "Optimizer parameter {}/learning_rate is not in the model, {}"
                    .format(weights_dir, exception_message))
            fluid.io.load_vars(
                executor=exe,
                dirname=weights_dir,
                main_program=main_prog,
                vars=optimizer_var_list)
        if len(optimizer_var_list) == 0:
            raise Exception(
                "There is no optimizer parameter in the model, please set the optimizer!"
            )
        else:
            logging.info("{} optimizer parameters are loaded from {}.".format(
                len(optimizer_var_list), weights_dir))
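

# A minimal usage sketch (illustrative only; `exe`, `train_prog` and the weights
# directory are assumptions, not defined in this module):
#
#   load_pretrain_weights(exe, train_prog, 'output/pretrain_weights',
#                         fuse_bn=False, resume=False)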


class EarlyStop:
    """Signal that training should stop when the evaluation score has not
    improved by more than `thresh` for `patience` consecutive calls."""

    def __init__(self, patience, thresh):
        self.patience = patience
        self.counter = 0
        self.score = None
        self.max = 0
        self.thresh = thresh
        if patience < 1:
            raise Exception("Argument patience should be a positive integer.")

    def __call__(self, current_score):
        if self.score is None:
            self.score = current_score
            return False
        elif current_score > self.max:
            # New best score: reset the counter.
            self.counter = 0
            self.score = current_score
            self.max = current_score
            return False
        else:
            if (abs(self.score - current_score) < self.thresh or
                    current_score < self.score):
                self.counter += 1
                self.score = current_score
                logging.debug("EarlyStopping: %i / %i" %
                              (self.counter, self.patience))
                if self.counter >= self.patience:
                    logging.info("EarlyStopping: Stop training")
                    return True
                return False
            else:
                self.counter = 0
                self.score = current_score
                return False
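

# A minimal usage sketch of EarlyStop (illustrative only; `scores_per_epoch` is a
# hypothetical list of evaluation scores, not part of this module):
#
#   early_stop = EarlyStop(patience=5, thresh=1e-4)
#   for score in scores_per_epoch:
#       if early_stop(score):
#           break  # stop training once the metric stops improving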