# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import time
import os
import os.path as osp
import numpy as np
import six
import yaml
import math

from . import logging


def seconds_to_hms(seconds):
    """Convert a duration in seconds to an "H:M:S" string."""
    h = math.floor(seconds / 3600)
    m = math.floor((seconds - h * 3600) / 60)
    s = int(seconds - h * 3600 - m * 60)
    hms_str = "{}:{}:{}".format(h, m, s)
    return hms_str
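
# Example (illustrative only, not part of the original module):
#   seconds_to_hms(3725)  ->  "1:2:5"
# Note that the fields are not zero-padded, so minutes and seconds may be a single digit.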


def get_environ_info():
    """Detect whether to run on CPU or GPU and how many devices are available."""
    import paddle.fluid as fluid
    info = dict()
    info['place'] = 'cpu'
    info['num'] = int(os.environ.get('CPU_NUM', 1))
    if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "":
        if hasattr(fluid.core, 'get_cuda_device_count'):
            gpu_num = 0
            try:
                gpu_num = fluid.core.get_cuda_device_count()
            except Exception:
                # Fall back to CPU if the CUDA device count cannot be queried.
                os.environ['CUDA_VISIBLE_DEVICES'] = ''
            if gpu_num > 0:
                info['place'] = 'cuda'
                info['num'] = gpu_num
    return info
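
# Example (illustrative only):
#   env = get_environ_info()
#   # e.g. {'place': 'cuda', 'num': 2} on a machine with two visible GPUs,
#   # or   {'place': 'cpu',  'num': 1} when no GPU is visible.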


def parse_param_file(param_file, return_shape=True):
    """Parse a parameter file saved by Paddle; return its shape or the FP32 weight itself."""
    from paddle.fluid.proto.framework_pb2 import VarType
    f = open(param_file, 'rb')
    version = np.frombuffer(f.read(4), dtype='int32')
    lod_level = np.frombuffer(f.read(8), dtype='int64')
    # Skip the LoD information stored ahead of the tensor description.
    for i in range(int(lod_level)):
        _size = np.frombuffer(f.read(8), dtype='int64')
        _ = f.read(int(_size))
    version = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc = VarType.TensorDesc()
    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
    tensor_shape = tuple(tensor_desc.dims)
    if return_shape:
        f.close()
        return tensor_shape
    if tensor_desc.data_type != 5:
        # 5 corresponds to FP32 in the VarType proto.
        raise Exception(
            "Unexpected data type while parsing {}".format(param_file))
    data_size = 4
    for i in range(len(tensor_shape)):
        data_size *= tensor_shape[i]
    weight = np.frombuffer(f.read(data_size), dtype='float32')
    f.close()
    return np.reshape(weight, tensor_shape)
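
# Example (illustrative only; the file path is hypothetical):
#   shape = parse_param_file('output/pretrain/conv1_weights')              # e.g. (64, 3, 7, 7)
#   weight = parse_param_file('output/pretrain/conv1_weights',
#                             return_shape=False)                          # the FP32 ndarray itself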


def fuse_bn_weights(exe, main_prog, weights_dir):
    """Fold pretrained batch_norm statistics into affine_channel scale/bias weights."""
    import paddle.fluid as fluid
    logging.info("Trying to fuse batch_norm weights...")
    bn_vars = list()
    for block in main_prog.blocks:
        ops = list(block.ops)
        for op in ops:
            if op.type == 'affine_channel':
                scale_name = op.input('Scale')[0]
                bias_name = op.input('Bias')[0]
                prefix = scale_name[:-5]
                mean_name = prefix + 'mean'
                variance_name = prefix + 'variance'
                if not osp.exists(osp.join(
                        weights_dir, mean_name)) or not osp.exists(
                            osp.join(weights_dir, variance_name)):
                    logging.info(
                        "No batch_norm weights were found to fuse, skipping fuse_bn."
                    )
                    return
                bias = block.var(bias_name)
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, bias_name))
                actual_shape = tuple(bias.shape)
                if pretrained_shape != actual_shape:
                    continue
                bn_vars.append(
                    [scale_name, bias_name, mean_name, variance_name])
    eps = 1e-5
    for names in bn_vars:
        scale_name, bias_name, mean_name, variance_name = names
        scale = parse_param_file(
            osp.join(weights_dir, scale_name), return_shape=False)
        bias = parse_param_file(
            osp.join(weights_dir, bias_name), return_shape=False)
        mean = parse_param_file(
            osp.join(weights_dir, mean_name), return_shape=False)
        variance = parse_param_file(
            osp.join(weights_dir, variance_name), return_shape=False)
        # Fold the normalization statistics into the affine parameters.
        bn_std = np.sqrt(np.add(variance, eps))
        new_scale = np.float32(np.divide(scale, bn_std))
        new_bias = bias - mean * new_scale
        scale_tensor = fluid.global_scope().find_var(scale_name).get_tensor()
        bias_tensor = fluid.global_scope().find_var(bias_name).get_tensor()
        scale_tensor.set(new_scale, exe.place)
        bias_tensor.set(new_bias, exe.place)
    if len(bn_vars) == 0:
        logging.info(
            "No batch_norm weights were found to fuse, skipping fuse_bn.")
    else:
        logging.info("{} batch_norm ops have been fused.".format(
            len(bn_vars)))
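
# The fusion above relies on the identity
#   new_scale * x + new_bias == scale * (x - mean) / sqrt(variance + eps) + bias
# with new_scale = scale / sqrt(variance + eps) and new_bias = bias - mean * new_scale,
# so inference with the fused affine_channel weights matches batch_norm at eval time.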


def load_pdparams(exe, main_prog, model_dir):
    """Load parameters from a pickled model.pdparams file into main_prog."""
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    from paddle.fluid.framework import Program
    vars_to_load = list()
    import pickle
    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            raise Exception("{} is not in the saved PaddleX model".format(
                var.name))
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            logging.warning(
                "[SKIP] Shape of pretrained weight {} doesn't match. (Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded; please check your pretrained model!"
        )
    else:
        logging.info("{} variables from {} have been loaded.".format(
            len(vars_to_load), model_dir))


def is_persistable(var):
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    if var.desc.type() == fluid.core.VarDesc.VarType.FEED_MINIBATCH or \
            var.desc.type() == fluid.core.VarDesc.VarType.FETCH_LIST or \
            var.desc.type() == fluid.core.VarDesc.VarType.READER:
        return False
    return var.persistable


def is_belong_to_optimizer(var):
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    if not (isinstance(var, fluid.framework.Parameter)
            or var.desc.need_check_feed()):
        return is_persistable(var)
    return False


def load_pdopt(exe, main_prog, model_dir):
    """Load optimizer state from a pickled model.pdopt file into main_prog."""
    import paddle.fluid as fluid
    optimizer_var_list = list()
    vars_to_load = list()
    import pickle
    with open(osp.join(model_dir, 'model.pdopt'), 'rb') as f:
        opt_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    optimizer_var_list = list(
        filter(is_belong_to_optimizer, main_prog.list_vars()))
    exception_message = "the training process cannot be resumed because the current optimizer differs from the one saved last time. Consider using `pretrain_weights` instead of `resume_checkpoint`"
    if len(optimizer_var_list) > 0:
        for var in optimizer_var_list:
            if var.name not in opt_dict:
                raise Exception(
                    "{} is not in the saved PaddleX optimizer, {}".format(
                        var.name, exception_message))
            if var.shape != opt_dict[var.name].shape:
                raise Exception(
                    "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                    .format(var.name, opt_dict[var.name].shape, var.shape,
                            exception_message))
        optimizer_varname_list = [var.name for var in optimizer_var_list]
        for k, v in opt_dict.items():
            if k not in optimizer_varname_list:
                raise Exception(
                    "{} in the saved PaddleX optimizer is not in the model, {}".
                    format(k, exception_message))
        fluid.io.set_program_state(main_prog, opt_dict)
    if len(optimizer_var_list) == 0:
        raise Exception(
            "There are no optimizer parameters in the model; please set the optimizer!"
        )
    else:
        logging.info(
            "{} optimizer parameters from {} have been loaded.".format(
                len(optimizer_var_list), model_dir))


def load_pretrain_weights(exe,
                          main_prog,
                          weights_dir,
                          fuse_bn=False,
                          resume=False):
    """Load pretrained weights (and optionally optimizer state) into main_prog."""
    if not osp.exists(weights_dir):
        raise Exception("Path {} does not exist.".format(weights_dir))
    # Weights saved in the pdparams/pdopt format are loaded through pickle.
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        load_pdparams(exe, main_prog, weights_dir)
        if resume:
            if osp.exists(osp.join(weights_dir, "model.pdopt")):
                load_pdopt(exe, main_prog, weights_dir)
            else:
                raise Exception(
                    "Optimizer file {} does not exist. Stop resuming training. Consider using `pretrain_weights` instead of `resume_checkpoint`"
                    .format(osp.join(weights_dir, "model.pdopt")))
        return
    # Otherwise, load per-variable parameter files from weights_dir.
    import paddle.fluid as fluid
    vars_to_load = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
            logging.debug(
                "[SKIP] Pretrained weight {}/{} doesn't exist".format(
                    weights_dir, var.name))
            continue
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
        if pretrained_shape != actual_shape:
            logging.warning(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match. (Pretrained: {}, Actual: {})"
                .format(weights_dir, var.name, pretrained_shape, actual_shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    fluid.io.load_vars(
        executor=exe,
        dirname=weights_dir,
        main_program=main_prog,
        vars=vars_to_load)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded; please check your pretrained model!"
        )
    else:
        logging.info("{} variables from {} have been loaded.".format(
            len(vars_to_load), weights_dir))
    if fuse_bn:
        fuse_bn_weights(exe, main_prog, weights_dir)
    if resume:
        exception_message = "the training process cannot be resumed because the current optimizer differs from the one saved last time. Consider using `pretrain_weights` instead of `resume_checkpoint`"
        optimizer_var_list = list(
            filter(is_belong_to_optimizer, main_prog.list_vars()))
        if len(optimizer_var_list) > 0:
            for var in optimizer_var_list:
                if not osp.exists(osp.join(weights_dir, var.name)):
                    raise Exception(
                        "Optimizer parameter {} doesn't exist, {}".format(
                            osp.join(weights_dir, var.name),
                            exception_message))
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, var.name))
                actual_shape = tuple(var.shape)
                if pretrained_shape != actual_shape:
                    raise Exception(
                        "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                        .format(var.name, pretrained_shape, actual_shape,
                                exception_message))
            optimizer_varname_list = [var.name for var in optimizer_var_list]
            if osp.exists(osp.join(weights_dir, 'learning_rate')
                          ) and 'learning_rate' not in optimizer_varname_list:
                raise Exception(
                    "Optimizer parameter {}/learning_rate is not in the model, {}"
                    .format(weights_dir, exception_message))
            fluid.io.load_vars(
                executor=exe,
                dirname=weights_dir,
                main_program=main_prog,
                vars=optimizer_var_list)
        if len(optimizer_var_list) == 0:
            raise Exception(
                "There are no optimizer parameters in the model; please set the optimizer!"
            )
        else:
            logging.info(
                "{} optimizer parameters from {} have been loaded.".format(
                    len(optimizer_var_list), weights_dir))
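
# Example (illustrative only; the executor, program and path are hypothetical,
# assuming `import paddle.fluid as fluid` has been done by the caller):
#   exe = fluid.Executor(fluid.CUDAPlace(0))
#   exe.run(fluid.default_startup_program())
#   load_pretrain_weights(exe, fluid.default_main_program(),
#                         'output/pretrain_weights', fuse_bn=False, resume=False)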


class EarlyStop:
    """Early-stopping helper.

    Returns True (stop training) once the score has neither reached a new
    best nor improved over the previous score by at least `thresh` for
    `patience` consecutive evaluations.
    """

    def __init__(self, patience, thresh):
        self.patience = patience
        self.counter = 0
        self.score = None
        self.max = 0
        self.thresh = thresh
        if patience < 1:
            raise Exception("Argument patience should be a positive integer.")

    def __call__(self, current_score):
        if self.score is None:
            self.score = current_score
            return False
        elif current_score > self.max:
            # New best score: reset the patience counter.
            self.counter = 0
            self.score = current_score
            self.max = current_score
            return False
        else:
            if (abs(self.score - current_score) < self.thresh
                    or current_score < self.score):
                self.counter += 1
                self.score = current_score
                logging.debug(
                    "EarlyStopping: %i / %i" % (self.counter, self.patience))
                if self.counter >= self.patience:
                    logging.info("EarlyStopping: Stop training")
                    return True
                return False
            else:
                self.counter = 0
                self.score = current_score
                return False
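
# Example (illustrative only; the evaluation routine and metric are hypothetical):
#   early_stop = EarlyStop(patience=5, thresh=0.001)
#   for epoch in range(num_epochs):
#       miou = evaluate(model)          # hypothetical evaluation call
#       if early_stop(miou):
#           break                       # metric has stopped improving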