# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import time
import os
import os.path as osp
import numpy as np
import six
import yaml
import math

from . import logging


def seconds_to_hms(seconds):
    """Convert a duration in seconds to an "H:M:S" string."""
    h = math.floor(seconds / 3600)
    m = math.floor((seconds - h * 3600) / 60)
    s = int(seconds - h * 3600 - m * 60)
    hms_str = "{}:{}:{}".format(h, m, s)
    return hms_str
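
# Example (illustrative only, not part of the original module):
#   seconds_to_hms(3725)  ->  "1:2:5"
# Note that the fields are not zero-padded, so minutes and seconds may be a single digit.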


def get_environ_info():
    """Detect whether to run on CPU or GPU and how many devices are available."""
    import paddle.fluid as fluid
    info = dict()
    info['place'] = 'cpu'
    info['num'] = int(os.environ.get('CPU_NUM', 1))
    if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "":
        if hasattr(fluid.core, 'get_cuda_device_count'):
            gpu_num = 0
            try:
                gpu_num = fluid.core.get_cuda_device_count()
            except Exception:
                # Fall back to CPU if the CUDA device count cannot be queried.
                os.environ['CUDA_VISIBLE_DEVICES'] = ''
            if gpu_num > 0:
                info['place'] = 'cuda'
                info['num'] = gpu_num
    return info
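
# Example (illustrative only):
#   env = get_environ_info()
#   # e.g. {'place': 'cuda', 'num': 2} on a machine with two visible GPUs,
#   # or   {'place': 'cpu',  'num': 1} when no GPU is visible.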


def parse_param_file(param_file, return_shape=True):
    """Parse a parameter file saved by Paddle; return its shape or the FP32 weight itself."""
    from paddle.fluid.proto.framework_pb2 import VarType
    f = open(param_file, 'rb')
    version = np.frombuffer(f.read(4), dtype='int32')
    lod_level = np.frombuffer(f.read(8), dtype='int64')
    # Skip the LoD information stored ahead of the tensor description.
    for i in range(int(lod_level)):
        _size = np.frombuffer(f.read(8), dtype='int64')
        _ = f.read(int(_size))
    version = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc = VarType.TensorDesc()
    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
    tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
    tensor_shape = tuple(tensor_desc.dims)
    if return_shape:
        f.close()
        return tensor_shape
    if tensor_desc.data_type != 5:
        # 5 corresponds to FP32 in the VarType proto.
        raise Exception(
            "Unexpected data type while parsing {}".format(param_file))
    data_size = 4
    for i in range(len(tensor_shape)):
        data_size *= tensor_shape[i]
    weight = np.frombuffer(f.read(data_size), dtype='float32')
    f.close()
    return np.reshape(weight, tensor_shape)
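
# Example (illustrative only; the file path is hypothetical):
#   shape = parse_param_file('output/pretrain/conv1_weights')              # e.g. (64, 3, 7, 7)
#   weight = parse_param_file('output/pretrain/conv1_weights',
#                             return_shape=False)                          # the FP32 ndarray itself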


def fuse_bn_weights(exe, main_prog, weights_dir):
    """Fold pretrained batch_norm statistics into affine_channel scale/bias weights."""
    import paddle.fluid as fluid
    logging.info("Trying to fuse batch_norm weights...")
    bn_vars = list()
    for block in main_prog.blocks:
        ops = list(block.ops)
        for op in ops:
            if op.type == 'affine_channel':
                scale_name = op.input('Scale')[0]
                bias_name = op.input('Bias')[0]
                prefix = scale_name[:-5]
                mean_name = prefix + 'mean'
                variance_name = prefix + 'variance'
                if not osp.exists(osp.join(
                        weights_dir, mean_name)) or not osp.exists(
                            osp.join(weights_dir, variance_name)):
                    logging.info(
                        "No batch_norm weights were found to fuse, skipping fuse_bn."
                    )
                    return
                bias = block.var(bias_name)
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, bias_name))
                actual_shape = tuple(bias.shape)
                if pretrained_shape != actual_shape:
                    continue
                bn_vars.append(
                    [scale_name, bias_name, mean_name, variance_name])
    eps = 1e-5
    for names in bn_vars:
        scale_name, bias_name, mean_name, variance_name = names
        scale = parse_param_file(
            osp.join(weights_dir, scale_name), return_shape=False)
        bias = parse_param_file(
            osp.join(weights_dir, bias_name), return_shape=False)
        mean = parse_param_file(
            osp.join(weights_dir, mean_name), return_shape=False)
        variance = parse_param_file(
            osp.join(weights_dir, variance_name), return_shape=False)
        # Fold the normalization statistics into the affine parameters.
        bn_std = np.sqrt(np.add(variance, eps))
        new_scale = np.float32(np.divide(scale, bn_std))
        new_bias = bias - mean * new_scale
        scale_tensor = fluid.global_scope().find_var(scale_name).get_tensor()
        bias_tensor = fluid.global_scope().find_var(bias_name).get_tensor()
        scale_tensor.set(new_scale, exe.place)
        bias_tensor.set(new_bias, exe.place)
    if len(bn_vars) == 0:
        logging.info(
            "No batch_norm weights were found to fuse, skipping fuse_bn.")
    else:
        logging.info("{} batch_norm ops have been fused.".format(
            len(bn_vars)))
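
# The fusion above relies on the identity
#   new_scale * x + new_bias == scale * (x - mean) / sqrt(variance + eps) + bias
# with new_scale = scale / sqrt(variance + eps) and new_bias = bias - mean * new_scale,
# so inference with the fused affine_channel weights matches batch_norm at eval time.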


def load_pdparams(exe, main_prog, model_dir):
    """Load parameters from a pickled model.pdparams file into main_prog."""
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    from paddle.fluid.framework import Program
    vars_to_load = list()
    import pickle
    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            raise Exception("{} is not in the saved PaddleX model".format(
                var.name))
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            logging.warning(
                "[SKIP] Shape of pretrained weight {} doesn't match. (Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded; please check your pretrained model!"
        )
    else:
        logging.info("{} variables from {} have been loaded.".format(
            len(vars_to_load), model_dir))


def is_persistable(var):
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    if var.desc.type() == fluid.core.VarDesc.VarType.FEED_MINIBATCH or \
            var.desc.type() == fluid.core.VarDesc.VarType.FETCH_LIST or \
            var.desc.type() == fluid.core.VarDesc.VarType.READER:
        return False
    return var.persistable


def is_belong_to_optimizer(var):
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType
    if not (isinstance(var, fluid.framework.Parameter)
            or var.desc.need_check_feed()):
        return is_persistable(var)
    return False


def load_pdopt(exe, main_prog, model_dir):
    """Load optimizer state from a pickled model.pdopt file into main_prog."""
    import paddle.fluid as fluid
    optimizer_var_list = list()
    vars_to_load = list()
    import pickle
    with open(osp.join(model_dir, 'model.pdopt'), 'rb') as f:
        opt_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    optimizer_var_list = list(
        filter(is_belong_to_optimizer, main_prog.list_vars()))
    exception_message = "the training process cannot be resumed because the current optimizer differs from the one saved last time. Consider using `pretrain_weights` instead of `resume_checkpoint`"
    if len(optimizer_var_list) > 0:
        for var in optimizer_var_list:
            if var.name not in opt_dict:
                raise Exception(
                    "{} is not in the saved PaddleX optimizer, {}".format(
                        var.name, exception_message))
            if var.shape != opt_dict[var.name].shape:
                raise Exception(
                    "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                    .format(var.name, opt_dict[var.name].shape, var.shape,
                            exception_message))
        optimizer_varname_list = [var.name for var in optimizer_var_list]
        for k, v in opt_dict.items():
            if k not in optimizer_varname_list:
                raise Exception(
                    "{} in the saved PaddleX optimizer is not in the model, {}".
                    format(k, exception_message))
        fluid.io.set_program_state(main_prog, opt_dict)
    if len(optimizer_var_list) == 0:
        raise Exception(
            "There are no optimizer parameters in the model; please set the optimizer!"
        )
    else:
        logging.info(
            "{} optimizer parameters from {} have been loaded.".format(
                len(optimizer_var_list), model_dir))


def load_pretrain_weights(exe,
                          main_prog,
                          weights_dir,
                          fuse_bn=False,
                          resume=False):
    """Load pretrained weights (and optionally optimizer state) into main_prog."""
    if not osp.exists(weights_dir):
        raise Exception("Path {} does not exist.".format(weights_dir))
    # Weights saved in the pdparams/pdopt format are loaded through pickle.
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        load_pdparams(exe, main_prog, weights_dir)
        if resume:
            if osp.exists(osp.join(weights_dir, "model.pdopt")):
                load_pdopt(exe, main_prog, weights_dir)
            else:
                raise Exception(
                    "Optimizer file {} does not exist. Stop resuming training. Consider using `pretrain_weights` instead of `resume_checkpoint`"
                    .format(osp.join(weights_dir, "model.pdopt")))
        return
    # Otherwise, load per-variable parameter files from weights_dir.
    import paddle.fluid as fluid
    vars_to_load = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
            logging.debug(
                "[SKIP] Pretrained weight {}/{} doesn't exist".format(
                    weights_dir, var.name))
            continue
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
        if pretrained_shape != actual_shape:
            logging.warning(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match. (Pretrained: {}, Actual: {})"
                .format(weights_dir, var.name, pretrained_shape, actual_shape))
            continue
        vars_to_load.append(var)
        logging.debug("Weight {} will be loaded".format(var.name))
    fluid.io.load_vars(
        executor=exe,
        dirname=weights_dir,
        main_program=main_prog,
        vars=vars_to_load)
    if len(vars_to_load) == 0:
        logging.warning(
            "No pretrained weights were loaded; please check your pretrained model!"
        )
    else:
        logging.info("{} variables from {} have been loaded.".format(
            len(vars_to_load), weights_dir))
    if fuse_bn:
        fuse_bn_weights(exe, main_prog, weights_dir)
    if resume:
        exception_message = "the training process cannot be resumed because the current optimizer differs from the one saved last time. Consider using `pretrain_weights` instead of `resume_checkpoint`"
        optimizer_var_list = list(
            filter(is_belong_to_optimizer, main_prog.list_vars()))
        if len(optimizer_var_list) > 0:
            for var in optimizer_var_list:
                if not osp.exists(osp.join(weights_dir, var.name)):
                    raise Exception(
                        "Optimizer parameter {} doesn't exist, {}".format(
                            osp.join(weights_dir, var.name),
                            exception_message))
                pretrained_shape = parse_param_file(
                    osp.join(weights_dir, var.name))
                actual_shape = tuple(var.shape)
                if pretrained_shape != actual_shape:
                    raise Exception(
                        "Shape of optimizer variable {} doesn't match. (Last: {}, Now: {}), {}"
                        .format(var.name, pretrained_shape, actual_shape,
                                exception_message))
            optimizer_varname_list = [var.name for var in optimizer_var_list]
            if osp.exists(osp.join(weights_dir, 'learning_rate')
                          ) and 'learning_rate' not in optimizer_varname_list:
                raise Exception(
                    "Optimizer parameter {}/learning_rate is not in the model, {}"
                    .format(weights_dir, exception_message))
            fluid.io.load_vars(
                executor=exe,
                dirname=weights_dir,
                main_program=main_prog,
                vars=optimizer_var_list)
        if len(optimizer_var_list) == 0:
            raise Exception(
                "There are no optimizer parameters in the model; please set the optimizer!"
            )
        else:
            logging.info(
                "{} optimizer parameters from {} have been loaded.".format(
                    len(optimizer_var_list), weights_dir))
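
# Example (illustrative only; the executor, program and path are hypothetical,
# assuming `import paddle.fluid as fluid` has been done by the caller):
#   exe = fluid.Executor(fluid.CUDAPlace(0))
#   exe.run(fluid.default_startup_program())
#   load_pretrain_weights(exe, fluid.default_main_program(),
#                         'output/pretrain_weights', fuse_bn=False, resume=False)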


class EarlyStop:
    """Early-stopping helper.

    Returns True (stop training) once the score has neither reached a new
    best nor improved over the previous score by at least `thresh` for
    `patience` consecutive evaluations.
    """

    def __init__(self, patience, thresh):
        self.patience = patience
        self.counter = 0
        self.score = None
        self.max = 0
        self.thresh = thresh
        if patience < 1:
            raise Exception("Argument patience should be a positive integer.")

    def __call__(self, current_score):
        if self.score is None:
            self.score = current_score
            return False
        elif current_score > self.max:
            # New best score: reset the patience counter.
            self.counter = 0
            self.score = current_score
            self.max = current_score
            return False
        else:
            if (abs(self.score - current_score) < self.thresh
                    or current_score < self.score):
                self.counter += 1
                self.score = current_score
                logging.debug(
                    "EarlyStopping: %i / %i" % (self.counter, self.patience))
                if self.counter >= self.patience:
                    logging.info("EarlyStopping: Stop training")
                    return True
                return False
            else:
                self.counter = 0
                self.score = current_score
                return False
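
# Example (illustrative only; the evaluation routine and metric are hypothetical):
#   early_stop = EarlyStop(patience=5, thresh=0.001)
#   for epoch in range(num_epochs):
#       miou = evaluate(model)          # hypothetical evaluation call
#       if early_stop(miou):
#           break                       # metric has stopped improving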