"""
Copyright (c) 2016, Marco Tulio Correia Ribeiro
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
"""
The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime.
"""

import numpy as np
import scipy as sp
import tqdm
import copy
from functools import partial

import paddlex.utils.logging as logging


class LimeBase(object):
    """Class for learning a locally linear sparse model from perturbed data"""

    def __init__(self, kernel_fn, verbose=False, random_state=None):
        """Init function

        Args:
            kernel_fn: function that transforms an array of distances into an
                array of proximity values (floats).
            verbose: if true, print local prediction values from linear model.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        from sklearn.utils import check_random_state

        self.kernel_fn = kernel_fn
        self.verbose = verbose
        self.random_state = check_random_state(random_state)

    @staticmethod
    def generate_lars_path(weighted_data, weighted_labels):
        """Generates the lars path for weighted data.

        Args:
            weighted_data: data that has been weighted by kernel
            weighted_labels: labels, weighted by kernel

        Returns:
            (alphas, coefs), both are arrays corresponding to the
            regularization parameter and coefficients, respectively
        """
        from sklearn.linear_model import lars_path

        x_vector = weighted_data
        alphas, _, coefs = lars_path(
            x_vector, weighted_labels, method='lasso', verbose=False)
        return alphas, coefs

    def forward_selection(self, data, labels, weights, num_features):
        """Iteratively adds features to the model"""
        from sklearn.linear_model import Ridge

        clf = Ridge(
            alpha=0, fit_intercept=True, random_state=self.random_state)
        used_features = []
        for _ in range(min(num_features, data.shape[1])):
            max_ = -100000000
            best = 0
            for feature in range(data.shape[1]):
                if feature in used_features:
                    continue
                clf.fit(data[:, used_features + [feature]],
                        labels,
                        sample_weight=weights)
                score = clf.score(
                    data[:, used_features + [feature]],
                    labels,
                    sample_weight=weights)
                if score > max_:
                    best = feature
                    max_ = score
            used_features.append(best)
        return np.array(used_features)

    def feature_selection(self, data, labels, weights, num_features, method):
        """Selects features for the model. see interpret_instance_with_data to
        understand the parameters."""
        from sklearn.linear_model import Ridge

        if method == 'none':
            return np.array(range(data.shape[1]))
        elif method == 'forward_selection':
            return self.forward_selection(data, labels, weights, num_features)
        elif method == 'highest_weights':
            clf = Ridge(
                alpha=0.01, fit_intercept=True, random_state=self.random_state)
            clf.fit(data, labels, sample_weight=weights)

            coef = clf.coef_
            if sp.sparse.issparse(data):
                coef = sp.sparse.csr_matrix(clf.coef_)
                weighted_data = coef.multiply(data[0])
                # Note: most efficient to slice the data before reversing
                sdata = len(weighted_data.data)
                argsort_data = np.abs(weighted_data.data).argsort()
                # Edge case where data is more sparse than requested number of feature importances
                # In that case, we just pad with zero-valued features
                if sdata < num_features:
                    nnz_indexes = argsort_data[::-1]
                    indices = weighted_data.indices[nnz_indexes]
                    num_to_pad = num_features - sdata
                    indices = np.concatenate((indices, np.zeros(
                        num_to_pad, dtype=indices.dtype)))
                    indices_set = set(indices)
                    pad_counter = 0
                    for i in range(data.shape[1]):
                        if i not in indices_set:
                            indices[pad_counter + sdata] = i
                            pad_counter += 1
                            if pad_counter >= num_to_pad:
                                break
                else:
                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                    indices = weighted_data.indices[nnz_indexes]
                return indices
            else:
                weighted_data = coef * data[0]
                feature_weights = sorted(
                    zip(range(data.shape[1]), weighted_data),
                    key=lambda x: np.abs(x[1]),
                    reverse=True)
                return np.array([x[0] for x in feature_weights[:num_features]])
        elif method == 'lasso_path':
            weighted_data = ((data - np.average(
                data, axis=0, weights=weights)) *
                             np.sqrt(weights[:, np.newaxis]))
            weighted_labels = ((labels - np.average(
                labels, weights=weights)) * np.sqrt(weights))
            nonzero = range(weighted_data.shape[1])
            _, coefs = self.generate_lars_path(weighted_data, weighted_labels)
            for i in range(len(coefs.T) - 1, 0, -1):
                nonzero = coefs.T[i].nonzero()[0]
                if len(nonzero) <= num_features:
                    break
            used_features = nonzero
            return used_features
        elif method == 'auto':
            if num_features <= 6:
                n_method = 'forward_selection'
            else:
                n_method = 'highest_weights'
            return self.feature_selection(data, labels, weights, num_features,
                                          n_method)

    def interpret_instance_with_data(self,
                                     neighborhood_data,
                                     neighborhood_labels,
                                     distances,
                                     label,
                                     num_features,
                                     feature_selection='auto',
                                     model_regressor=None):
        """Takes perturbed data, labels and distances, returns interpretation.

        Args:
            neighborhood_data: perturbed data, 2d array. first element is
                assumed to be the original data point.
            neighborhood_labels: corresponding perturbed labels. should have as
                many columns as the number of possible labels.
            distances: distances to original data point.
            label: label for which we want an interpretation
            num_features: maximum number of features in interpretation
            feature_selection: how to select num_features. options are:
                'forward_selection': iteratively add features to the model.
                    This is costly when num_features is high
                'highest_weights': selects the features that have the highest
                    product of absolute weight * original data point when
                    learning with all the features
                'lasso_path': chooses features based on the lasso
                    regularization path
                'none': uses all features, ignores num_features
                'auto': uses forward_selection if num_features <= 6, and
                    'highest_weights' otherwise.
            model_regressor: sklearn regressor to use in interpretation.
                Defaults to Ridge regression if None. Must have
                model_regressor.coef_ and 'sample_weight' as a parameter
                to model_regressor.fit()

        Returns:
            (intercept, exp, score, local_pred):
                intercept is a float.
                exp is a sorted list of tuples, where each tuple (x, y)
                    corresponds to the feature id (x) and the local weight (y).
                    The list is sorted by decreasing absolute value of y.
                score is the R^2 value of the returned interpretation
                local_pred is the prediction of the interpretation model on the
                    original instance
        """
        from sklearn.linear_model import Ridge

        weights = self.kernel_fn(distances)
        labels_column = neighborhood_labels[:, label]
        used_features = self.feature_selection(neighborhood_data,
                                               labels_column, weights,
                                               num_features, feature_selection)
        if model_regressor is None:
            model_regressor = Ridge(
                alpha=1, fit_intercept=True, random_state=self.random_state)
        easy_model = model_regressor
        easy_model.fit(neighborhood_data[:, used_features],
                       labels_column,
                       sample_weight=weights)
        prediction_score = easy_model.score(
            neighborhood_data[:, used_features],
            labels_column,
            sample_weight=weights)

        local_pred = easy_model.predict(neighborhood_data[0, used_features]
                                        .reshape(1, -1))

        if self.verbose:
            logging.info('Intercept: ' + str(easy_model.intercept_))
            logging.info('Prediction_local: ' + str(local_pred))
            logging.info('Right: ' + str(neighborhood_labels[0, label]))
        return (easy_model.intercept_, sorted(
            zip(used_features, easy_model.coef_),
            key=lambda x: np.abs(x[1]),
            reverse=True), prediction_score, local_pred)
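

# A minimal, illustrative sketch (not part of the upstream LIME code) of how
# LimeBase can be driven directly with already-perturbed data. The synthetic
# arrays, the kernel width of 2.0 and the helper name `_lime_base_example`
# are assumptions for demonstration only; in this package LimeBase is
# normally driven by LimeImageInterpreter below.
def _lime_base_example():
    rng = np.random.RandomState(0)
    # 50 perturbed samples with 8 binary "features"; row 0 is the original.
    neighborhood_data = rng.randint(0, 2, (50, 8))
    neighborhood_data[0, :] = 1
    # Stand-in prediction probabilities for 2 classes, one column per class.
    neighborhood_labels = rng.rand(50, 2)
    # Euclidean distance of each perturbed sample to the original sample.
    distances = np.sqrt(
        ((neighborhood_data - neighborhood_data[0])**2).sum(axis=1))

    # Same exponential kernel shape as LimeImageInterpreter, but with a wider
    # kernel so the synthetic distances still receive non-negligible weight.
    def kernel_fn(d, kernel_width=2.0):
        return np.sqrt(np.exp(-(d**2) / kernel_width**2))

    base = LimeBase(kernel_fn, verbose=False, random_state=0)
    intercept, weights, score, local_pred = base.interpret_instance_with_data(
        neighborhood_data,
        neighborhood_labels,
        distances,
        label=1,
        num_features=4,
        feature_selection='auto')
    # `weights` is a list of (feature_id, local_weight) sorted by |weight|.
    return intercept, weights, score, local_pred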


class ImageInterpretation(object):
    def __init__(self, image, segments):
        """Init function.

        Args:
            image: 3d numpy array
            segments: 2d numpy array, with the output from skimage.segmentation
        """
        self.image = image
        self.segments = segments
        self.intercept = {}
        self.local_weights = {}
        self.local_pred = None

    def get_image_and_mask(self,
                           label,
                           positive_only=True,
                           negative_only=False,
                           hide_rest=False,
                           num_features=5,
                           min_weight=0.):
        """Returns the image and mask highlighting the interpretation for a label.

        Args:
            label: label to interpret
            positive_only: if True, only take superpixels that positively contribute to
                the prediction of the label.
            negative_only: if True, only take superpixels that negatively contribute to
                the prediction of the label. If False, and so is positive_only, then both
                negative and positive contributions will be taken.
                Both can't be True at the same time.
            hide_rest: if True, make the non-interpretation part of the returned
                image gray
            num_features: number of superpixels to include in interpretation
            min_weight: minimum weight of the superpixels to include in interpretation

        Returns:
            (image, mask), where image is a 3d numpy array and mask is a 2d
            numpy array that can be used with
            skimage.segmentation.mark_boundaries
        """
        if label not in self.local_weights:
            raise KeyError('Label not in interpretation')
        if positive_only and negative_only:
            raise ValueError(
                "positive_only and negative_only cannot be True at the same time."
            )
        segments = self.segments
        image = self.image
        local_weights_label = self.local_weights[label]
        mask = np.zeros(segments.shape, segments.dtype)
        if hide_rest:
            temp = np.zeros(self.image.shape)
        else:
            temp = self.image.copy()
        if positive_only:
            fs = [
                x[0] for x in local_weights_label
                if x[1] > 0 and x[1] > min_weight
            ][:num_features]
        if negative_only:
            fs = [
                x[0] for x in local_weights_label
                if x[1] < 0 and abs(x[1]) > min_weight
            ][:num_features]
        if positive_only or negative_only:
            c = 1 if positive_only else 0
            for f in fs:
                temp[segments == f] = [0, 255, 0]
                # temp[segments == f, c] = np.max(image)
                mask[segments == f] = 1
            return temp, mask
        else:
            for f, w in local_weights_label[:num_features]:
                if np.abs(w) < min_weight:
                    continue
                c = 0 if w < 0 else 1
                mask[segments == f] = -1 if w < 0 else 1
                temp[segments == f] = image[segments == f].copy()
                temp[segments == f, c] = np.max(image)
            return temp, mask

    def get_rendered_image(self, label, min_weight=0.005):
        """Renders the interpretation for a label as a colored image.

        Args:
            label: label to interpret
            min_weight: minimum normalized absolute weight of a superpixel for
                it to be colored

        Returns:
            image, is a 3d numpy array
        """
        if label not in self.local_weights:
            raise KeyError('Label not in interpretation')

        from matplotlib import cm

        segments = self.segments
        image = self.image
        local_weights_label = self.local_weights[label]
        temp = np.zeros_like(image)
        weight_max = abs(local_weights_label[0][1])
        local_weights_label = [(f, w / weight_max)
                               for f, w in local_weights_label]
        local_weights_label = sorted(
            local_weights_label, key=lambda x: x[1],
            reverse=True)  # negatives come last.
        cmaps = cm.get_cmap('Spectral')
        colors = cmaps(np.linspace(0, 1, len(local_weights_label)))
        colors = colors[:, :3]
        for i, (f, w) in enumerate(local_weights_label):
            if np.abs(w) < min_weight:
                continue
            temp[segments == f] = image[segments == f].copy()
            temp[segments == f] = colors[i] * 255
        return temp
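

# A minimal, illustrative sketch (not part of the upstream LIME code) showing
# how an ImageInterpretation object is consumed once `local_weights` has been
# filled in. The toy image, the two-superpixel segmentation and the helper
# name `_image_interpretation_example` are assumptions for demonstration only;
# interpret_instance below produces these objects from a real model.
def _image_interpretation_example():
    # Toy 4x4 RGB image split into two superpixels: left half (0), right half (1).
    image = np.full((4, 4, 3), 128, dtype=np.uint8)
    segments = np.zeros((4, 4), dtype=np.int64)
    segments[:, 2:] = 1

    interpretation = ImageInterpretation(image, segments)
    # Hand-written local weights for label 0, sorted by decreasing |weight|,
    # in the same format produced by LimeBase.interpret_instance_with_data.
    interpretation.local_weights[0] = [(0, 0.8), (1, -0.3)]
    interpretation.intercept[0] = 0.1

    # Highlight the superpixel that pushes the prediction towards label 0.
    temp, mask = interpretation.get_image_and_mask(
        0, positive_only=True, num_features=1, hide_rest=False)
    # Render all superpixels, colored by their normalized weight (needs matplotlib).
    rendered = interpretation.get_rendered_image(0)
    return temp, mask, rendered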


class LimeImageInterpreter(object):
    """Interprets predictions on Image (i.e. matrix) data.

    For numerical features, perturb them by sampling from a Normal(0,1) and
    doing the inverse operation of mean-centering and scaling, according to the
    means and stds in the training data. For categorical features, perturb by
    sampling according to the training distribution, and making a binary
    feature that is 1 when the value is the same as the instance being
    interpreted."""

    def __init__(self,
                 kernel_width=.25,
                 kernel=None,
                 verbose=False,
                 feature_selection='auto',
                 random_state=None):
        """Init function.

        Args:
            kernel_width: kernel width for the exponential kernel.
            kernel: similarity kernel that takes euclidean distances and kernel
                width as input and outputs weights in (0,1). If None, defaults to
                an exponential kernel.
            verbose: if true, print local prediction values from linear model
            feature_selection: feature selection method. can be
                'forward_selection', 'highest_weights', 'lasso_path', 'none' or
                'auto'. See function 'interpret_instance_with_data' in
                lime_base.py for details on what each of the options does.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        from sklearn.utils import check_random_state

        kernel_width = float(kernel_width)

        if kernel is None:

            def kernel(d, kernel_width):
                return np.sqrt(np.exp(-(d**2) / kernel_width**2))

        kernel_fn = partial(kernel, kernel_width=kernel_width)

        self.random_state = check_random_state(random_state)
        self.feature_selection = feature_selection
        self.base = LimeBase(
            kernel_fn, verbose, random_state=self.random_state)

    def interpret_instance(self,
                           image,
                           classifier_fn,
                           labels=(1, ),
                           hide_color=None,
                           num_features=100000,
                           num_samples=1000,
                           batch_size=10,
                           distance_metric='cosine',
                           model_regressor=None):
        """Generates interpretations for a prediction.

        First, we generate neighborhood data by randomly perturbing features
        from the instance (see __data_inverse). We then learn locally weighted
        linear models on this neighborhood data to interpret each of the classes
        in an interpretable way (see lime_base.py).

        Args:
            image: 3 dimension RGB image. If this is only two dimensional,
                we will assume it's a grayscale image and call gray2rgb.
            classifier_fn: classifier prediction probability function, which
                takes a numpy array and outputs prediction probabilities. For
                ScikitClassifiers, this is classifier.predict_proba.
            labels: iterable with labels to be interpreted.
            hide_color: how to fill a superpixel that is turned off. If None,
                each superpixel is replaced by its own mean color; if
                'avg_from_neighbor', by the mean color of its nearest
                neighboring superpixels; otherwise the superpixel is zeroed.
            num_features: maximum number of features present in interpretation
            num_samples: size of the neighborhood to learn the linear model
            batch_size: classifier_fn will be called on batches of this size.
            distance_metric: the distance metric to use for weights.
            model_regressor: sklearn regressor to use in interpretation. Defaults
                to Ridge regression in LimeBase. Must have model_regressor.coef_
                and 'sample_weight' as a parameter to model_regressor.fit()

        Returns:
            An ImageInterpretation object (see lime_image.py) with the
            corresponding interpretations.
        """
        import sklearn
        from skimage.measure import regionprops
        from skimage.segmentation import quickshift
        from skimage.color import gray2rgb

        if len(image.shape) == 2:
            image = gray2rgb(image)

        try:
            segments = quickshift(image, sigma=1)
        except ValueError as e:
            raise e

        self.segments = segments

        fudged_image = image.copy()
        # global_mean = np.mean(image, (0, 1))
        if hide_color is None:
            # if no hide_color, use the mean
            for x in np.unique(segments):
                mx = np.mean(image[segments == x], axis=0)
                fudged_image[segments == x] = mx
        elif hide_color == 'avg_from_neighbor':
            from scipy.spatial.distance import cdist

            n_features = np.unique(segments).shape[0]
            regions = regionprops(segments + 1)
            centroids = np.zeros((n_features, 2))
            for i, x in enumerate(regions):
                centroids[i] = np.array(x.centroid)

            d = cdist(centroids, centroids, 'sqeuclidean')

            for x in np.unique(segments):
                a = [image[segments == i] for i in np.argsort(d[x])[1:6]]
                mx = np.mean(np.concatenate(a), axis=0)
                fudged_image[segments == x] = mx
        else:
            fudged_image[:] = 0

        top = labels

        data, labels = self.data_labels(
            image,
            fudged_image,
            segments,
            classifier_fn,
            num_samples,
            batch_size=batch_size)

        distances = sklearn.metrics.pairwise_distances(
            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

        interpretation_image = ImageInterpretation(image, segments)
        for label in top:
            (interpretation_image.intercept[label],
             interpretation_image.local_weights[label],
             interpretation_image.score, interpretation_image.local_pred
             ) = self.base.interpret_instance_with_data(
                 data,
                 labels,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
        return interpretation_image

    def data_labels(self,
                    image,
                    fudged_image,
                    segments,
                    classifier_fn,
                    num_samples,
                    batch_size=10):
        """Generates images and predictions in the neighborhood of this image.

        Args:
            image: 3d numpy array, the image
            fudged_image: 3d numpy array, image to replace original image when
                superpixel is turned off
            segments: segmentation of the image
            classifier_fn: function that takes a list of images and returns a
                matrix of prediction probabilities
            num_samples: size of the neighborhood to learn the linear model
            batch_size: classifier_fn will be called on batches of this size.

        Returns:
            A tuple (data, labels), where:
                data: dense num_samples * num_superpixels binary matrix of
                    perturbations (1 means the superpixel is kept)
                labels: prediction probabilities matrix
        """
        n_features = np.unique(segments).shape[0]
        data = self.random_state.randint(0, 2, num_samples * n_features) \
            .reshape((num_samples, n_features))
        labels = []
        data[0, :] = 1
        imgs = []

        logging.info("Computing LIME.", use_color=True)

        for row in tqdm.tqdm(data):
            temp = copy.deepcopy(image)
            zeros = np.where(row == 0)[0]
            mask = np.zeros(segments.shape).astype(bool)
            for z in zeros:
                mask[segments == z] = True
            temp[mask] = fudged_image[mask]
            imgs.append(temp)
            if len(imgs) == batch_size:
                preds = classifier_fn(np.array(imgs))
                labels.extend(preds)
                imgs = []
        if len(imgs) > 0:
            preds = classifier_fn(np.array(imgs))
            labels.extend(preds)
        return data, np.array(labels)
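

# A minimal, illustrative end-to-end sketch (not part of the upstream LIME
# code). The toy image, the random stand-in classifier and the helper name
# `_lime_image_interpreter_example` are assumptions for demonstration only;
# in PaddleX, `classifier_fn` would wrap a trained model's
# probability-prediction call.
def _lime_image_interpreter_example():
    rng = np.random.RandomState(0)
    # Toy RGB image: a green square on a black background. A real use case
    # would load an actual photo here.
    image = np.zeros((64, 64, 3), dtype=np.uint8)
    image[16:48, 16:48] = (0, 255, 0)

    def classifier_fn(batch):
        # Stand-in classifier: random "probabilities" for 2 classes, one row
        # per image in the batch.
        probs = rng.rand(batch.shape[0], 2)
        return probs / probs.sum(axis=1, keepdims=True)

    interpreter = LimeImageInterpreter(random_state=0)
    interpretation = interpreter.interpret_instance(
        image,
        classifier_fn,
        labels=(1, ),
        num_features=10,
        num_samples=100,
        batch_size=10)
    # `interpretation` is an ImageInterpretation; visualize label 1 with
    # interpretation.get_image_and_mask(1) or interpretation.get_rendered_image(1).
    return interpretation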