- """
- Contains abstract functionality for learning locally linear sparse model.
- """
- from __future__ import print_function
- import numpy as np
- import scipy as sp
- import sklearn
- import sklearn.preprocessing
- from skimage.color import gray2rgb
- from sklearn.linear_model import Ridge, lars_path
- from sklearn.utils import check_random_state
- import copy
- from functools import partial
- from skimage.segmentation import quickshift
- from skimage.measure import regionprops
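
# Illustrative sketch, not part of the library API: LimeBase below expects a
# `kernel_fn` that turns an array of distances into an array of proximity
# weights in (0, 1]. This example mirrors the default exponential kernel used
# by LimeImageExplainer further down; the function name is our own.
def _example_exponential_kernel(distances, kernel_width=0.25):
    """Map distances d to weights sqrt(exp(-d**2 / kernel_width**2))."""
    distances = np.asarray(distances)
    return np.sqrt(np.exp(-(distances ** 2) / kernel_width ** 2))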

class LimeBase(object):
    """Class for learning a locally linear sparse model from perturbed data."""

    def __init__(self,
                 kernel_fn,
                 verbose=False,
                 random_state=None):
        """Init function.

        Args:
            kernel_fn: function that transforms an array of distances into an
                array of proximity values (floats).
            verbose: if True, print local prediction values from the linear
                model.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        self.kernel_fn = kernel_fn
        self.verbose = verbose
        self.random_state = check_random_state(random_state)
    @staticmethod
    def generate_lars_path(weighted_data, weighted_labels):
        """Generates the LARS path for weighted data.

        Args:
            weighted_data: data that has been weighted by the kernel
            weighted_labels: labels, weighted by the kernel

        Returns:
            (alphas, coefs), both are arrays corresponding to the
            regularization parameter and coefficients, respectively.
        """
        alphas, _, coefs = lars_path(weighted_data,
                                     weighted_labels,
                                     method='lasso',
                                     verbose=False)
        return alphas, coefs
    def forward_selection(self, data, labels, weights, num_features):
        """Iteratively adds features to the model via greedy forward selection."""
        clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
        used_features = []
        for _ in range(min(num_features, data.shape[1])):
            max_ = -np.inf
            best = 0
            for feature in range(data.shape[1]):
                if feature in used_features:
                    continue
                clf.fit(data[:, used_features + [feature]], labels,
                        sample_weight=weights)
                score = clf.score(data[:, used_features + [feature]],
                                  labels,
                                  sample_weight=weights)
                if score > max_:
                    best = feature
                    max_ = score
            used_features.append(best)
        return np.array(used_features)
    def feature_selection(self, data, labels, weights, num_features, method):
        """Selects features for the model. See explain_instance_with_data to
        understand the parameters."""
        if method == 'none':
            return np.array(range(data.shape[1]))
        elif method == 'forward_selection':
            return self.forward_selection(data, labels, weights, num_features)
        elif method == 'highest_weights':
            clf = Ridge(alpha=0.01, fit_intercept=True,
                        random_state=self.random_state)
            clf.fit(data, labels, sample_weight=weights)

            coef = clf.coef_
            if sp.sparse.issparse(data):
                coef = sp.sparse.csr_matrix(clf.coef_)
                weighted_data = coef.multiply(data[0])
                # Note: most efficient to slice the data before reversing.
                sdata = len(weighted_data.data)
                argsort_data = np.abs(weighted_data.data).argsort()
                # Edge case where the data is sparser than the requested number
                # of feature importances; in that case, pad with zero-valued
                # features.
                if sdata < num_features:
                    nnz_indexes = argsort_data[::-1]
                    indices = weighted_data.indices[nnz_indexes]
                    num_to_pad = num_features - sdata
                    indices = np.concatenate((indices,
                                              np.zeros(num_to_pad, dtype=indices.dtype)))
                    indices_set = set(indices)
                    pad_counter = 0
                    for i in range(data.shape[1]):
                        if i not in indices_set:
                            indices[pad_counter + sdata] = i
                            pad_counter += 1
                            if pad_counter >= num_to_pad:
                                break
                else:
                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                    indices = weighted_data.indices[nnz_indexes]
                return indices
            else:
                weighted_data = coef * data[0]
                feature_weights = sorted(
                    zip(range(data.shape[1]), weighted_data),
                    key=lambda x: np.abs(x[1]),
                    reverse=True)
                return np.array([x[0] for x in feature_weights[:num_features]])
        elif method == 'lasso_path':
            weighted_data = ((data - np.average(data, axis=0, weights=weights))
                             * np.sqrt(weights[:, np.newaxis]))
            weighted_labels = ((labels - np.average(labels, weights=weights))
                               * np.sqrt(weights))
            nonzero = range(weighted_data.shape[1])
            _, coefs = self.generate_lars_path(weighted_data,
                                               weighted_labels)
            for i in range(len(coefs.T) - 1, 0, -1):
                nonzero = coefs.T[i].nonzero()[0]
                if len(nonzero) <= num_features:
                    break
            used_features = nonzero
            return used_features
        elif method == 'auto':
            if num_features <= 6:
                n_method = 'forward_selection'
            else:
                n_method = 'highest_weights'
            return self.feature_selection(data, labels, weights,
                                          num_features, n_method)
    def explain_instance_with_data(self,
                                   neighborhood_data,
                                   neighborhood_labels,
                                   distances,
                                   label,
                                   num_features,
                                   feature_selection='auto',
                                   model_regressor=None):
        """Takes perturbed data, labels and distances; returns an explanation.

        Args:
            neighborhood_data: perturbed data, 2d array. The first element is
                assumed to be the original data point.
            neighborhood_labels: corresponding perturbed labels. Should have
                as many columns as the number of possible labels.
            distances: distances to the original data point.
            label: label for which we want an explanation
            num_features: maximum number of features in the explanation
            feature_selection: how to select num_features. Options are:
                'forward_selection': iteratively add features to the model.
                    This is costly when num_features is high
                'highest_weights': selects the features that have the highest
                    product of absolute weight * original data point when
                    learning with all the features
                'lasso_path': chooses features based on the lasso
                    regularization path
                'none': uses all features, ignores num_features
                'auto': uses 'forward_selection' if num_features <= 6, and
                    'highest_weights' otherwise.
            model_regressor: sklearn regressor to use in the explanation.
                Defaults to Ridge regression if None. Must expose
                model_regressor.coef_ and accept 'sample_weight' as a
                parameter to model_regressor.fit()

        Returns:
            (intercept, exp, score, local_pred):
                intercept is a float.
                exp is a sorted list of tuples, where each tuple (x, y)
                    corresponds to the feature id (x) and the local weight (y).
                    The list is sorted by decreasing absolute value of y.
                score is the R^2 value of the returned explanation.
                local_pred is the prediction of the explanation model on the
                    original instance.
        """
        weights = self.kernel_fn(distances)
        labels_column = neighborhood_labels[:, label]
        used_features = self.feature_selection(neighborhood_data,
                                               labels_column,
                                               weights,
                                               num_features,
                                               feature_selection)
        if model_regressor is None:
            model_regressor = Ridge(alpha=1, fit_intercept=True,
                                    random_state=self.random_state)
        easy_model = model_regressor
        easy_model.fit(neighborhood_data[:, used_features],
                       labels_column, sample_weight=weights)
        prediction_score = easy_model.score(
            neighborhood_data[:, used_features],
            labels_column, sample_weight=weights)

        local_pred = easy_model.predict(
            neighborhood_data[0, used_features].reshape(1, -1))

        if self.verbose:
            print('Intercept', easy_model.intercept_)
            print('Prediction_local', local_pred)
            print('Right:', neighborhood_labels[0, label])
        return (easy_model.intercept_,
                sorted(zip(used_features, easy_model.coef_),
                       key=lambda x: np.abs(x[1]), reverse=True),
                prediction_score, local_pred)
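
# Illustrative sketch with toy data (all names below are our own, not library
# API): how LimeBase fits a weighted linear surrogate around one instance.
# We fabricate a small binary neighborhood, score it with a made-up
# probability function standing in for a black-box model, and ask for the two
# most influential features of class 1.
def _demo_lime_base(num_samples=200, num_feats=5):
    rng = np.random.RandomState(0)
    data = rng.randint(0, 2, (num_samples, num_feats)).astype(float)
    data[0, :] = 1  # by convention, row 0 is the instance being explained
    # Fake class-1 probability driven mostly by features 0 and 3.
    probs = 1.0 / (1.0 + np.exp(-(2 * data[:, 0] + 1.5 * data[:, 3] - 1)))
    labels = np.column_stack([1 - probs, probs])
    distances = sklearn.metrics.pairwise_distances(
        data, data[0].reshape(1, -1), metric='cosine').ravel()
    base = LimeBase(partial(_example_exponential_kernel, kernel_width=0.25))
    intercept, exp, score, local_pred = base.explain_instance_with_data(
        data, labels, distances, label=1, num_features=2)
    # exp should rank features 0 and 3 first, with positive local weights.
    return intercept, exp, score, local_pred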

class ImageExplanation(object):
    def __init__(self, image, segments):
        """Init function.

        Args:
            image: 3d numpy array
            segments: 2d numpy array, with the output from skimage.segmentation
        """
        self.image = image
        self.segments = segments
        self.intercept = {}
        self.local_exp = {}
        self.local_pred = None
    def get_image_and_mask(self, label, positive_only=True, negative_only=False,
                           hide_rest=False, num_features=5, min_weight=0.):
        """Returns the explanation image and mask for a given label.

        Args:
            label: label to explain
            positive_only: if True, only take superpixels that positively
                contribute to the prediction of the label.
            negative_only: if True, only take superpixels that negatively
                contribute to the prediction of the label. If this and
                positive_only are both False, both negative and positive
                contributions will be taken. Both cannot be True at the
                same time.
            hide_rest: if True, make the non-explanation part of the returned
                image gray
            num_features: number of superpixels to include in the explanation
            min_weight: minimum absolute weight for a superpixel to be
                included in the explanation

        Returns:
            (image, mask), where image is a 3d numpy array and mask is a 2d
            numpy array that can be used with
            skimage.segmentation.mark_boundaries
        """
        if label not in self.local_exp:
            raise KeyError('Label not in explanation')
        if positive_only and negative_only:
            raise ValueError("positive_only and negative_only cannot be True "
                             "at the same time.")
        segments = self.segments
        image = self.image
        exp = self.local_exp[label]
        mask = np.zeros(segments.shape, segments.dtype)
        if hide_rest:
            temp = np.zeros(self.image.shape)
        else:
            temp = self.image.copy()
        if positive_only:
            fs = [x[0] for x in exp
                  if x[1] > 0 and x[1] > min_weight][:num_features]
        if negative_only:
            fs = [x[0] for x in exp
                  if x[1] < 0 and abs(x[1]) > min_weight][:num_features]
        if positive_only or negative_only:
            for f in fs:
                temp[segments == f] = image[segments == f].copy()
                mask[segments == f] = 1
            return temp, mask
        else:
            for f, w in exp[:num_features]:
                if np.abs(w) < min_weight:
                    continue
                c = 0 if w < 0 else 1
                mask[segments == f] = -1 if w < 0 else 1
                temp[segments == f] = image[segments == f].copy()
                temp[segments == f, c] = np.max(image)
            return temp, mask
    def get_rendered_image(self, label, min_weight=0.005):
        """Renders the explanation as a colored image.

        Args:
            label: label to explain
            min_weight: minimum (normalized) absolute weight for a superpixel
                to be colored

        Returns:
            image, a 3d numpy array
        """
        if label not in self.local_exp:
            raise KeyError('Label not in explanation')
        from matplotlib import cm
        segments = self.segments
        exp = self.local_exp[label]
        temp = np.zeros_like(self.image)

        # Normalize weights by the largest absolute weight, then sort so that
        # negative weights come last.
        weight_max = abs(exp[0][1])
        exp = [(f, w / weight_max) for f, w in exp]
        exp = sorted(exp, key=lambda x: x[1], reverse=True)

        cmaps = cm.get_cmap('Spectral')
        colors = cmaps(np.linspace(0, 1, len(exp)))
        colors = colors[:, :3]
        for i, (f, w) in enumerate(exp):
            if np.abs(w) < min_weight:
                continue
            temp[segments == f] = colors[i] * 255
        return temp
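
# Illustrative sketch with toy data (our own names, not library API): what
# get_image_and_mask returns. We hand-build an explanation over a two-segment
# image; in real use, the ImageExplanation comes out of
# LimeImageExplainer.explain_instance.
def _demo_image_explanation_mask():
    image = np.full((4, 4, 3), 100, dtype=np.uint8)
    segments = np.zeros((4, 4), dtype=np.int64)
    segments[:, 2:] = 1  # left half is superpixel 0, right half superpixel 1
    explanation = ImageExplanation(image, segments)
    # Fake local weights: superpixel 1 supports label 0, superpixel 0 opposes.
    explanation.local_exp[0] = [(1, 0.8), (0, -0.3)]
    explanation.intercept[0] = 0.1
    temp, mask = explanation.get_image_and_mask(
        0, positive_only=True, hide_rest=True, num_features=1)
    # mask is 1 on superpixel 1 and 0 elsewhere; temp shows only that region.
    return temp, mask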

class LimeImageExplainer(object):
    """Explains predictions on image (i.e. matrix) data.

    Perturbed samples are generated by randomly turning superpixels on and
    off: each superpixel becomes a binary interpretable feature that is 1
    when the superpixel is kept and 0 when it is replaced with a "fudged"
    (hidden) version (see data_labels)."""

    def __init__(self, kernel_width=.25, kernel=None, verbose=False,
                 feature_selection='auto', random_state=None):
        """Init function.

        Args:
            kernel_width: kernel width for the exponential kernel
                (default .25).
            kernel: similarity kernel that takes euclidean distances and
                kernel width as input and outputs weights in (0,1). If None,
                defaults to an exponential kernel.
            verbose: if True, print local prediction values from the linear
                model
            feature_selection: feature selection method. Can be
                'forward_selection', 'lasso_path', 'none' or 'auto'.
                See 'explain_instance_with_data' in LimeBase for details on
                what each option does.
            random_state: an integer or numpy.RandomState that will be used to
                generate random numbers. If None, the random state will be
                initialized using the internal numpy seed.
        """
        kernel_width = float(kernel_width)

        if kernel is None:
            def kernel(d, kernel_width):
                return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))

        kernel_fn = partial(kernel, kernel_width=kernel_width)

        self.random_state = check_random_state(random_state)
        self.feature_selection = feature_selection
        self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state)
    def explain_instance(self, image, classifier_fn, labels=(1,),
                         hide_color=None,
                         num_features=100000, num_samples=1000,
                         batch_size=10,
                         distance_metric='cosine',
                         model_regressor=None):
        """Generates explanations for a prediction.

        First, we generate neighborhood data by randomly perturbing
        superpixels of the instance (see data_labels). We then learn locally
        weighted linear models on this neighborhood data to explain each of
        the classes in an interpretable way (see LimeBase).

        Args:
            image: 3-dimensional RGB image. If it is only two-dimensional,
                we will assume it's a grayscale image and call gray2rgb.
            classifier_fn: classifier prediction probability function, which
                takes a numpy array and outputs prediction probabilities. For
                scikit-learn classifiers, this is classifier.predict_proba.
            labels: iterable with labels to be explained.
            hide_color: how hidden superpixels are filled. If None, each
                hidden superpixel is replaced by the mean of its own pixels.
                If 'avg_from_neighbor', it is replaced by the mean of its five
                nearest superpixels. Otherwise, this value is used as a
                constant fill color.
            num_features: maximum number of features present in the explanation
            num_samples: size of the neighborhood to learn the linear model
            batch_size: classifier_fn is called on batches of this size
                (see data_labels).
            distance_metric: the distance metric to use for weights.
            model_regressor: sklearn regressor to use in the explanation.
                Defaults to Ridge regression in LimeBase. Must expose
                model_regressor.coef_ and accept 'sample_weight' as a
                parameter to model_regressor.fit()

        Returns:
            An ImageExplanation object with the corresponding explanations.
        """
        if len(image.shape) == 2:
            image = gray2rgb(image)

        segments = quickshift(image, sigma=1)
        self.segments = segments

        fudged_image = image.copy()
        if hide_color is None:
            # With no hide_color, replace each superpixel with its mean color.
            for x in np.unique(segments):
                mx = np.mean(image[segments == x], axis=0)
                fudged_image[segments == x] = mx
        elif hide_color == 'avg_from_neighbor':
            from scipy.spatial.distance import cdist

            n_features = np.unique(segments).shape[0]
            regions = regionprops(segments + 1)
            centroids = np.zeros((n_features, 2))
            for i, x in enumerate(regions):
                centroids[i] = np.array(x.centroid)

            d = cdist(centroids, centroids, 'sqeuclidean')
            for x in np.unique(segments):
                # Average the five nearest superpixels (excluding x itself).
                a = [image[segments == i] for i in np.argsort(d[x])[1:6]]
                mx = np.mean(np.concatenate(a), axis=0)
                fudged_image[segments == x] = mx
        else:
            fudged_image[:] = hide_color

        top = labels

        data, labels = self.data_labels(image, fudged_image, segments,
                                        classifier_fn, num_samples,
                                        batch_size=batch_size)

        distances = sklearn.metrics.pairwise_distances(
            data,
            data[0].reshape(1, -1),
            metric=distance_metric
        ).ravel()

        ret_exp = ImageExplanation(image, segments)
        for label in top:
            (ret_exp.intercept[label],
             ret_exp.local_exp[label],
             ret_exp.score, ret_exp.local_pred) = self.base.explain_instance_with_data(
                data, labels, distances, label, num_features,
                model_regressor=model_regressor,
                feature_selection=self.feature_selection)
        return ret_exp
    def data_labels(self,
                    image,
                    fudged_image,
                    segments,
                    classifier_fn,
                    num_samples,
                    batch_size=10):
        """Generates images and predictions in the neighborhood of this image.

        Args:
            image: 3d numpy array, the image
            fudged_image: 3d numpy array, image to replace the original image
                when a superpixel is turned off
            segments: segmentation of the image
            classifier_fn: function that takes a list of images and returns a
                matrix of prediction probabilities
            num_samples: size of the neighborhood to learn the linear model
            batch_size: classifier_fn will be called on batches of this size.

        Returns:
            A tuple (data, labels), where:
                data: dense num_samples * num_superpixels binary matrix; 1
                    means the superpixel is kept, 0 means it is fudged
                labels: prediction probabilities matrix
        """
        n_features = np.unique(segments).shape[0]
        data = self.random_state.randint(0, 2, num_samples * n_features) \
            .reshape((num_samples, n_features))
        labels = []
        data[0, :] = 1  # the first sample is the unperturbed image
        imgs = []
        for row in data:
            temp = copy.deepcopy(image)
            zeros = np.where(row == 0)[0]
            mask = np.zeros(segments.shape).astype(bool)
            for z in zeros:
                mask[segments == z] = True
            temp[mask] = fudged_image[mask]
            imgs.append(temp)
            if len(imgs) == batch_size:
                preds = classifier_fn(np.array(imgs))
                labels.extend(preds)
                imgs = []
        if len(imgs) > 0:
            preds = classifier_fn(np.array(imgs))
            labels.extend(preds)
        return data, np.array(labels)
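
# Illustrative end-to-end sketch (the classifier below is a stand-in, not a
# real model): explaining a prediction on a random RGB image. classifier_fn
# only needs to map a batch of images to per-class probabilities.
def _demo_explain_instance():
    rng = np.random.RandomState(0)
    image = (rng.rand(32, 32, 3) * 255).astype(np.uint8)

    def classifier_fn(images):
        # Fake two-class "model": class-1 probability grows with brightness.
        p = np.array([img.mean() / 255.0 for img in images])
        return np.column_stack([1 - p, p])

    explainer = LimeImageExplainer(kernel_width=0.25)
    explanation = explainer.explain_instance(
        image, classifier_fn, labels=(1,), hide_color=0,
        num_features=5, num_samples=100)
    return explanation.get_image_and_mask(1, positive_only=True)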