interpretation_algorithms.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. #Licensed under the Apache License, Version 2.0 (the "License");
  4. #you may not use this file except in compliance with the License.
  5. #You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. #Unless required by applicable law or agreed to in writing, software
  10. #distributed under the License is distributed on an "AS IS" BASIS,
  11. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. #See the License for the specific language governing permissions and
  13. #limitations under the License.
  14. import os
  15. import os.path as osp
  16. import numpy as np
  17. import time
  18. from . import lime_base
  19. from ._session_preparation import paddle_get_fc_weights, compute_features_for_kmeans, gen_user_home
  20. from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, load_kmeans_model
  21. from paddlex.interpret.as_data_reader.readers import read_image
  22. import cv2
  class CAM(object):
      """Class Activation Mapping (CAM) interpreter for the top-1 prediction of one image.

      The class weights the feature maps returned by ``predict_fn`` with the
      classifier weights returned by ``paddle_get_fc_weights()`` and overlays
      the resulting heat map on the input image (see ``get_cam``).
      """

      def __init__(self, predict_fn, label_names):
          """
          Args:
              predict_fn: input: images_show [N, H, W, 3], RGB range(0, 255)
                  output: [
                      logits [N, num_classes],
                      feature map before global average pooling [N, num_channels, h_, w_]
                  ]
              label_names: lookup from label index to a display name, or None
                  to display the raw label index.
          """
          self.predict_fn = predict_fn
          self.label_names = label_names

      @staticmethod
      def _to_probability(scores):
          """Return `scores` as probabilities, applying softmax when they do not sum to 1."""
          if abs(np.sum(scores) - 1.0) > 1e-4:
              # softmax (shifted by the maximum for numerical stability)
              shifted = scores - np.max(scores)
              exp_scores = np.exp(shifted)
              return exp_scores / np.sum(exp_scores)
          return scores

      def _display_name(self, label):
          """Return the readable name of `label`, or `label` itself without `label_names`."""
          if self.label_names is not None:
              return self.label_names[label]
          return label

      def preparation_cam(self, data_):
          """Run the model on `data_` and collect everything CAM needs.

          Stores ``predicted_label``, ``predicted_probability``, ``image`` and
          ``labels`` on the instance and prints the top-1 prediction.

          Args:
              data_: whatever ``read_image`` accepts.

          Returns:
              Tuple ``(feature_maps, fc_weights)``: the second output of
              ``predict_fn`` and the result of ``paddle_get_fc_weights()``.
          """
          image_show = read_image(data_)
          result = self.predict_fn(image_show)
          probability = self._to_probability(result[0][0])

          # only interpret top 1
          pred_label = np.argsort(probability)[-1:]
          self.predicted_label = pred_label[0]
          self.predicted_probability = probability[pred_label[0]]
          self.image = image_show[0]
          self.labels = pred_label

          fc_weights = paddle_get_fc_weights()
          feature_maps = result[1]

          ln = self._display_name(pred_label[0])
          print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')
          return feature_maps, fc_weights

      def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
          """Compute the CAM of `data_` and optionally show and/or save the figure.

          Args:
              data_: whatever ``read_image`` accepts.
              visualization: show the figure with ``plt.show()``.
              save_to_disk: save the figure; only effective when `save_outdir`
                  is not None.
              save_outdir: output directory, created when missing.
          """
          feature_maps, fc_weights = self.preparation_cam(data_)
          cam = get_cam(self.image, feature_maps, fc_weights, self.predicted_label)

          if not (visualization or save_to_disk):
              return

          # matplotlib is only required when a figure is actually produced.
          import matplotlib.pyplot as plt

          ln = self._display_name(self.labels[0])

          psize = 5
          nrows, ncols = 1, 2

          plt.close()
          f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
          axes = axes.ravel()
          for ax in axes:
              ax.axis("off")

          axes[0].imshow(self.image)
          axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")
          axes[1].imshow(cam)
          axes[1].set_title("CAM")

          if save_to_disk and save_outdir is not None:
              os.makedirs(save_outdir, exist_ok=True)
              save_fig(data_, save_outdir, 'cam')

          if visualization:
              plt.show()

          return
  class LIME(object):
      """LIME interpreter for the top-1 prediction of one image."""

      def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50):
          """
          LIME wrapper. See lime_base.py for the detailed LIME implementation.
          Args:
              predict_fn: from image [N, H, W, 3] to logits [N, num_classes], this is necessary for computing LIME.
              label_names: lookup from label index to a display name, or None
                  to display the raw label index.
              num_samples: the number of samples that LIME takes for fitting.
              batch_size: batch size for model inference each time.
          """
          self.num_samples = num_samples
          self.batch_size = batch_size
          self.predict_fn = predict_fn
          self.labels = None
          self.image = None
          self.lime_interpreter = None
          self.label_names = label_names
          # Input that `lime_interpreter` was computed for (None: not tracked).
          self._prepared_data = None

      @staticmethod
      def _to_probability(scores):
          """Return `scores` as probabilities, applying softmax when they do not sum to 1."""
          if abs(np.sum(scores) - 1.0) > 1e-4:
              # softmax (shifted by the maximum for numerical stability)
              shifted = scores - np.max(scores)
              exp_scores = np.exp(shifted)
              return exp_scores / np.sum(exp_scores)
          return scores

      def _display_name(self, label):
          """Return the readable name of `label`, or `label` itself without `label_names`."""
          if self.label_names is not None:
              return self.label_names[label]
          return label

      def _needs_preparation(self, data_):
          """Tell whether `preparation_lime` must (re)run before interpreting `data_`.

          The cached interpreter belongs to one specific input; reusing it for
          another input would visualize the wrong explanation.
          """
          if self.lime_interpreter is None:
              return True
          prepared = getattr(self, '_prepared_data', None)
          if prepared is None:
              # Interpreter assigned from outside this class: keep trusting it.
              return False
          if prepared is data_:
              return False
          if isinstance(prepared, str) and isinstance(data_, str):
              return prepared != data_
          return True

      def preparation_lime(self, data_):
          """Predict `data_` and fit the LIME interpreter for its top-1 label.

          Stores ``predicted_label``, ``predicted_probability``, ``image``,
          ``labels`` and ``lime_interpreter`` on the instance, and prints the
          prediction and the time spent in LIME.

          Args:
              data_: whatever ``read_image`` accepts.
          """
          image_show = read_image(data_)
          result = self.predict_fn(image_show)
          probability = self._to_probability(result[0])  # only one image here.

          # only interpret top 1
          pred_label = np.argsort(probability)[-1:]
          self.predicted_label = pred_label[0]
          self.predicted_probability = probability[pred_label[0]]
          self.image = image_show[0]
          self.labels = pred_label

          ln = self._display_name(pred_label[0])
          print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}')

          start = time.time()
          algo = lime_base.LimeImageInterpreter()
          interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0,
                                                num_samples=self.num_samples, batch_size=self.batch_size)
          self.lime_interpreter = interpreter
          self._prepared_data = data_
          print('lime time: ', time.time() - start, 's.')

      def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
          """Explain `data_` with LIME and optionally show and/or save the figure.

          Args:
              data_: whatever ``read_image`` accepts.
              visualization: show the figure with ``plt.show()``.
              save_to_disk: save the figure; only effective when `save_outdir`
                  is not None.
              save_outdir: output directory, created when missing.
          """
          if self._needs_preparation(data_):
              self.preparation_lime(data_)

          if not (visualization or save_to_disk):
              return

          # Plotting dependencies are only required when a figure is produced.
          import matplotlib.pyplot as plt
          from skimage.segmentation import mark_boundaries

          label = self.labels[0]
          ln = self._display_name(label)

          psize = 5
          nrows = 2
          weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85]
          ncols = len(weights_choices)

          plt.close()
          f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
          axes = axes.ravel()
          for ax in axes:
              ax.axis("off")

          axes[0].imshow(self.image)
          axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")

          axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments))
          axes[1].set_title("superpixel segmentation")

          # LIME visualization: second row, one panel per weight share.
          for i, w in enumerate(weights_choices):
              num_to_show = auto_choose_num_features_to_show(self.lime_interpreter, label, w)
              temp, mask = self.lime_interpreter.get_image_and_mask(
                  label, positive_only=False, hide_rest=False, num_features=num_to_show
              )
              axes[ncols + i].imshow(mark_boundaries(temp, mask))
              axes[ncols + i].set_title(f"label {ln}, first {num_to_show} superpixels")

          if save_to_disk and save_outdir is not None:
              os.makedirs(save_outdir, exist_ok=True)
              save_fig(data_, save_outdir, 'lime', self.num_samples)

          if visualization:
              plt.show()

          return
  class NormLIME(object):
      """NormLIME interpreter: LIME combined with precomputed per-cluster weights.

      Superpixels of the input image are assigned to KMeans clusters; the
      precomputed NormLIME weights of those clusters are shown next to, and
      combined with, the local LIME weights.
      """

      def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50,
                   kmeans_model_for_normlime=None, normlime_weights=None):
          """
          Args:
              predict_fn: prediction function forwarded to ``LIME``.
              label_names: lookup from label index to a display name, or None
                  to display the raw label index.
              num_samples: number of samples forwarded to ``LIME``.
              batch_size: inference batch size forwarded to ``LIME``.
              kmeans_model_for_normlime: path of a KMeans model; None uses the
                  default ``pre_models/kmeans_model.pkl`` under ``~/.paddlex``.
              normlime_weights: path of the precomputed NormLIME weights
                  (loaded with ``np.load``).

          Raises:
              ValueError: the default KMeans model cannot be loaded.
          """
          root_path = osp.join(gen_user_home(), '.paddlex')
          h_pre_models = osp.join(root_path, "pre_models")
          if not osp.exists(h_pre_models):
              if not osp.exists(root_path):
                  os.makedirs(root_path)
              # `pdx` is not imported at module level; import it where it is used.
              import paddlex as pdx
              url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
              pdx.utils.download_and_decompress(url, path=root_path)
          h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")

          if kmeans_model_for_normlime is None:
              try:
                  self.kmeans_model = load_kmeans_model(h_pre_models_kmeans)
              except Exception as err:
                  raise ValueError("NormLIME needs the KMeans model, where we provided a default one in "
                                   "pre_models/kmeans_model.pkl.") from err
          else:
              print("Warning: It is *strongly* suggested to use the default KMeans model in pre_models/kmeans_model.pkl. "
                    "Use another one will change the final result.")
              self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime)

          self.num_samples = num_samples
          self.batch_size = batch_size

          try:
              # NOTE(review): allow_pickle=True executes pickled code on load;
              # only point `normlime_weights` at trusted files.
              self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item()
          except Exception:
              # Best effort: interpret() reports the missing weights later.
              self.normlime_weights = None
              print("Warning: not find the correct precomputed Normlime result.")

          self.predict_fn = predict_fn
          self.labels = None
          self.image = None
          self.label_names = label_names

      def predict_cluster_labels(self, feature_map, segments):
          """Assign every superpixel in `segments` to a KMeans cluster.

          Falls back to a nearest-center search over
          ``kmeans_model.cluster_centers_`` when ``kmeans_model.predict``
          raises AttributeError.
          """
          X = get_feature_for_kmeans(feature_map, segments)
          try:
              cluster_labels = self.kmeans_model.predict(X)
          except AttributeError:
              from sklearn.metrics import pairwise_distances_argmin_min
              cluster_labels, _ = pairwise_distances_argmin_min(X, self.kmeans_model.cluster_centers_)
          return cluster_labels

      def predict_using_normlime_weights(self, pred_labels, predicted_cluster_labels):
          """Look up the precomputed weight of every superpixel for every label.

          Args:
              pred_labels: labels to explain.
              predicted_cluster_labels: cluster of each superpixel, indexed by
                  superpixel position.

          Returns:
              dict mapping each label to a list of ``(superpixel_index, weight)``
              sorted by decreasing absolute weight; clusters or labels without
              a precomputed weight get 0.0.
          """
          # global weights
          g_weights = {}
          for y in pred_labels:
              cluster_weights_y = self.normlime_weights.get(y, {})
              per_superpixel = [
                  (i, cluster_weights_y.get(k, 0.0)) for i, k in enumerate(predicted_cluster_labels)
              ]
              g_weights[y] = sorted(per_superpixel, key=lambda x: np.abs(x[1]), reverse=True)
          return g_weights

      def preparation_normlime(self, data_):
          """Run LIME on `data_` and compute the NormLIME weights of its superpixels.

          Stores the fitted ``LIME`` instance in ``self._lime`` and copies its
          prediction (``predicted_label``, ``predicted_probability``,
          ``labels``) onto this instance.

          Returns:
              The result of ``predict_using_normlime_weights`` for the
              predicted labels.
          """
          self._lime = LIME(
              self.predict_fn,
              self.label_names,
              self.num_samples,
              self.batch_size
          )
          self._lime.preparation_lime(data_)

          image_show = read_image(data_)

          self.predicted_label = self._lime.predicted_label
          self.predicted_probability = self._lime.predicted_probability
          self.image = image_show[0]
          self.labels = self._lime.labels

          print('performing NormLIME operations ...')

          cluster_labels = self.predict_cluster_labels(
              compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments
          )

          g_weights = self.predict_using_normlime_weights(self.labels, cluster_labels)
          return g_weights

      def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None):
          """Explain `data_` with LIME, NormLIME and their combination.

          The figure has four rows: input and segmentation, LIME, NormLIME,
          and the combined weights.

          Args:
              data_: whatever ``read_image`` accepts.
              visualization: show the figure with ``plt.show()``.
              save_to_disk: save the figure; only effective when `save_outdir`
                  is not None.
              save_outdir: output directory, created when missing.

          Raises:
              ValueError: no precomputed NormLIME weights were loaded.
          """
          if self.normlime_weights is None:
              raise ValueError("Not find the correct precomputed NormLIME result. \n"
                               "\t Try to call compute_normlime_weights() first or load the correct path.")

          g_weights = self.preparation_normlime(data_)
          lime_interpreter = self._lime.lime_interpreter
          lime_weights = lime_interpreter.local_weights

          if not (visualization or save_to_disk):
              return

          # Plotting dependencies are only required when a figure is produced.
          import matplotlib.pyplot as plt
          from skimage.segmentation import mark_boundaries

          label = self.labels[0]
          ln = label
          if self.label_names is not None:
              ln = self.label_names[label]

          psize = 5
          nrows = 4
          weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85]
          nums_to_show = []
          ncols = len(weights_choices)

          plt.close()
          f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows))
          axes = axes.ravel()
          for ax in axes:
              ax.axis("off")

          axes[0].imshow(self.image)
          axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}")

          axes[1].imshow(mark_boundaries(self.image, lime_interpreter.segments))
          axes[1].set_title("superpixel segmentation")

          # LIME visualization
          for i, w in enumerate(weights_choices):
              num_to_show = auto_choose_num_features_to_show(lime_interpreter, label, w)
              nums_to_show.append(num_to_show)
              temp, mask = lime_interpreter.get_image_and_mask(
                  label, positive_only=False, hide_rest=False, num_features=num_to_show
              )
              axes[ncols + i].imshow(mark_boundaries(temp, mask))
              axes[ncols + i].set_title(f"LIME: first {num_to_show} superpixels")

          # The next two rows temporarily replace the interpreter's weights;
          # always restore the LIME weights, even if plotting fails.
          try:
              # NormLIME visualization
              lime_interpreter.local_weights = g_weights
              for i, num_to_show in enumerate(nums_to_show):
                  temp, mask = lime_interpreter.get_image_and_mask(
                      label, positive_only=False, hide_rest=False, num_features=num_to_show
                  )
                  axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask))
                  axes[ncols * 2 + i].set_title(f"NormLIME: first {num_to_show} superpixels")

              # NormLIME*LIME visualization
              combined_weights = combine_normlime_and_lime(lime_weights, g_weights)
              lime_interpreter.local_weights = combined_weights
              for i, num_to_show in enumerate(nums_to_show):
                  temp, mask = lime_interpreter.get_image_and_mask(
                      label, positive_only=False, hide_rest=False, num_features=num_to_show
                  )
                  axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask))
                  axes[ncols * 3 + i].set_title(f"Combined: first {num_to_show} superpixels")
          finally:
              lime_interpreter.local_weights = lime_weights

          if save_to_disk and save_outdir is not None:
              os.makedirs(save_outdir, exist_ok=True)
              save_fig(data_, save_outdir, 'normlime', self.num_samples)

          if visualization:
              plt.show()
  def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show):
      """Pick how many leading superpixels to display for `label`.

      Walks the ``(superpixel, weight)`` list of `label` in its stored order and
      accumulates the positive weights, normalized by their sum, until the
      running total exceeds `percentage_to_show`. Negative weights and
      superpixels smaller than 1/8 of the average superpixel size do not count
      towards the total. When the share is never reached, it is lowered in
      steps of 0.1 and the search is repeated.

      Args:
          lime_interpreter: object exposing ``segments`` (2-D array of
              superpixel ids) and ``local_weights`` (dict: label -> list of
              ``(superpixel_id, weight)``).
          label: key into ``lime_interpreter.local_weights``.
          percentage_to_show: share of the positive weight mass to cover.

      Returns:
          The 1-based position in the weight list at which the share is
          exceeded, or 5 when no positive share can be reached.
      """
      segments = lime_interpreter.segments
      lime_weights = lime_interpreter.local_weights[label]

      # Count the pixels of every superpixel once instead of rescanning
      # `segments` for each weight.
      superpixel_ids, pixel_counts = np.unique(segments, return_counts=True)
      pixels_per_superpixel = dict(zip(superpixel_ids.tolist(), pixel_counts.tolist()))
      num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[1] // len(superpixel_ids) // 8

      # l1 norm with filtered weights.
      norm = np.sum([entry[1] for entry in lime_weights if entry[1] > 0])
      if norm <= 0:
          # No positive weight: nothing can be accumulated, use the fallback.
          return 5

      # (list position, normalized weight) of the entries that count.
      eligible = []
      for position, entry in enumerate(lime_weights):
          share = entry[1] / norm
          if share < 0:
              continue
          if pixels_per_superpixel.get(entry[0], 0) < num_pixels_threshold_in_a_sp:
              continue
          eligible.append((position, share))

      target = percentage_to_show
      while target > 0.0:
          accumulated = 0.0
          for position, share in eligible:
              accumulated += share
              if accumulated > target:
                  return position + 1
          # Share not reachable: relax the target and try again.
          target = target - 0.1

      return 5
  def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam_max=None):
      """Build a class activation map for `label_index` and overlay it on `image_show`.

      Args:
          image_show: image array whose first two axes are height and width;
              its shape must be compatible with a (height, width, 3) heat map.
          feature_maps: 4-D array; axis 1 is the channel axis that is weighted
              by `fc_weights`.
          fc_weights: 2-D array indexed as ``[channel, label]``.
          label_index: column of `fc_weights` to use.
          cam_min: value subtracted from the raw map; defaults to its minimum.
          cam_max: value the shifted map is divided by; defaults to the
              maximum of the raw map.

      Returns:
          float32 array: heat map plus image, divided by its maximum.
      """
      _, nc, _, _ = feature_maps.shape

      # Weight every channel with the class weight, then sum over the batch
      # and channel axes.
      cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
      cam = cam.sum((0, 1))

      if cam_min is None:
          cam_min = np.min(cam)
      if cam_max is None:
          cam_max = np.max(cam)

      cam = cam - cam_min
      # NOTE(review): the shifted map is divided by cam_max, not by
      # (cam_max - cam_min); kept unchanged so the rendered output stays the
      # same. A zero cam_max divides by zero here.
      cam = cam / cam_max
      cam = np.uint8(255 * cam)

      # cv2.resize expects dsize as (width, height), whereas ndarray.shape
      # starts with (height, width).
      height, width = image_show.shape[0:2]
      cam_img = cv2.resize(cam, (width, height), interpolation=cv2.INTER_LINEAR)

      # NOTE(review): cam_img is already uint8, so 255 * cam_img wraps modulo
      # 256 before the color map is applied. Kept as is because changing it
      # changes the colors; presumably it offsets applyColorMap returning BGR
      # while the image is RGB - confirm before touching.
      heatmap = cv2.applyColorMap(np.uint8(255 * cam_img), cv2.COLORMAP_JET)
      heatmap = np.float32(heatmap)

      cam = heatmap + np.float32(image_show)
      cam = cam / np.max(cam)
      return cam
  def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
      """Save the current matplotlib figure into `save_outdir` and print its path.

      File name:
          * `data_` is a path string: ``<algorithm>_<file name of data_>``,
            followed by ``_s<num_samples>`` unless the algorithm is ``'cam'``,
            plus ``.png``. An existing file is overwritten.
          * otherwise: ``<algorithm>[_s<num_samples>]-<n>.png`` with the
            smallest ``n >= 0`` that does not exist yet in `save_outdir`.

      Args:
          data_: the interpreted input; only used to derive the file name.
          save_outdir: existing output directory.
          algorithm_name: ``'cam'`` or another algorithm name such as
              ``'lime'`` / ``'normlime'``.
          num_samples: sample count embedded in the file name (not for CAM).
      """
      import matplotlib.pyplot as plt

      # CAM file names carry no sample-count suffix.
      sample_tag = '' if algorithm_name == 'cam' else f'_s{num_samples}'

      if isinstance(data_, str):
          # basename() honours the platform's path separators, unlike
          # splitting on '/'.
          f_out = f"{algorithm_name}_{os.path.basename(data_)}{sample_tag}.png"
      else:
          # No file name to derive from: take the first unused counter.
          n = 0
          f_out = f'{algorithm_name}{sample_tag}-{n}.png'
          while os.path.exists(os.path.join(save_outdir, f_out)):
              n += 1
              f_out = f'{algorithm_name}{sample_tag}-{n}.png'

      out_path = os.path.join(save_outdir, f_out)
      plt.savefig(out_path)
      print('The image of intrepretation result save in {}'.format(out_path))