# Grad-CAM visualization demo (ResNet-18)
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torchvision.models.resnet import resnet18
class GradCAM:
    """Grad-CAM heat-map generator for a CNN classifier.

    Hooks the named layer of `model` to capture its forward feature maps
    and backward gradients, then weights the feature maps by the
    channel-averaged gradients (Selvaraju et al.) to produce a class
    activation map overlaid on the input image.
    """

    def __init__(self, model: nn.Module, target_layer: str,
                 size=(224, 224), mean=None, std=None) -> None:
        """
        Args:
            model: classifier whose attribute named `target_layer` is hooked.
            target_layer: attribute name of the conv block to visualize.
            size: (width, height) the input image is resized to for the model.
            mean, std: normalization stats; ImageNet defaults when omitted.
        """
        self.model = model
        self.model.eval()
        layer = getattr(self.model, target_layer)
        layer.register_forward_hook(self.__forward_hook)
        # register_backward_hook is deprecated and documented to return
        # incorrect gradients on modules containing multiple ops (layer4 of
        # a ResNet is an nn.Sequential); use the full variant instead.
        layer.register_full_backward_hook(self.__backward_hook)

        self.size = size
        self.origin_size = None  # (w, h) of the last image seen by forward()
        # `is not None` rather than truthiness, so explicit stats of 0 work.
        self.mean = mean if mean is not None else [0.485, 0.456, 0.406]
        self.std = std if std is not None else [0.229, 0.224, 0.225]

        self.grads = []   # captured d(score)/d(feature-map), filled by hook
        self.fmaps = []   # captured feature maps, filled by hook

    def forward(self, img_arr: np.ndarray, label=None, show=True, write=False):
        """Run Grad-CAM on a BGR uint8 image and display/save the overlay.

        Args:
            img_arr: H x W x 3 BGR image (as returned by cv2.imread).
            label: target class index; argmax of the logits when None.
            show: display the overlay with matplotlib.
            write: write the overlay to "camcam.jpg".
        """
        img_input = self.__img_preprocess(img_arr.copy())

        output = self.model(img_input)

        self.model.zero_grad()
        loss = self.__compute_loss(output, label)
        loss.backward()

        grads_val = self.grads[0].cpu().data.numpy().squeeze()
        fmap = self.fmaps[0].cpu().data.numpy().squeeze()
        cam = self.__compute_cam(fmap, grads_val)

        # Upsample the CAM back to the original image resolution for overlay.
        cam_show = cv2.resize(cam, self.origin_size)
        img_show = img_arr.astype(np.float32) / 255
        self.__show_cam_on_image(img_show, cam_show, if_show=show, if_write=write)

        # Clear captures so repeated forward() calls read fresh hook output.
        self.fmaps.clear()
        self.grads.clear()

    def __img_transform(self, img_arr: np.ndarray, transform) -> torch.Tensor:
        """Convert an RGB uint8 array to a normalized 1 x C x H x W tensor."""
        img = Image.fromarray(np.uint8(img_arr))
        return transform(img).unsqueeze(0)

    def __img_preprocess(self, img_in: np.ndarray) -> torch.Tensor:
        """Resize, convert BGR->RGB and normalize; remembers original size."""
        # cv2.resize expects (w, h), hence the swapped shape indices.
        self.origin_size = (img_in.shape[1], img_in.shape[0])
        img = cv2.resize(img_in.copy(), self.size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ])
        return self.__img_transform(img, transform)

    def __backward_hook(self, module, grad_in, grad_out):
        # grad_out[0]: gradient of the score w.r.t. the layer's output.
        self.grads.append(grad_out[0].detach())

    def __forward_hook(self, module, input, output):
        self.fmaps.append(output)

    def __compute_loss(self, logit, index=None):
        """Return the scalar score of the target class.

        Args:
            logit: 1 x num_classes tensor of raw scores.
            index: target class; argmax of `logit` when None.
        """
        # Must test `is None`, not truthiness: class index 0 is a valid
        # target and was previously replaced by the argmax class.
        if index is None:
            index = np.argmax(logit.cpu().data.numpy())
        index = torch.tensor([[int(index)]], dtype=torch.long)
        # Size the one-hot from the logits instead of hard-coding 1000
        # classes, so non-ImageNet classifiers work too.
        one_hot = torch.zeros(1, logit.size(1)).scatter_(1, index, 1)
        return torch.sum(one_hot * logit)

    def __compute_cam(self, feature_map, grads):
        """
        feature_map: np.ndarray [C, H, W]
        grads:       np.ndarray [C, H, W]
        return:      np.ndarray [H', W'] scaled into [0, 1]
        """
        # Channel weights: global average of the gradients per channel.
        alpha = np.mean(grads, axis=(1, 2))
        cam = np.tensordot(alpha, feature_map, axes=1).astype(np.float32)
        cam = np.maximum(cam, 0)  # ReLU: keep positively-contributing regions
        cam = cv2.resize(cam, self.size)
        # Min-max normalize: divide by the range (the original divided by the
        # raw max) and guard against an all-constant map (division by zero).
        cam -= cam.min()
        peak = cam.max()
        return cam / peak if peak > 0 else cam

    def __show_cam_on_image(self, img: np.ndarray, mask: np.ndarray,
                            if_show=True, if_write=False):
        """Blend a jet-colormap heat map of `mask` onto float BGR `img`."""
        heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
        heatmap = np.float32(heatmap) / 255
        cam = heatmap + np.float32(img)
        cam = np.uint8(255 * cam / np.max(cam))
        if if_write:
            cv2.imwrite("camcam.jpg", cam)
        if if_show:
            plt.imshow(cam[:, :, ::-1])  # OpenCV is BGR; matplotlib wants RGB
            plt.show()
def main():
    """Load 'test.jpg' and visualize ResNet-18 Grad-CAM on its layer4."""
    img = cv2.imread('test.jpg', 1)
    if img is None:
        # cv2.imread returns None (no exception) on a missing/unreadable file;
        # fail loudly instead of crashing later inside preprocessing.
        raise FileNotFoundError("could not read 'test.jpg'")
    net = resnet18(pretrained=True)
    grad_cam = GradCAM(net, 'layer4', (224, 224),
                       [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    grad_cam.forward(img, show=True, write=False)


if __name__ == '__main__':
    main()