美文网首页
Pytorch版本cam图经过resnet50网络

Pytorch版本cam图经过resnet50网络

作者: 黑恶歌王 | 来源:发表于2019-03-06 10:31 被阅读0次

    前言

    cam图可以打印出类似热力图的效果,并把它叠加在原图上。我们做attention机制时,常常想用这个工具来看一下图片经过不同层时的处理细节是怎样的。因为原始工程只有经过imagenet预训练的vgg19网络,从issue里看,好多人问有没有resnet50网络的版本。想了想,还是觉得把这个心事了结才算是修成正果,这样不只是嵌套resnet50,其他的甚至自己的网络(特指处理图片这一块的2d网络)都可以按照同样的方式嵌套进去。

    正文

    原始工程给出的pytorch的版本,非常棒,我们正好也是基于这个框架进行工作,虽说谈不上得心,但是相对应手。原始工程其实只有一个文件是用来跑demo的,就是里面的 grad-cam.py 文件。内容如下:

    import torch
    from torch.autograd import Variable
    from torch.autograd import Function
    from torchvision import models
    from torchvision import utils
    import cv2
    import sys
    import numpy as np
    import argparse
    
    class FeatureExtractor():
        """Run a module container child by child and capture selected layers.

        For every child whose name appears in ``target_layers`` the output is
        kept as an activation, and a hook is registered on it so that its
        gradient is recorded when a backward pass runs later.
        """

        def __init__(self, model, target_layers):
            self.gradients = []
            self.target_layers = target_layers
            self.model = model

        def save_gradient(self, grad):
            """Hook callback: store the gradient handed over by autograd."""
            self.gradients.append(grad)

        def __call__(self, x):
            """Feed ``x`` through each child of ``self.model`` in order.

            Returns:
                A pair ``(activations, x)``: the list of outputs of the
                targeted layers and the output of the last child.
            """
            # Gradients recorded by a previous pass are discarded.
            self.gradients = []
            activations = []
            for layer_name, layer in self.model._modules.items():
                x = layer(x)
                if layer_name not in self.target_layers:
                    continue
                x.register_hook(self.save_gradient)
                activations.append(x)
            return activations, x
    
    class ModelOutputs():
        """Forward-pass helper for models split into ``features`` and ``classifier``.

        One call yields:
        1. the network output,
        2. the activations of the targeted intermediate layers,
        and makes the gradients of those layers available through
        ``get_gradients`` once a backward pass has run.
        """

        def __init__(self, model, target_layers):
            self.model = model
            # Only the ``features`` part is walked layer by layer.
            self.feature_extractor = FeatureExtractor(self.model.features, target_layers)

        def get_gradients(self):
            """Return the gradients recorded by the feature extractor."""
            return self.feature_extractor.gradients

        def __call__(self, x):
            """Return ``(target_activations, output)`` for the input ``x``."""
            activations, feature_map = self.feature_extractor(x)
            # Flatten everything but the batch dimension for the classifier.
            flattened = feature_map.view(feature_map.size(0), -1)
            return activations, self.model.classifier(flattened)
    
    def preprocess_image(img):
        """Turn an H x W x 3 image array into a normalized 1 x 3 x H x W tensor.

        The channel order of a copy of ``img`` is reversed, each channel is
        shifted and scaled by the fixed mean / std below, the axes are moved to
        channel-first layout and a batch dimension is prepended. ``img`` itself
        is left untouched.

        Returns:
            A ``Variable`` with ``requires_grad=True``.
        """
        channel_means = [0.485, 0.456, 0.406]
        channel_stds = [0.229, 0.224, 0.225]

        # Work on a copy viewed with the last axis reversed.
        flipped = img.copy()[:, :, ::-1]
        for ch, (mu, sigma) in enumerate(zip(channel_means, channel_stds)):
            flipped[:, :, ch] = flipped[:, :, ch] - mu
            flipped[:, :, ch] = flipped[:, :, ch] / sigma

        # from_numpy needs a contiguous buffer, the transposed view is not one.
        channel_first = np.ascontiguousarray(np.transpose(flipped, (2, 0, 1)))
        tensor = torch.from_numpy(channel_first)
        tensor.unsqueeze_(0)
        return Variable(tensor, requires_grad=True)
    
    def show_cam_on_image(img, mask):
        """Blend ``mask`` as a JET heatmap over ``img`` and write ``cam.jpg``.

        ``mask`` is scaled by 255 and colour-mapped; the result (scaled back by
        1/255) is added to ``img``, the sum is divided by its maximum and saved
        as an 8-bit image in the current working directory.
        NOTE(review): presumably ``img`` is a float image in [0, 1] and ``mask``
        a 2-D map in [0, 1] of the same spatial size - confirm against callers.
        """
        mask_u8 = np.uint8(255 * mask)
        colored = cv2.applyColorMap(mask_u8, cv2.COLORMAP_JET)
        overlay = np.float32(colored) / 255 + np.float32(img)
        # Rescale so the brightest value becomes 1 before the 8-bit conversion.
        overlay = overlay / np.max(overlay)
        cv2.imwrite("cam.jpg", np.uint8(255 * overlay))
    
    class GradCam:
        """Compute a class-activation heatmap from the gradients of a target layer.

        The wrapped model must expose ``features`` and ``classifier``
        sub-modules: both are zeroed individually before the backward pass.
        """

        def __init__(self, model, target_layer_names, use_cuda):
            """Put ``model`` in eval mode, optionally move it to the GPU and
            build the activation/gradient extractor for ``target_layer_names``."""
            self.model = model
            self.model.eval()
            self.cuda = use_cuda
            if self.cuda:
                self.model = model.cuda()

            self.extractor = ModelOutputs(self.model, target_layer_names)

        def forward(self, input):
            """Plain forward pass through the wrapped model."""
            return self.model(input)

        def __call__(self, input, index=None):
            """Return the heatmap for class ``index`` as a 2-D float array.

            Args:
                input: image batch tensor; only the first sample is used and
                    its last two dimensions are taken as (height, width).
                index: class index to explain. ``None`` selects the
                    highest-scoring class.

            Returns:
                An array with the spatial size of ``input`` and values in
                [0, 1]; all zeros when the map has no positive response.
            """
            if self.cuda:
                features, output = self.extractor(input.cuda())
            else:
                features, output = self.extractor(input)

            if index is None:
                index = np.argmax(output.cpu().data.numpy())

            # Scalar score of the chosen class, built as <one_hot, output>.
            one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
            one_hot[0][index] = 1
            one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
            if self.cuda:
                one_hot = torch.sum(one_hot.cuda() * output)
            else:
                one_hot = torch.sum(one_hot * output)

            self.model.features.zero_grad()
            self.model.classifier.zero_grad()
            # ``retain_variables`` was removed from PyTorch; ``retain_graph`` is
            # the supported keyword.
            one_hot.backward(retain_graph=True)

            grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()

            target = features[-1]
            target = target.cpu().data.numpy()[0, :]

            # One weight per channel: the spatial mean of its gradient.
            weights = np.mean(grads_val, axis=(2, 3))[0, :]
            cam = np.zeros(target.shape[1:], dtype=np.float32)

            for i, w in enumerate(weights):
                cam += w * target[i, :, :]

            cam = np.maximum(cam, 0)
            # Resize to the input resolution (cv2 expects (width, height))
            # instead of a hard-coded 224 x 224.
            height, width = int(input.size()[-2]), int(input.size()[-1])
            cam = cv2.resize(cam, (width, height))
            cam = cam - np.min(cam)
            peak = np.max(cam)
            # Avoid 0/0 -> NaN when the map is constant.
            if peak > 0:
                cam = cam / peak
            return cam
    
    class GuidedBackpropReLU(Function):
        """ReLU whose backward pass only lets positive gradients through.

        Written as a new-style autograd function (static methods taking a
        context object); use it through ``GuidedBackpropReLU.apply(tensor)``.
        Legacy functions with instance-level ``forward``/``backward`` are
        rejected by current PyTorch releases.
        """

        @staticmethod
        def forward(ctx, input):
            """Return ``input`` with non-positive entries zeroed."""
            positive_mask = (input > 0).type_as(input)
            ctx.save_for_backward(input)
            return input * positive_mask

        @staticmethod
        def backward(ctx, grad_output):
            """Pass the gradient only where both the forward input and the
            incoming gradient are positive."""
            input, = ctx.saved_tensors
            positive_mask_1 = (input > 0).type_as(grad_output)
            positive_mask_2 = (grad_output > 0).type_as(grad_output)
            return grad_output * positive_mask_1 * positive_mask_2

    class _GuidedBackpropReLUModule(torch.nn.Module):
        """``nn.Module`` wrapper so the function can stand in for ``nn.ReLU``."""

        def forward(self, input):
            return GuidedBackpropReLU.apply(input)

    class GuidedBackpropReLUModel:
        """Wrap a model and compute guided-backprop gradients w.r.t. its input.

        Every ``ReLU`` directly inside ``model.features`` is replaced in place.
        """

        def __init__(self, model, use_cuda):
            self.model = model
            self.model.eval()
            self.cuda = use_cuda
            if self.cuda:
                self.model = model.cuda()

            # replace ReLU with GuidedBackpropReLU
            for idx, module in self.model.features._modules.items():
                if module.__class__.__name__ == 'ReLU':
                    self.model.features._modules[idx] = _GuidedBackpropReLUModule()

        def forward(self, input):
            """Plain forward pass through the wrapped model."""
            return self.model(input)

        def __call__(self, input, index=None):
            """Return the gradient of the class score w.r.t. ``input``.

            Args:
                input: leaf tensor with ``requires_grad=True``; the code
                    indexes the gradient with four axes, so a 4-D batch is
                    expected.
                index: class index; ``None`` selects the highest-scoring class.

            Returns:
                Array holding the gradient of the first sample (batch axis
                removed), independent of the tensor's grad buffer.
            """
            # Drop gradients left on ``input`` by earlier backward passes (for
            # example a Grad-CAM run); they would otherwise be summed into the
            # result.
            input.grad = None

            if self.cuda:
                output = self.forward(input.cuda())
            else:
                output = self.forward(input)

            if index is None:
                index = np.argmax(output.cpu().data.numpy())

            one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
            one_hot[0][index] = 1
            one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
            if self.cuda:
                one_hot = torch.sum(one_hot.cuda() * output)
            else:
                one_hot = torch.sum(one_hot * output)

            # ``retain_variables`` was removed from PyTorch; use ``retain_graph``.
            one_hot.backward(retain_graph=True)

            # Copy so later backward passes cannot alter the returned array.
            output = input.grad.cpu().data.numpy().copy()
            output = output[0, :, :, :]

            return output
    
    def get_args(argv=None):
        """Parse the command-line options of the demo script.

        Args:
            argv: optional list of argument strings. ``None`` (the default)
                lets argparse read the process command line, as before.

        Returns:
            Namespace with ``image_path`` and ``use_cuda``; ``use_cuda`` is
            only true when the flag was given and CUDA is available.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--use-cuda', action='store_true', default=False,
                            help='Use NVIDIA GPU acceleration')
        parser.add_argument('--image-path', type=str, default='./examples/both.png',
                            help='Input image path')
        args = parser.parse_args(argv)
        # Silently fall back to the CPU when no GPU is usable.
        args.use_cuda = args.use_cuda and torch.cuda.is_available()
        if args.use_cuda:
            print("Using GPU for acceleration")
        else:
            print("Using CPU for computation")

        return args
    
    if __name__ == '__main__':
        # Usage: python grad_cam.py [--image-path <path_to_image>] [--use-cuda]
        # 1. Loads an image with opencv.
        # 2. Preprocesses it for VGG19 and converts to a pytorch variable.
        # 3. Makes a forward pass to find the category index with the highest
        #    score, and computes intermediate activations.
        # 4. Makes the visualizations (cam.jpg, gb.jpg, cam_gb.jpg).

        args = get_args()

        # Can work with any model, but it assumes that the model has a
        # feature method, and a classifier method,
        # as in the VGG models in torchvision.
        grad_cam = GradCam(model=models.vgg19(pretrained=True),
                           target_layer_names=["35"], use_cuda=args.use_cuda)

        img = cv2.imread(args.image_path, 1)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise IOError("Could not read image: " + args.image_path)
        img = np.float32(cv2.resize(img, (224, 224))) / 255
        input = preprocess_image(img)

        # If None, returns the map for the highest scoring category.
        # Otherwise, targets the requested index.
        target_index = None

        mask = grad_cam(input, target_index)

        show_cam_on_image(img, mask)

        gb_model = GuidedBackpropReLUModel(model=models.vgg19(pretrained=True), use_cuda=args.use_cuda)
        gb = gb_model(input, index=target_index)
        utils.save_image(torch.from_numpy(gb), 'gb.jpg')

        # Repeat the 2-D heatmap for every channel of the guided-backprop result.
        cam_mask = np.zeros(gb.shape)
        for i in range(0, gb.shape[0]):
            cam_mask[i, :, :] = mask

        cam_gb = np.multiply(cam_mask, gb)
        # Kept inside the guard: at module level this line would fail with a
        # NameError whenever the file is imported.
        utils.save_image(torch.from_numpy(cam_gb), 'cam_gb.jpg')
    

    可以看到里面用的是vgg19网络,并且是经过imagenet预训练的(从torchvision中找到的标准预训练模型)。但是我们想要做的是用resnet50来看看不同的效果,这里我们就直接先给上我们的微调代码链接,也是同样的文件,只不过我们把模型修改成了resnet50预训练模型。下面先把代码贴出来:

    import torch
    from torch.autograd import Variable
    from torch.autograd import Function
    from torchvision import models
    from torchvision import utils
    import cv2
    import sys
    import numpy as np
    import argparse
    # Module-level pretrained ResNet-50, created as soon as this file is
    # executed or imported. It is shared as a global: ModelOutputs applies its
    # ``fc`` layer and GuidedBackpropReLUModel refers to the whole network.
    resnet = models.resnet50(pretrained=True)
    class FeatureExtractor():
        """Walk the children of a module one after another, keep the outputs of
        the targeted ones and hook them so their gradients get recorded."""

        def __init__(self, model, target_layers):
            self.target_layers = target_layers
            self.gradients = []
            self.model = model

        def save_gradient(self, grad):
            """Hook callback: append the incoming gradient to ``gradients``."""
            self.gradients.append(grad)

        def __call__(self, x):
            """Return ``(activations, x)`` after passing ``x`` through every child.

            The name and output size of each child are printed along the way.
            """
            # Start from a clean slate; old gradients belong to an earlier pass.
            self.gradients = []
            captured = []
            for child_name, child in self.model._modules.items():
                x = child(x)
                print('name=', child_name)
                print('x.size()=', x.size())
                if child_name not in self.target_layers:
                    continue
                x.register_hook(self.save_gradient)
                captured.append(x)
            return captured, x
    
    class ModelOutputs():
        """Forward-pass helper for the ResNet variant.

        One call yields:
        1. the network output,
        2. the activations of the targeted intermediate layers,
        and makes the gradients of those layers available through
        ``get_gradients`` once a backward pass has run.

        All children of ``model`` are run in sequence; the classification is
        then done with the ``fc`` layer of the module-level ``resnet``.
        """

        def __init__(self, model, target_layers):
            self.model = model
            self.feature_extractor = FeatureExtractor(self.model, target_layers)

        def get_gradients(self):
            """Return the gradients recorded by the feature extractor."""
            return self.feature_extractor.gradients

        def __call__(self, x):
            """Return ``(target_activations, output)`` for the input ``x``."""
            target_activations, output = self.feature_extractor(x)
            output = output.view(output.size(0), -1)
            print('classfier=',output.size())
            # The global ``resnet`` is never moved by GradCam, so with CUDA the
            # features live on the GPU while ``fc`` stays on the CPU. Move the
            # layer to wherever the features are (a no-op on the CPU path).
            classifier = resnet.fc.to(output.device)
            output = classifier(output)
            return target_activations, output
    
    def preprocess_image(img):
        """Normalize an H x W x 3 image array and return it as a 1 x 3 x H x W
        ``Variable`` that requires gradients.

        A copy of ``img`` has its channel axis reversed, then every channel has
        the matching mean subtracted and is divided by the matching std.
        """
        normalization = (
            (0.485, 0.229),
            (0.456, 0.224),
            (0.406, 0.225),
        )

        # Reversed-channel view of a private copy; ``img`` stays unchanged.
        work = img.copy()[:, :, ::-1]
        for channel, (mean, std) in enumerate(normalization):
            work[:, :, channel] = work[:, :, channel] - mean
            work[:, :, channel] = work[:, :, channel] / std

        # Channel-first layout in a contiguous buffer, as from_numpy requires.
        work = np.transpose(work, (2, 0, 1))
        batch = torch.from_numpy(np.ascontiguousarray(work))
        batch.unsqueeze_(0)
        return Variable(batch, requires_grad=True)
    
    def show_cam_on_image(img, mask):
        """Write ``cam.jpg``: ``img`` with ``mask`` added on top as a JET heatmap.

        NOTE(review): presumably ``img`` is a float image in [0, 1] and ``mask``
        a 2-D map in [0, 1] of the same spatial size - confirm against callers.
        """
        jet = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
        blended = np.float32(jet) / 255
        blended = blended + np.float32(img)
        # Normalize by the maximum so the result fits the 8-bit range.
        scaled = blended / np.max(blended)
        cv2.imwrite("cam.jpg", np.uint8(255 * scaled))
    
    class GradCam:
        """Compute a class-activation heatmap from the gradients of a target layer."""

        def __init__(self, model, target_layer_names, use_cuda):
            """Put ``model`` in eval mode, optionally move it to the GPU and
            build the activation/gradient extractor for ``target_layer_names``."""
            self.model = model
            self.model.eval()
            self.cuda = use_cuda
            if self.cuda:
                self.model = model.cuda()

            self.extractor = ModelOutputs(self.model, target_layer_names)

        def forward(self, input):
            """Plain forward pass through the wrapped model."""
            return self.model(input)

        def __call__(self, input, index=None):
            """Return the heatmap for class ``index`` as a 2-D float array.

            Args:
                input: image batch tensor; only the first sample is used and
                    its last two dimensions are taken as (height, width).
                index: class index to explain. ``None`` selects the
                    highest-scoring class.

            Returns:
                An array with the spatial size of ``input`` and values in
                [0, 1]; all zeros when the map has no positive response.
            """
            if self.cuda:
                features, output = self.extractor(input.cuda())
            else:
                features, output = self.extractor(input)

            if index is None:
                index = np.argmax(output.cpu().data.numpy())

            # Scalar score of the chosen class, built as <one_hot, output>.
            one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
            one_hot[0][index] = 1
            one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
            if self.cuda:
                one_hot = torch.sum(one_hot.cuda() * output)
            else:
                one_hot = torch.sum(one_hot * output)

            # A single call is enough (the original repeated it).
            self.model.zero_grad()
            one_hot.backward(retain_graph=True)

            grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()

            target = features[-1]
            target = target.cpu().data.numpy()[0, :]

            # One weight per channel: the spatial mean of its gradient.
            weights = np.mean(grads_val, axis=(2, 3))[0, :]
            cam = np.zeros(target.shape[1:], dtype=np.float32)

            for i, w in enumerate(weights):
                cam += w * target[i, :, :]

            cam = np.maximum(cam, 0)
            # Resize to the input resolution (cv2 expects (width, height))
            # instead of a hard-coded 224 x 224.
            height, width = int(input.size()[-2]), int(input.size()[-1])
            cam = cv2.resize(cam, (width, height))
            cam = cam - np.min(cam)
            peak = np.max(cam)
            # Avoid 0/0 -> NaN when the map is constant.
            if peak > 0:
                cam = cam / peak
            return cam
    
    class GuidedBackpropReLU(Function):
        """ReLU whose backward pass only lets positive gradients through.

        Written as a new-style autograd function (static methods taking a
        context object); use it through ``GuidedBackpropReLU.apply(tensor)``.
        Legacy functions with instance-level ``forward``/``backward`` are
        rejected by current PyTorch releases.
        """

        @staticmethod
        def forward(ctx, input):
            """Return ``input`` with non-positive entries zeroed."""
            positive_mask = (input > 0).type_as(input)
            ctx.save_for_backward(input)
            return input * positive_mask

        @staticmethod
        def backward(ctx, grad_output):
            """Pass the gradient only where both the forward input and the
            incoming gradient are positive."""
            input, = ctx.saved_tensors
            positive_mask_1 = (input > 0).type_as(grad_output)
            positive_mask_2 = (grad_output > 0).type_as(grad_output)
            return grad_output * positive_mask_1 * positive_mask_2

    class _GuidedBackpropReLUModule(torch.nn.Module):
        """``nn.Module`` wrapper so the function can stand in for ``nn.ReLU``."""

        def forward(self, input):
            return GuidedBackpropReLU.apply(input)

    class GuidedBackpropReLUModel:
        """Wrap a model and compute guided-backprop gradients w.r.t. its input.

        Every ``ReLU`` in the module tree, including those nested inside
        sub-modules, is replaced in place.
        """

        def __init__(self, model, use_cuda):
            # The script section of this file passes a ResNet whose ``fc``
            # attribute was deleted; such a model cannot run a full forward
            # pass, so the complete module-level ``resnet`` is used for it.
            # Any model that still has ``fc`` is honoured as given.
            self.model = model if hasattr(model, 'fc') else resnet
            self.model.eval()
            self.cuda = use_cuda
            if self.cuda:
                # Move the model that is actually used, not the argument.
                self.model = self.model.cuda()

            # replace ReLU with GuidedBackpropReLU
            self._replace_relus(self.model)

        @staticmethod
        def _replace_relus(parent):
            """Recursively swap every ``ReLU`` child of ``parent``."""
            for idx, module in parent._modules.items():
                if module is None:
                    continue
                if module.__class__.__name__ == 'ReLU':
                    parent._modules[idx] = _GuidedBackpropReLUModule()
                else:
                    GuidedBackpropReLUModel._replace_relus(module)

        def forward(self, input):
            """Plain forward pass through the wrapped model."""
            return self.model(input)

        def __call__(self, input, index=None):
            """Return the gradient of the class score w.r.t. ``input``.

            Args:
                input: leaf tensor with ``requires_grad=True``; the code
                    indexes the gradient with four axes, so a 4-D batch is
                    expected.
                index: class index; ``None`` selects the highest-scoring class.

            Returns:
                Array holding the gradient of the first sample (batch axis
                removed), independent of the tensor's grad buffer.
            """
            # Drop gradients left on ``input`` by earlier backward passes (for
            # example a Grad-CAM run); they would otherwise be summed into the
            # result.
            input.grad = None

            if self.cuda:
                output = self.forward(input.cuda())
            else:
                output = self.forward(input)

            if index is None:
                index = np.argmax(output.cpu().data.numpy())

            one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
            one_hot[0][index] = 1
            one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
            if self.cuda:
                one_hot = torch.sum(one_hot.cuda() * output)
            else:
                one_hot = torch.sum(one_hot * output)

            one_hot.backward(retain_graph=True)

            # Copy so later backward passes cannot alter the returned array.
            output = input.grad.cpu().data.numpy().copy()
            output = output[0, :, :, :]

            return output
    
    def get_args(argv=None):
        """Parse the command-line options of the demo script.

        Args:
            argv: optional list of argument strings. ``None`` (the default)
                lets argparse read the process command line, as before.

        Returns:
            Namespace with ``image_path`` and ``use_cuda``; ``use_cuda`` is
            only true when the flag was given and CUDA is available.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--use-cuda', action='store_true', default=False,
                            help='Use NVIDIA GPU acceleration')
        parser.add_argument('--image-path', type=str, default='./examples/naicha.jpg',
                            help='Input image path')
        args = parser.parse_args(argv)
        # Silently fall back to the CPU when no GPU is usable.
        args.use_cuda = args.use_cuda and torch.cuda.is_available()
        if args.use_cuda:
            print("Using GPU for acceleration")
        else:
            print("Using CPU for computation")

        return args
    
    if __name__ == '__main__':
        # Usage: python grad_cam.py [--image-path <path_to_image>] [--use-cuda]
        # 1. Loads an image with opencv.
        # 2. Preprocesses it and converts to a pytorch variable.
        # 3. Makes a forward pass through ResNet-50 to find the category index
        #    with the highest score, and computes intermediate activations.
        # 4. Makes the visualizations (cam.jpg, gb.jpg, cam_gb.jpg).

        args = get_args()

        # The extractor runs every child of the model in sequence, so the
        # final ``fc`` layer is removed here; ModelOutputs applies the ``fc``
        # of the module-level ``resnet`` after flattening instead.
        model = models.resnet50(pretrained=True)
        del model.fc
        print(model)

        grad_cam = GradCam(model,
                           target_layer_names=["layer4"], use_cuda=args.use_cuda)

        img = cv2.imread(args.image_path, 1)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise IOError("Could not read image: " + args.image_path)
        img = np.float32(cv2.resize(img, (224, 224))) / 255
        input = preprocess_image(img)
        print('input.size()=',input.size())
        # If None, returns the map for the highest scoring category.
        # Otherwise, targets the requested index.
        target_index = None

        mask = grad_cam(input, target_index)

        show_cam_on_image(img, mask)

        gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
        gb = gb_model(input, index=target_index)
        utils.save_image(torch.from_numpy(gb), 'gb.jpg')

        # Repeat the 2-D heatmap for every channel of the guided-backprop result.
        cam_mask = np.zeros(gb.shape)
        for i in range(0, gb.shape[0]):
            cam_mask[i, :, :] = mask

        cam_gb = np.multiply(cam_mask, gb)
        # Kept inside the guard: at module level this line would fail with a
        # NameError whenever the file is imported.
        utils.save_image(torch.from_numpy(cam_gb), 'cam_gb.jpg')
    

    里面有几个不一样的地方,可以把两份代码下载下来之后用IDE比对差异(difference),这样比较快,理解也会更深一些。修改的地方我用注释标注了一下,正文中不再赘述。不过注释都是在github里新更新的那一版里,各位有需要可以自取,我实在是懒。

    效果

    百度百科搜的猫图
    cam图

    总结

    凡事多动动脑子,多查一查文献,尽管再懒,自己不想思考,但是网络时代知识是共享的,就连特摄里这两年都流行借力量用了,我们平凡人借网上神人们的知识一用,又不是什么丢脸的事。一些中间查过的文献我已经找不清楚从哪里来的了,总之自己一搜都会搜到,而且真正厉害的大牛也不会看到这里的,自娱自乐罢了,谢谢。

    相关文章

      网友评论

          本文标题:Pytorch版本cam图经过resnet50网络

          本文链接:https://www.haomeiwen.com/subject/wvgzuqtx.html