200行代码实现CNN卷积结果的可视化

作者: 吃醋不吃辣的雷儿 | 来源:发表于2022-09-23 13:48 被阅读0次

200行代码实现CNN卷积结果的可视化
2019-05-04 Day13
利用Python实现卷积神经网络的可视化
18- OpenCV+TensorFlow 入门人工智能图像处理
CNN
CNN中的卷积操作
keras_CNN
深度学习笔记3：实现一个卷积神经网络
CNN资料
python 神经网络入门

from PIL import Image
import os
import numpy as np
import torch
import torch.nn as nn
import copy
from torch.autograd import Variable
from torchvision import models
import matplotlib.cm as mpl_color_map


def preprocess(pil_im, resize=True):
    """
        Converts an image into a normalized input tensor for an ImageNet CNN

    Args:
        pil_im (PIL image or numpy arr): Image to process; anything that is not
            a PIL image is first passed through Image.fromarray
        resize (bool): Resize to 224x224 before normalizing or not
    returns:
        image_variable (torch tensor): Float tensor of shape (1, 3, H, W) with
            requires_grad set to True
    """
    # Per-channel mean and std for RGB channels in ImageNet, shaped (3, 1, 1)
    # so they broadcast over a channel-first image.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)

    # isinstance (rather than an exact type comparison) also accepts Image
    # subclasses such as the file-backed images returned by Image.open.
    if not isinstance(pil_im, Image.Image):
        pil_im = Image.fromarray(pil_im)
    # Force exactly three channels so grayscale/RGBA inputs match mean and std.
    pil_im = pil_im.convert("RGB")
    if resize:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        pil_im = pil_im.resize((224, 224), Image.LANCZOS)

    # PIL yields (H, W, C); the network expects channel-first (C, H, W).
    image_array = np.asarray(pil_im, dtype=np.float32).transpose(2, 0, 1)
    image_array = (image_array / 255 - mean) / std
    image_array = np.ascontiguousarray(image_array, dtype=np.float32)

    # Add the batch dimension: (C, H, W) -> (1, C, H, W).
    image_tensor = torch.from_numpy(image_array).float().unsqueeze(0)
    # Gradients w.r.t. the input are tracked, as in the original Variable wrapper.
    return image_tensor.requires_grad_(True)


def get_example_params(list_index):
    """
        Loads one of the bundled example images together with a pretrained AlexNet

    Args:
        list_index (int): Position of the image in the hard-coded example list

    returns:
        original_image (PIL img): Example image opened and converted to RGB
        prep_img: Result of calling preprocess on original_image
        file_name_to_export (string): Image file name without directory and extension
        pretrained_model (Pytorch model): models.alexnet created with pretrained=True
    """
    examples = [
        '../input_images/cat10.png',
        '../input_images/cat134.png',
        '../input_images/dog10014.png',
        '../input_images/panda1.png',
        '../input_images/tiger1.png',
    ]
    img_path = examples[list_index]

    # Strip the directory part and the extension to get the export name.
    base_name = os.path.basename(img_path)
    file_name_to_export, _extension = os.path.splitext(base_name)

    original_image = Image.open(img_path).convert('RGB')
    prep_img = preprocess(original_image)
    pretrained_model = models.alexnet(pretrained=True)
    return (original_image, prep_img, file_name_to_export, pretrained_model)


def format_np_output(np_arr):
    """
        Converts an image-like array into an HxWx3 uint8 array that
        Image.fromarray can turn into an RGB image.

    Args:
        np_arr (Numpy array): Matrix of shape HxW, 1xHxW or 3xHxW. Values with a
            maximum <= 1 are treated as normalized and scaled by 255; anything
            else is treated as already being on the 0-255 scale.
    returns:
        np_arr (Numpy array): uint8 array; HxWx3 for the shapes listed above
    """
    np_arr = np.asarray(np_arr)
    if np_arr.ndim == 2:
        np_arr = np_arr[np.newaxis, ...]  # HxW --> 1xHxW
    if np_arr.shape[0] == 1:
        np_arr = np.repeat(np_arr, 3, axis=0)  # 1xHxW --> 3xHxW
    if np_arr.shape[0] == 3:
        np_arr = np_arr.transpose(1, 2, 0)  # 3xHxW --> HxWx3

    # Clip before casting so out-of-range values saturate instead of wrapping.
    if np.max(np_arr) <= 1:
        np_arr = np.clip(np_arr, 0, 1) * 255
    else:
        np_arr = np.clip(np_arr, 0, 255)
    # Always return uint8: float/int64 HxWx3 arrays are rejected by Image.fromarray.
    return np_arr.astype(np.uint8)


def save_img(im_to_save, save_path):
    """
        Writes an image to disk, accepting either a numpy matrix or a PIL image

    Args:
        im_to_save (Numpy array or PIL img): Image to write; numpy arrays are
            first normalized with format_np_output and wrapped in a PIL image
        save_path (str): Path of the file to write
    """
    image = im_to_save
    if isinstance(image, np.ndarray):
        # Arrays go through the shared formatter before becoming a PIL image.
        image = Image.fromarray(format_np_output(image))
    image.save(save_path)
    
    
def apply_colormap_to_image(origin_img, activation_map, colormap_type):
    """
        Apply heatmap on image

    Args:
        origin_img (PIL img): Original image
        activation_map (numpy arr): 2D grayscale activation map; matplotlib
            colormaps expect floats in [0, 1] (or integer lookup indices)
        colormap_type (str): Name of the matplotlib colormap
    returns:
        no_trans_heatmap (PIL img): Opaque RGBA heatmap at the activation map's size
        heatmap_on_image (PIL img): RGBA original image with the heatmap
            blended on top at alpha 0.4
    """
    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; prefer the colormap
    # registry when it exists and fall back to the old accessor otherwise.
    import matplotlib
    registry = getattr(matplotlib, "colormaps", None)
    if registry is not None:
        color_map = registry[colormap_type]
    else:
        color_map = mpl_color_map.get_cmap(colormap_type)
    no_trans_heatmap = color_map(activation_map)  # HxWx4 RGBA floats

    # Semi-transparent copy used for the overlay; the opaque one is returned as is.
    heatmap = no_trans_heatmap.copy()
    heatmap[:, :, 3] = 0.4  # change alpha
    heatmap = Image.fromarray((heatmap * 255).astype(np.uint8))
    no_trans_heatmap = Image.fromarray((no_trans_heatmap * 255).astype(np.uint8))

    # Image.alpha_composite requires both images to have identical sizes, so
    # bring the overlay to the original image's size when they differ.
    if heatmap.size != origin_img.size:
        heatmap = heatmap.resize(origin_img.size, Image.LANCZOS)

    heatmap_on_image = Image.new("RGBA", origin_img.size)
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, origin_img.convert("RGBA"))
    heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap)  # heatmap + original image
    return no_trans_heatmap, heatmap_on_image
    
    
def save_class_activation_images(origin_img, activation_map, file_name):
    """
        Save the heatmap, the heatmap on the original image and the raw
        activation map as PNG files under ../results

    Args:
        origin_img (PIL img): Original image
        activation_map (numpy arr): Grayscale activation map
        file_name (str): Prefix of the exported file names
    """
    results_dir = "../results"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(results_dir, exist_ok=True)

    heatmap, heatmap_on_image = apply_colormap_to_image(origin_img, activation_map, "hsv")
    outputs = (
        (heatmap, "heatmap.png"),
        (heatmap_on_image, "heatmap_on_image.png"),
        (activation_map, "activation_map.png"),
    )
    for image, suffix in outputs:
        save_img(image, os.path.join(results_dir, file_name + suffix))


class Camextractor():
    """
        Class activation map extractor: runs the model layer by layer, keeps the
        output of the target layer in model.features and captures the gradient
        flowing back into that output.
    """

    def __init__(self, model, target_layer):
        """
        Args:
            model (Pytorch model): Model exposing `features` and `classifier`
            target_layer (int): Index of the layer inside model.features to tap
        """
        self.model = model
        self.target_layer = int(target_layer)
        self.gradient = None  # filled by save_gradient during backward

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient of the target layer output."""
        self.gradient = grad

    def conv_output(self, x):
        """
            Forward pass through model.features, saving the target layer output

        Args:
            x (torch tensor): Input batch
        returns:
            conv_out (torch tensor or None): Output of the target layer, or None
                when no layer index equals target_layer
            x (torch tensor): Output of the last layer in model.features
        """
        conv_out = None
        # Child names are converted with int(), so model.features is expected
        # to use numeric names (as nn.Sequential does).
        for layer_index, layer in self.model.features.named_children():
            print("layer_index:", layer_index, "layer:", layer)
            x = layer(x)  # forward for layer at layer_index
            if int(layer_index) == self.target_layer:
                x.register_hook(self.save_gradient)  # capture gradient on backward
                conv_out = x
        return conv_out, x

    def forward_pass(self, x):
        """
            Forward pass for the whole model

        Args:
            x (torch tensor): Input batch
        returns:
            conv_out (torch tensor or None): Output of the target layer
            x (torch tensor): Output of model.classifier (raw scores, no softmax)
        """
        conv_out, x = self.conv_output(x)
        # Models such as torchvision's AlexNet pool between features and
        # classifier; skipping that step only works when the feature map
        # already has the pooled size.
        avgpool = getattr(self.model, "avgpool", None)
        if avgpool is not None:
            x = avgpool(x)
        x = x.reshape(x.size(0), -1)  # flatten to (batch, features)
        x = self.model.classifier(x)
        return conv_out, x
    
class Layercam():
    """
        Produces class activation map using LayerCam method
    """

    def __init__(self, model, target_layer):
        """
        Args:
            model (Pytorch model): Model exposing `features` and `classifier`
            target_layer (int): Index of the layer inside model.features to visualize
        """
        self.model = model
        self.model.eval()  # evaluation mode (affects layers such as Dropout / BatchNorm)
        self.target_layer = int(target_layer)
        self.extractor = Camextractor(self.model, self.target_layer)

    def generate_cam(self, input_image):
        """
            Builds the activation map for the class the model scores highest

        Args:
            input_image (torch tensor): Batch of one image, shape (1, C, H, W)
        returns:
            cam (numpy arr): Float map of shape (H, W) with values in [0, 1]
        """
        # Forward pass, keeping the target layer output.
        conv_out, model_out = self.extractor.forward_pass(input_image)
        if conv_out is None:
            raise ValueError(
                "target_layer %d was not found in model.features" % self.target_layer
            )

        # Back-propagate a one-hot vector for the highest scoring class.
        target_class = int(torch.argmax(model_out.detach()))
        one_hot_out = torch.zeros_like(model_out)
        one_hot_out[0, target_class] = 1
        self.model.features.zero_grad()
        self.model.classifier.zero_grad()
        model_out.backward(gradient=one_hot_out, retain_graph=True)

        # detach().cpu() also works when the model does not live on the CPU.
        target_out = conv_out.detach().cpu().numpy()[0]
        gradient = self.extractor.gradient.detach().cpu().numpy()[0]
        # ReLU on a fresh array so the stored gradient is not modified in place.
        weight = np.maximum(gradient, 0)
        # Element-wise weighting of the activations, summed over channels.
        cam = np.sum(weight * target_out, axis=0)

        # Normalize to [0, 1]; a constant map would otherwise divide by zero.
        cam_min = np.min(cam)
        cam_range = np.max(cam) - cam_min
        if cam_range > 0:
            cam = (cam - cam_min) / cam_range
        else:
            cam = np.zeros_like(cam)
        cam = np.uint8(cam * 255)  # [0, 255]

        # PIL sizes are (width, height) while the tensor is (..., H, W).
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        height, width = input_image.shape[2], input_image.shape[3]
        resized = Image.fromarray(cam).resize((width, height), Image.LANCZOS)
        cam = np.asarray(resized, dtype=np.uint8) / 255
        return cam
    
    
if __name__ == "__main__":

    # Index into the example list of get_example_params: 4 is '../input_images/tiger1.png'.
    target_example = 4
    original_image, prep_img, file_name_to_export, pretrained_model = get_example_params(target_example)

    # Tap layer 9 of model.features and build the activation map for the input.
    cam = Layercam(pretrained_model, target_layer=9).generate_cam(prep_img)

    # Writes the heatmap, the overlay and the raw activation map.
    save_class_activation_images(original_image, cam, file_name_to_export)
    print('Layer cam completed')

tiger1heatmap_on_image.png

tiger1heatmap.png

tiger1activation_map.png

tiger1.png
图一到图四分别为：热力图+原图、热力图、cam、原图。
这里推荐：
https://github.com/utkuozbulak/pytorch-cnn-visualizations
本篇是在该 GitHub 仓库的 LayerCAM 实现基础上修改而来：把常用的函数封装到了同一个文件里，并且不需要手动指定 target class，而是调用预训练好的 AlexNet 对输入图片进行预测，取预测概率最大的类别作为目标类别。代码可以对任意本地图片进行预测，但图片最好来自 ImageNet 并已 resize 为 224x224。resize 图片的代码很简单，调用 Image 库几行代码即可，此处不再粘贴。

200行代码实现CNN卷积结果的可视化
图一到图四分别为：热力图+原图、热力图、cam、原图。这里推荐：本篇是根据该github上的layercam方...
2019-05-04 Day13
Day13 CNN的实现&CNN可视化&具有代表性的CNN 7.5 CNN的实现按照Convolution ->...
利用Python实现卷积神经网络的可视化
在本文中，将探讨如何可视化卷积神经网络（CNN），该网络在计算机视觉中使用最为广泛。首先了解CNN模型可视化的重要...
18- OpenCV+TensorFlow 入门人工智能图像处理
cnn卷积神经网络实现手写数字识别卷积层 & 池化层实现 padding参数决定卷积核是否可以停留边缘。全连接...
CNN
参考：CNN卷积神经网络原理讲解+图片识别应用（附源码）卷积神经网络 – CNN深入学习卷积神经网络（CNN）的原...
CNN中的卷积操作
目录： 1.CNN中的卷积操作直接卷积法通用矩阵乘法GEMM 2.手动实现Conv2d 一、卷积神经网络中的卷积操...
keras_CNN
本文主要讲CNN（Convolutional Neural Networks）卷积神经网络在 keras 上的代码...
深度学习笔记3：实现一个卷积神经网络
一、卷积神经网络(CNN) 卷积神经网络（ConvolutionalNeural Network,CNN）是人工神...
CNN资料
卷积种类CNN公式推导CNN公式说明
python 神经网络入门
Python 徒手实现卷积神经网络 CNN - 知乎 (zhihu.com)[https://zhuanlan....