Pytorch之图像分类（多分类，Multi-Class Image Classification）

作者: 深思海数_willschang | 来源:发表于2021-09-03 08:27 被阅读0次

Pytorch之图像分类（多分类，Mutli-Class Ima
pytorch之图像分类
Pytorch图像分类
Pytorch实战-图像分类
使用pytorch深度学习框架实现mnist数据集的图像分类
迁移学习_pytorch简单实战
pytorch中的损失函数
图像分类
使用PyTorch建立图像分类模型
pytorch 图像分类(CIFAR10)

示例数据集：STL-10数据集，pytorch的torchvision包里有提供该数据。

引入包

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.pylab as plab
from PIL import Image, ImageDraw
import numpy as np
import pandas as pd
import os
import copy
import collections
from sklearn.model_selection import StratifiedShuffleSplit

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split, Subset
import torchvision.transforms as transforms
from torchvision import models,utils, datasets
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchsummary import summary

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Worker-process count meant for DataLoader's num_workers:
# 0 on Windows (os.name == 'nt'), 4 everywhere else
workers = 0 if os.name=='nt' else 4

一些中间辅助函数

# Display an image tensor
def img_show(img, y=None, color=True, title='default'):
    """Draw an image tensor in a new 10x10-inch matplotlib figure.

    Args:
        img: Object exposing ``.numpy()``; the resulting array has its axes
            reordered with ``(1, 2, 0)`` before being handed to ``imshow``
            (the callers in this file pass channel-first image grids).
        y: Optional label(s). When not None they are appended to the title.
        color: Unused; kept so existing calls keep working.
        title: Prefix of the figure title (only shown when ``y`` is given).

    Returns:
        Always ``True``.
    """
    plt.figure(figsize=(10, 10))
    channels_last = img.numpy().transpose((1, 2, 0))
    plt.imshow(channels_last)
    if y is None:
        return True
    plt.title(f'{title} label: {str(y)}')
    return True


# Build one image grid from randomly chosen samples (torchvision.utils.make_grid)
def make_grid_image(ori_ds, grid_size=4):
    """Pick ``grid_size`` random samples from ``ori_ds`` and tile them in one row.

    Args:
        ori_ds: Indexable dataset with ``len()`` whose items are
            ``(image, label, ...)`` sequences.
        grid_size: Number of samples to draw. Indices come from
            ``np.random.randint``, so the same sample may appear twice.

    Returns:
        ``(x_grid, y_grid)``: the grid produced by ``utils.make_grid`` and the
        list of labels in the same order as the tiles.
    """
    rnd_inds = np.random.randint(0, len(ori_ds), grid_size)
    print("image indices:", rnd_inds)

    # Fetch every sample exactly once. The previous version indexed the
    # dataset separately for the image and for the label, so each sample was
    # loaded (and, for datasets with a transform, transformed) twice.
    samples = [ori_ds[i] for i in rnd_inds]
    images = [sample[0] for sample in samples]
    y_grid = [sample[1] for sample in samples]

    x_grid = utils.make_grid(images, nrow=grid_size, padding=2)
    print(x_grid.shape)
    return x_grid, y_grid


# Compute the per-channel mean and standard deviation of a dataset
def caculate_channels_mean_std(cal_ds):
    """Return per-channel mean and std averaged over the images of ``cal_ds``.

    For every image the mean and the standard deviation are taken over axes
    (1, 2) of the channel-first array; those per-image values are then
    averaged over the dataset. The returned std is therefore the mean of the
    per-image stds, not the std of all pixels pooled together.

    Args:
        cal_ds: Iterable of ``(image, label)`` pairs. Each image is either an
            object with ``.numpy()`` (e.g. a CPU tensor) or array-like, with
            at least three channels on axis 0.

    Returns:
        ``(mean_rgb, std_rgb)``: two 3-element lists ordered R, G, B.
    """
    per_image_mean = []
    per_image_std = []
    # Single pass over the dataset passed in. The previous version ignored
    # ``cal_ds``, read the global ``train_ds`` instead, and iterated it twice.
    for x, _ in cal_ds:
        pixels = x.numpy() if hasattr(x, "numpy") else np.asarray(x)
        per_image_mean.append(np.mean(pixels, axis=(1, 2)))
        per_image_std.append(np.std(pixels, axis=(1, 2)))

    mean_rgb = [np.mean([m[c] for m in per_image_mean]) for c in range(3)]
    std_rgb = [np.mean([s[c] for s in per_image_std]) for c in range(3)]

    print(f'each channel mean : {mean_rgb}')
    print(f'each channel std : {std_rgb}')
    return mean_rgb, std_rgb


# Read the current learning rate from an optimizer
def get_lr(opt):
    """Return the ``'lr'`` entry of the first parameter group of ``opt``.

    Returns ``None`` when ``opt.param_groups`` is empty.
    """
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

# Helper functions used by the training loop

# Number of correct predictions in a batch (compared with the ground truth)
def metrics_batch(output, target):
    """Count how many rows of ``output`` have their arg-max at the target class.

    Args:
        output: Score tensor; the predicted class is the arg-max over dim 1.
        target: Tensor of true class indices, viewable to the prediction shape.

    Returns:
        The number of matches as a Python number (via ``.item()``).
    """
    predicted = output.argmax(dim=1, keepdim=True)
    is_hit = predicted == target.view_as(predicted)
    return is_hit.sum().item()

# Loss (and optional optimizer step) for a single batch
def loss_batch(loss_func, output, target, opt=None):
    """Evaluate one batch and, when an optimizer is given, update the weights.

    Args:
        loss_func: Callable ``loss_func(output, target)`` returning a tensor.
        output: Model output for the batch.
        target: Ground-truth labels for the batch.
        opt: Optional optimizer. When not None its gradients are zeroed, the
            loss is back-propagated and one optimizer step is taken.

    Returns:
        ``(loss_value, n_correct)``: the loss as a Python number and the
        correct-prediction count reported by ``metrics_batch``.
    """
    batch_loss = loss_func(output, target)
    n_correct = metrics_batch(output, target)

    training = opt is not None
    if training:
        opt.zero_grad()
        batch_loss.backward()
        opt.step()

    return batch_loss.item(), n_correct

# Loss and accuracy over one epoch
def loss_epoch(model,loss_func,dataset_dl,sanity_check=False,opt=None):
    """Run ``model`` over ``dataset_dl`` once and return per-sample averages.

    Args:
        model: Callable network; invoked as ``model(xb)``.
        loss_func: Loss callable forwarded to ``loss_batch``. The returned
            loss is a true per-sample mean only if ``loss_func`` returns the
            sum over the batch (the script in this file builds it with
            ``reduction="sum"``).
        dataset_dl: Iterable of ``(xb, yb)`` batches; both are moved to the
            module-level ``device``.
        sanity_check: When True, stop after the first batch.
        opt: Optional optimizer forwarded to ``loss_batch``; None means
            evaluation only.

    Returns:
        ``(loss, metric)``: accumulated loss and accumulated correct count,
        each divided by the number of samples actually processed.

    Raises:
        ZeroDivisionError: if ``dataset_dl`` yields no samples.
    """
    running_loss = 0.0
    running_metric = 0.0
    # Count the samples that really went through the model. Dividing by
    # len(dataset_dl.dataset) under-reported loss and accuracy whenever the
    # loop ended early (sanity_check) or the loader skipped samples.
    n_seen = 0

    for xb, yb in dataset_dl:
        xb = xb.to(device)
        yb = yb.to(device)

        output = model(xb)
        # Per-batch loss, plus the optimizer step when opt is given
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

        running_loss += loss_b
        if metric_b is not None:
            running_metric += metric_b
        n_seen += len(xb)

        # Sanity-check mode: one batch is enough
        if sanity_check is True:
            break

    loss = running_loss / float(n_seen)
    metric = running_metric / float(n_seen)

    return loss, metric


# Plot the loss and accuracy curves
def show_loss_acc(num_epochs, loss_hist, metric_hist):
    """Show two figures: train/val loss and train/val accuracy per epoch.

    Args:
        num_epochs: Number of epochs; the x axis runs from 1 to ``num_epochs``.
        loss_hist: Mapping with ``"train"`` and ``"val"`` sequences of losses.
        metric_hist: Mapping with ``"train"`` and ``"val"`` sequences of
            accuracies.
    """
    epochs = range(1, num_epochs + 1)
    panels = (
        ("Train-Val Loss", "Loss", loss_hist),
        ("Train-Val Accuracy", "Accuracy", metric_hist),
    )
    for heading, y_label, history in panels:
        plt.title(heading)
        for split in ("train", "val"):
            plt.plot(epochs, history[split], label=split)
        plt.ylabel(y_label)
        plt.xlabel("Training Epochs")
        plt.legend()
        plt.show()

数据加载与初探

# Instantiate the datasets
# Directory where the STL-10 files are stored
path2data = "./data/multi_class/"

# makedirs also creates the missing parent "./data" and does nothing when the
# directory already exists; os.mkdir raised FileNotFoundError on a fresh
# checkout because the path is nested.
os.makedirs(path2data, exist_ok=True)

# Initial transform: only convert each image to a tensor
data_transformer = transforms.Compose([transforms.ToTensor()])

# Load (and download if necessary) the official train and test splits
train_ds = datasets.STL10(path2data, split='train', download=True, transform=data_transformer)
test0_ds=datasets.STL10(path2data, split='test', download=True, transform=data_transformer)

# Inspect the raw data shapes
print(train_ds.data.shape)
print(test0_ds.data.shape)

# Count the samples per class in the training set
y_train = [y for _, y in train_ds]
counter_train = collections.Counter(y_train)
print(counter_train)
print(train_ds.classes)

# Split the official test set into a test part (80%) and a validation part
# (20%), stratified by label
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
indices = list(range(len(test0_ds)))
y_test0 = [y for _,y in test0_ds]
# n_splits=1, so the generator yields exactly one pair of index arrays
test_index, val_index = next(sss.split(indices, y_test0))
print("test:", test_index, "\nval:", val_index)
print(len(val_index),len(test_index))

val_ds = Subset(test0_ds, val_index)
test_ds = Subset(test0_ds, test_index)
"""
Files already downloaded and verified
Files already downloaded and verified
(5000, 3, 96, 96)
(8000, 3, 96, 96)
Counter({1: 500, 5: 500, 6: 500, 3: 500, 9: 500, 7: 500, 4: 500, 8: 500, 0: 500, 2: 500})
['airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck']
test: [2096 4321 2767 ... 3206 3910 2902] 
val: [6332 6852 1532 ... 5766 4469 1011]
1600 6400
"""

np.random.seed(0)
# Show a few random training and validation images.
# The seed above fixes which indices make_grid_image draws with np.random.
train_grid, train_y_grid = make_grid_image(train_ds)
img_show(train_grid, train_y_grid, title='Train')

val_grid, val_y_grid = make_grid_image(val_ds)
img_show(val_grid, val_y_grid, title='Val')

train grid

val grid

定义转换器进行数据处理 transforms

# Build the final datasets
# Per-channel mean and standard deviation of the training set, used for the
# Normalize step below (train_ds still uses the plain ToTensor transform here)
mean_rgb, std_rgb = caculate_channels_mean_std(train_ds)
# Training transform: random horizontal/vertical flips, then tensor conversion
# and normalization
train_transformer = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  
    transforms.RandomVerticalFlip(p=0.5),  
    transforms.ToTensor(),
    transforms.Normalize(mean_rgb, std_rgb)])
                 

# Evaluation transform: no augmentation, same normalization statistics
test0_transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_rgb, std_rgb),
    ])

# Swap the transforms on the existing dataset objects.
# NOTE(review): val_ds and test_ds were built as Subset(test0_ds, ...), so they
# presumably pick up test0_transformer through the shared test0_ds - confirm.
train_ds.transform = train_transformer
test0_ds.transform = test0_transformer

# Look at the images again after the new transforms.
# np.random is seeded for the index sampling in make_grid_image; torch is
# seeded as well, presumably so the random flips are reproducible.
np.random.seed(0)
torch.manual_seed(0)
train_grid, train_y_grid = make_grid_image(train_ds)
img_show(train_grid, train_y_grid, title='Train after trans')
val_grid, val_y_grid = make_grid_image(val_ds)
img_show(val_grid, val_y_grid, title='Val after trans')
"""
each channel mean : [0.4467106, 0.43980986, 0.40664646]
each channel std : [0.22414584, 0.22148906, 0.22389975]
image indices: [2732 2607 1653 3264]
torch.Size([3, 100, 394])
image indices: [ 835  763 1383 1033]
torch.Size([3, 100, 394])
"""

train after trans.png

val after trans.png

实例化dataloader 及加载预训练模型（resnet18）

# Create the data loaders (training batches are shuffled, validation is not).
# NOTE(review): the module-level `workers` value is never passed as
# num_workers here, so both loaders use the DataLoader default.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)

# Load ResNet-18 with pretrained weights (pretrained=True)
model_resnet18 = models.resnet18(pretrained=True)
# Uncomment to print the full module tree
# print(model_resnet18)

# Replace the final fully connected layer: 1000 outputs --> 10 classes
num_classes=10
num_ftrs = model_resnet18.fc.in_features 
model_resnet18.fc = nn.Linear(num_ftrs, num_classes)

model_resnet18.to(device)
# Print a layer-by-layer summary.
# NOTE(review): the summary uses a 3x224x224 input, whereas the STL-10 arrays
# printed earlier are 3x96x96; the parameter counts are unaffected.
summary(model_resnet18, input_size=(3, 224, 224))
"""
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64, 56, 56]               0
           Conv2d-15           [-1, 64, 56, 56]          36,864
      BatchNorm2d-16           [-1, 64, 56, 56]             128
             ReLU-17           [-1, 64, 56, 56]               0
       BasicBlock-18           [-1, 64, 56, 56]               0
           Conv2d-19          [-1, 128, 28, 28]          73,728
      BatchNorm2d-20          [-1, 128, 28, 28]             256
             ReLU-21          [-1, 128, 28, 28]               0
           Conv2d-22          [-1, 128, 28, 28]         147,456
      BatchNorm2d-23          [-1, 128, 28, 28]             256
           Conv2d-24          [-1, 128, 28, 28]           8,192
      BatchNorm2d-25          [-1, 128, 28, 28]             256
             ReLU-26          [-1, 128, 28, 28]               0
       BasicBlock-27          [-1, 128, 28, 28]               0
           Conv2d-28          [-1, 128, 28, 28]         147,456
      BatchNorm2d-29          [-1, 128, 28, 28]             256
             ReLU-30          [-1, 128, 28, 28]               0
           Conv2d-31          [-1, 128, 28, 28]         147,456
      BatchNorm2d-32          [-1, 128, 28, 28]             256
             ReLU-33          [-1, 128, 28, 28]               0
       BasicBlock-34          [-1, 128, 28, 28]               0
           Conv2d-35          [-1, 256, 14, 14]         294,912
      BatchNorm2d-36          [-1, 256, 14, 14]             512
             ReLU-37          [-1, 256, 14, 14]               0
           Conv2d-38          [-1, 256, 14, 14]         589,824
      BatchNorm2d-39          [-1, 256, 14, 14]             512
           Conv2d-40          [-1, 256, 14, 14]          32,768
      BatchNorm2d-41          [-1, 256, 14, 14]             512
             ReLU-42          [-1, 256, 14, 14]               0
       BasicBlock-43          [-1, 256, 14, 14]               0
           Conv2d-44          [-1, 256, 14, 14]         589,824
      BatchNorm2d-45          [-1, 256, 14, 14]             512
             ReLU-46          [-1, 256, 14, 14]               0
           Conv2d-47          [-1, 256, 14, 14]         589,824
      BatchNorm2d-48          [-1, 256, 14, 14]             512
             ReLU-49          [-1, 256, 14, 14]               0
       BasicBlock-50          [-1, 256, 14, 14]               0
           Conv2d-51            [-1, 512, 7, 7]       1,179,648
      BatchNorm2d-52            [-1, 512, 7, 7]           1,024
             ReLU-53            [-1, 512, 7, 7]               0
           Conv2d-54            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-55            [-1, 512, 7, 7]           1,024
           Conv2d-56            [-1, 512, 7, 7]         131,072
      BatchNorm2d-57            [-1, 512, 7, 7]           1,024
             ReLU-58            [-1, 512, 7, 7]               0
       BasicBlock-59            [-1, 512, 7, 7]               0
           Conv2d-60            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-61            [-1, 512, 7, 7]           1,024
             ReLU-62            [-1, 512, 7, 7]               0
           Conv2d-63            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-64            [-1, 512, 7, 7]           1,024
             ReLU-65            [-1, 512, 7, 7]               0
       BasicBlock-66            [-1, 512, 7, 7]               0
AdaptiveAvgPool2d-67            [-1, 512, 1, 1]               0
           Linear-68                   [-1, 10]           5,130
================================================================
Total params: 11,181,642
Trainable params: 11,181,642
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 62.79
Params size (MB): 42.65
Estimated Total Size (MB): 106.01
----------------------------------------------------------------
"""

可视化第一层卷积层的权重（卷积核）

# Visualize the weights (filters) of the first convolutional layer
# The first tensor yielded by parameters() is that layer's weight; for this
# ResNet-18 its shape is [64, 3, 7, 7] (see the recorded output below)
w = next(iter(model_resnet18.parameters())).detach().cpu()
print(w.shape)

# Min-max normalize to [0, 1] so the filters can be shown as RGB images.
# The previous formula, (-1/(2*min_w))*w + 0.5, only mapped the minimum to 0
# and let the maximum exceed 1 (1.10 in the original run).
min_w = torch.min(w)
max_w = torch.max(w)
w1 = (w - min_w) / (max_w - min_w)
print(torch.min(w1).item(),torch.max(w1).item())

# Tile the filters into one grid image, 8 per row
x_grid = utils.make_grid(list(w1), nrow=8, padding=1)
print(x_grid.shape)
"""
torch.Size([64, 3, 7, 7])
0.0 1.0
torch.Size([3, 65, 65])
"""
# Show the grid. img_show opens its own figure, so the extra
# plt.figure(figsize=(5,5)) that only produced an empty figure was removed.
img_show(x_grid)

The first layer filters (conv1 weights)

模型训练及验证主函数

# Main training / validation loop
def train_val(model, params):
    """Train ``model``, validate after every epoch and keep the best weights.

    Args:
        model: Network to train. It is updated in place and also returned.
        params: Dict with the keys ``"num_epochs"``, ``"loss_func"``,
            ``"optimizer"``, ``"train_dl"``, ``"val_dl"``, ``"sanity_check"``,
            ``"lr_scheduler"`` and ``"path2weights"``.

    Returns:
        ``(model, loss_history, metric_history)``. ``model`` carries the
        weights of the epoch with the lowest validation loss; both histories
        are dicts with ``"train"`` and ``"val"`` lists, one entry per epoch.
    """
    # Unpack the configuration
    num_epochs=params["num_epochs"]
    loss_func=params["loss_func"]
    opt=params["optimizer"]
    train_dl=params["train_dl"]
    val_dl=params["val_dl"]
    sanity_check=params["sanity_check"]
    lr_scheduler=params["lr_scheduler"]
    path2weights=params["path2weights"]

    # torch.save does not create missing directories, so make sure the target
    # directory exists before the first checkpoint is written. Non-path
    # targets (e.g. file-like objects) are left alone.
    if isinstance(path2weights, (str, os.PathLike)):
        weights_dir = os.path.dirname(path2weights)
        if weights_dir:
            os.makedirs(weights_dir, exist_ok=True)

    # Per-epoch loss values
    loss_history={
        "train": [],
        "val": [],
    }

    # Per-epoch accuracy values
    metric_history={
        "train": [],
        "val": [],
    }

    # Snapshot of the best weights seen so far
    best_model_wts = copy.deepcopy(model.state_dict())

    # Lowest validation loss seen so far
    best_loss=float('inf')

    for epoch in range(num_epochs):
        # Report the learning rate used for this epoch
        current_lr=get_lr(opt)
        print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr))

        # Training pass (the optimizer is passed, so weights are updated)
        model.train()
        train_loss, train_metric=loss_epoch(model,loss_func,train_dl,sanity_check,opt)

        loss_history["train"].append(train_loss)
        metric_history["train"].append(train_metric)

        # Validation pass: no optimizer and no gradient tracking
        model.eval()
        with torch.no_grad():
            val_loss, val_metric=loss_epoch(model,loss_func,val_dl,sanity_check)

        # Keep and persist the weights whenever the validation loss improves
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

            torch.save(model.state_dict(), path2weights)
            print("Copied best model weights!")

        loss_history["val"].append(val_loss)
        metric_history["val"].append(val_metric)

        # Advance the learning-rate schedule once per epoch
        lr_scheduler.step()

        print("train loss: %.6f, dev loss: %.6f, accuracy: %.2f" %(train_loss,val_loss,100*val_metric))
        print("-"*10)

    # Restore the best weights before returning
    model.load_state_dict(best_model_wts)

    return model, loss_history, metric_history

定义损失函数，优化器及学习率更新策略并进行模型训练验证

# Loss function. reduction="sum" returns the summed loss of a batch;
# loss_epoch turns the accumulated sums into a per-sample average.
loss_fn = nn.CrossEntropyLoss(reduction="sum")
# Optimizer
opt = optim.Adam(model_resnet18.parameters(), lr=1e-4)
# Learning-rate schedule: cosine annealing between 1e-4 and eta_min
lr_scheduler = CosineAnnealingLR(opt,T_max=5,eta_min=1e-6)

params_train = {
         "num_epochs": 10,
         "optimizer": opt,
         # Fixed: this used the undefined name `loss_func` and raised
         # NameError; the loss object above is bound to `loss_fn`.
         "loss_func": loss_fn,
         "train_dl": train_dl,
         "val_dl": val_dl,
         "sanity_check": False,
         "lr_scheduler": lr_scheduler,
         "path2weights": "./models/resnet18.pt",
    }
# Train and validate the model
model_resnet18,loss_hist,metric_hist=train_val(model_resnet18,params_train)
"""
Epoch 0/9, current lr=0.0001
Copied best model weights!
train loss: 0.960643, dev loss: 0.460767, accuracy: 84.50
----------
Epoch 1/9, current lr=9.05463412215599e-05
Copied best model weights!
train loss: 0.427234, dev loss: 0.383167, accuracy: 87.06
----------
Epoch 2/9, current lr=6.57963412215599e-05
Copied best model weights!
train loss: 0.276291, dev loss: 0.354399, accuracy: 87.44
----------
Epoch 3/9, current lr=3.52036587784401e-05
Copied best model weights!
train loss: 0.192877, dev loss: 0.335165, accuracy: 88.31
----------
Epoch 4/9, current lr=1.0453658778440105e-05
Copied best model weights!
train loss: 0.158006, dev loss: 0.333249, accuracy: 88.25
----------
Epoch 5/9, current lr=1e-06
Copied best model weights!
train loss: 0.153824, dev loss: 0.326157, accuracy: 89.00
----------
Epoch 6/9, current lr=1.0453658778440102e-05
train loss: 0.144040, dev loss: 0.328791, accuracy: 88.88
----------
Epoch 7/9, current lr=3.520365877844009e-05
Copied best model weights!
train loss: 0.135462, dev loss: 0.325485, accuracy: 88.56
----------
Epoch 8/9, current lr=6.579634122155988e-05
train loss: 0.105906, dev loss: 0.360124, accuracy: 88.38
----------
Epoch 9/9, current lr=9.054634122155989e-05
train loss: 0.101154, dev loss: 0.391994, accuracy: 87.88
----------
"""
# Plot the training / validation curves
show_loss_acc(params_train['num_epochs'], loss_hist, metric_hist)

# Print the CUDA memory statistics. Guarded because the notebook also runs on
# CPU-only machines (see `device`), where memory_summary() cannot initialize
# CUDA and raises.
if torch.cuda.is_available():
    print(torch.cuda.memory_summary())
"""
|===========================================================================|
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|===========================================================================|
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  353214 KB |  712633 KB |    2865 GB |    2865 GB |
|       from large pool |  326272 KB |  682240 KB |    2846 GB |    2846 GB |
|       from small pool |   26942 KB |   40619 KB |      19 GB |      19 GB |
|---------------------------------------------------------------------------|
| Active memory         |  353214 KB |  712633 KB |    2865 GB |    2865 GB |
|       from large pool |  326272 KB |  682240 KB |    2846 GB |    2846 GB |
|       from small pool |   26942 KB |   40619 KB |      19 GB |      19 GB |
|---------------------------------------------------------------------------|
| GPU reserved memory   |  737280 KB |  737280 KB |     982 MB |  268288 KB |
|       from large pool |  694272 KB |  694272 KB |     930 MB |  258048 KB |
|       from small pool |   43008 KB |   43008 KB |      52 MB |   10240 KB |
|---------------------------------------------------------------------------|
| Non-releasable memory |  162882 KB |  167026 KB |    2405 GB |    2405 GB |
|       from large pool |  157056 KB |  165760 KB |    2384 GB |    2384 GB |
|       from small pool |    5826 KB |   10363 KB |      20 GB |      20 GB |
|---------------------------------------------------------------------------|
| Allocations           |     556    |     800    |  555489    |  554933    |
|       from large pool |      64    |     115    |  225764    |  225700    |
|       from small pool |     492    |     720    |  329725    |  329233    |
|---------------------------------------------------------------------------|
| Active allocs         |     556    |     800    |  555489    |  554933    |
|       from large pool |      64    |     115    |  225764    |  225700    |
|       from small pool |     492    |     720    |  329725    |  329233    |
|---------------------------------------------------------------------------|
| GPU reserved segments |      41    |      41    |      51    |      10    |
|       from large pool |      20    |      20    |      25    |       5    |
|       from small pool |      21    |      21    |      26    |       5    |
|---------------------------------------------------------------------------|
| Non-releasable allocs |      62    |      70    |  331818    |  331756    |
|       from large pool |      14    |      15    |  121327    |  121313    |
|       from small pool |      48    |      60    |  210491    |  210443    |
|===========================================================================|
"""

# Release GPU memory: drop the model reference, then ask PyTorch to free its
# cached CUDA blocks.
# NOTE(review): `if model_resnet18:` tests the object's truthiness, not whether
# the name exists - it raises NameError if the model was already deleted.
if model_resnet18:
    del model_resnet18 
torch.cuda.empty_cache()

Loss.png

Acc.png

Pytorch之图像分类（多分类，Mutli-Class Ima
示例数据集：STL-10数据集，pytorch的torchviso包里有提供该数据。引入包一些中间辅助函数数...
pytorch之图像分类
满心欢喜的来跑这个图像分类，上来就报了个错。安装torchvision 疯狂报这个错：raise NotSuppo...
Pytorch图像分类
1、Datasets 这段代码可以实现从图片读入数据，文件夹名为label。 2、Pytorch训练 3、将自己的...
Pytorch实战-图像分类
用图像实现Pytorch图像分类（一) 总结：使用预训练网络有什么意义当我们人类看到图像时，可以识别线条和形状。正...
使用pytorch深度学习框架实现mnist数据集的图像分类
此文章是使用pytorch实现mnist手写字体的图像分类。利用pytorch内置函数mnist下载数据，同时利用...
迁移学习_pytorch简单实战
迁移学习_pytorch实战想学习一下迁移学习，则将使用预先训练的网络，来构建用于疟疾检测的图像分类器，这个分类...
pytorch中的损失函数
1. 多标签分类损失函数 pytorch中能计算多标签分类任务loss的方法有好几个。binary_cross_e...
图像分类
图像分类入门 -图像分类的概念背景与意义所谓图像分类问题，就是已有固定的分类标签集合，然后对于输入的图像，从分...
使用PyTorch建立图像分类模型
概述在PyTorch中构建自己的卷积神经网络(CNN)的实践教程我们将研究一个图像分类问题——CNN的一个经典...
pytorch 图像分类(CIFAR10)
可以将传统的图像、音频和视频转换为 numpy 后，再由 numpy 转换为 torch.*Tensor. 对于图...