ResNet18迁移学习-动物多任务分类

作者: 就是果味熊 | 来源:发表于2020-06-12 15:14 被阅读0次

ResNet18迁移学习-动物多任务分类
吴恩达深度学习笔记(68)-多任务学习
多标签分类怎么做？(Python)
迁移学习_pytorch简单实战
表示学习、迁移学习、多任务学习
迁移学习总结
迁移学习与多任务学习简析
学习迁移
浅谈迁移学习图像分类
学习的迁移

迁移学习

迁移学习的具体内容有很多大佬文章已经说得很清楚了，这里就不献丑了。

本文尝试通过迁移学习，将Pytorch中已经预训练好的ResNet18网络用于动物图片分类。

任务

1.纲分类任务，预测该动物是属于哺乳类（Mammals）还是鸟纲（Birds）
2.种分类任务，预测该动物是鸡、兔还是鼠
3.多任务分类，同时预测纲和种

数据

数据在网盘自取(已更正）https://pan.baidu.com/s/1nrlpqWFHVRhiFNSAMsbn3w 提取码：4zrj

数据分为train dataset 和val dataset两个数据集，分别有890和80个图片数据，含有鸡、兔、鼠三种动物。数据分布如下：CLASSES = ['Mammals', 'Birds'] ，分别有580和310张图片，用0,1 对应；SPECIES = ['rabbits', 'rats', 'chickens'] 分别有300，270，310张图片，用0， 1， 2对应。数据信息如图所示

code

导入所需要的包.
这里被注释掉了，是因为起初使用的自己设定的网络，效果很差，后来就使用pytorch里自带的resNet网络进行了训练和预测。

from Classes_Network import *

from __future__ import print_function, division
import os
import matplotlib.pyplot as plt
from torch.utils.data import  DataLoader
import torch
import torchvision
#from Classes_Network import *
from torchvision.transforms import transforms

from torchvision import models

from PIL import Image
import pandas as pd
import numpy as np
import random
from torch import optim
from torch.optim import lr_scheduler
import copy
import time

设定一些参数和路径

# Directory prefix that is concatenated with the annotation file names below.
root_dir = './Stage_3 Multi-classification/'
# CSV files listing image path, class label and species label per sample.
train_annotations_file = 'Multi_train_annotation.csv'
val_annotations_file = 'Multi_val_annotation.csv'
# Human-readable names; the list position is the integer label (0, 1, ...).
CLASSES = ['Mammals', 'Birds'] 
SPECIES = ['rabbits', 'rats', 'chickens']

train_annotations_file与val_annotations_file两个文件分别标注了训练集和验证集中每张图片的路径以及对应的label，格式如图所示。

# Notebook-style preview of the training annotation table (result is not stored).
pd.read_csv(root_dir + train_annotations_file)

数据label
根据数据集的格式完善了Dataset

class MyDataset():
    """Map-style dataset yielding ``(image, class_label, species_label)``.

    The annotation CSV is read with its first column as the index and must
    provide the columns ``path``, ``classes`` and ``species``.

    Args:
        root_dir: Prefix that is concatenated (plain string ``+``) with
            ``annotations_file`` to locate the CSV.
        annotations_file: File name of the annotation CSV.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, root_dir, annotations_file, transform=None):
        self.root_dir = root_dir
        self.annotations = annotations_file
        self.transform = transform

        self.file_info = pd.read_csv(root_dir + annotations_file, index_col=0)
        self.size = len(self.file_info)

    def __len__(self):
        return self.size

    def _record(self, idx):
        """Return ``(path, class_label, species_label)`` for row position ``idx``.

        Rows are addressed by position (``iloc``), not by index label: the
        sampler hands out 0..len-1, while the CSV index may hold arbitrary
        labels (e.g. strings), which made label-based lookup raise KeyError.
        """
        row = self.file_info.iloc[idx]
        return row['path'], int(row['classes']), int(row['species'])

    def __getitem__(self, idx):
        img_path, label_classes, label_species = self._record(idx)

        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)

        return img, label_classes, label_species

设定transform，并将数据载入dataloader

# Training preprocessing: resize to 500x500, random horizontal flip, to tensor.
train_transform = transforms.Compose([
    transforms.Resize((500, 500)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Validation preprocessing: same as training but without the random flip.
val_transform = transforms.Compose([
    transforms.Resize((500, 500)),
    transforms.ToTensor(),
])

train_dataset = MyDataset(root_dir, train_annotations_file, transform=train_transform)
val_dataset = MyDataset(root_dir, val_annotations_file, transform=val_transform)

# One loader per phase; validation is served one image at a time.
data_loaders = {
    'train': DataLoader(dataset=train_dataset, batch_size=16, shuffle=True),
    'val': DataLoader(dataset=val_dataset, batch_size=1, shuffle=True),
}
train_loader = data_loaders['train']
val_loader = data_loaders['val']

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

定义模型, 损失函数，优化器
将torchvision内置的resnet18及其相应的预训练模型参数导入，进行迁移学习。
param.requires_grad = False 将模型的中间层参数固定，只要最后的FC层参数可导。model_ft.fc = nn.Linear(num_ftrs, 32) 此处将原来的默认分类数1000改为32，是为了后续再加上两个并列的FC层分别对class和species进行分类，若改为2，则进行单一的二分类任务。

# Load ResNet18 together with its pretrained weights.
model_ft = models.resnet18(pretrained=True)

# Freeze every pretrained parameter so that it is not updated during training.
for param in model_ft.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a fresh (trainable) one that
# emits a 32-dimensional feature vector; the two task heads are stacked on it.
# torch.nn is used explicitly because ``nn`` is not imported in this file.
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(num_ftrs, 32)

class multi_out_model(torch.nn.Module):
    """Shared backbone followed by two parallel linear classification heads.

    Args:
        model_core: Module producing a ``feature_dim``-wide feature vector
            per sample.
        feature_dim: Width of the feature vector emitted by ``model_core``.
        num_classes: Number of outputs of the ``classes`` head.
        num_species: Number of outputs of the ``species`` head.

    ``forward`` returns the tuple ``(classes_logits, species_logits)``.
    """

    def __init__(self, model_core, feature_dim=32, num_classes=2, num_species=3):
        super(multi_out_model, self).__init__()

        self.resnet_model = model_core

        # torch.nn is referenced explicitly: a bare ``nn`` is not imported.
        self.classes = torch.nn.Linear(in_features=feature_dim, out_features=num_classes, bias=True)
        self.species = torch.nn.Linear(in_features=feature_dim, out_features=num_species, bias=True)

    def forward(self, x):
        # Both heads read the same backbone features.
        shared = self.resnet_model(x)
        return self.classes(shared), self.species(shared)
    
# Wrap the backbone with the two task heads.
model_ft = multi_out_model(model_ft)

# One cross-entropy loss per task: index 0 for classes, index 1 for species.
# torch.nn is referenced explicitly because ``nn`` is not imported in this file.
criterion = [torch.nn.CrossEntropyLoss(), torch.nn.CrossEntropyLoss()]

model_ft = model_ft.to(device)
network = model_ft

# Only the replaced backbone fc layer and the two heads are handed to the
# optimizer. Alternative that optimizes every parameter:
# optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
optimizer_ft = optim.SGD(
    [
        {"params": model_ft.resnet_model.fc.parameters()},
        {"params": model_ft.classes.parameters()},
        {"params": model_ft.species.parameters()},
    ],
    lr=0.01,
    momentum=0.9,
)
optimizer = optimizer_ft

# Decay the learning rate by a factor of 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

定义训练函数

def train_model(model, criterion, optimizer, scheduler, num_epochs=50,pretrain_model=None,
                loaders=None, run_device=None, loss_weights=(0.1, 0.9)):
    """Train the two-headed model and keep the weights with the lowest val loss.

    Args:
        model: Module returning ``(classes_logits, species_logits)``.
        criterion: Sequence of two loss callables (classes, species).
        optimizer: Optimizer updating the trainable parameters.
        scheduler: Learning-rate scheduler stepped once per epoch after the
            training phase, or ``None``.
        num_epochs: Number of epochs to run (added on top of the epoch stored
            in the checkpoint when resuming).
        pretrain_model: Path of a checkpoint written by this function. When
            the file exists, model/optimizer state and the epoch counter are
            restored from it; otherwise training starts from scratch.
        loaders: Dict with ``'train'`` and ``'val'`` data loaders; defaults to
            the module-level ``data_loaders``.
        run_device: Device the batches are moved to; defaults to the
            module-level ``device``.
        loss_weights: ``(classes_weight, species_weight)`` of the total loss.
            The 0.1/0.9 default is a provisional choice; no tuned weighting
            (or improved loss) has been selected yet.

    Returns:
        ``(model, classloss_list, speciesloss_list, Loss_list,
        Accuracy_list_classes, Accuracy_list_species)``. Every list is a dict
        with ``'train'``/``'val'`` keys holding one entry per epoch: losses as
        floats, accuracies as 0-dim float64 tensors in percent. ``model``
        carries the best validation weights.

    Side effects:
        Writes ``multi_pre_resnet18_model.pt`` (last-epoch state for resuming)
        and ``multi_best_model.pt`` (best weights) to the working directory.
    """
    if loaders is None:
        loaders = data_loaders
    if run_device is None:
        run_device = device

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start_time = time.perf_counter()
    Loss_list = {'train':[],'val':[]}
    classloss_list = {'train':[],'val':[]}
    speciesloss_list = {'train':[],'val':[]}
    Accuracy_list_classes = {'train':[],'val':[]}
    Accuracy_list_species = {'train':[],'val':[]}
    start_epoch = 0

    # To continue from previously trained weights, pass the checkpoint path
    # (directory + file name) as pretrain_model; its parameters are loaded here.
    if pretrain_model is not None and os.path.exists(pretrain_model):
        checkpoint = torch.load(pretrain_model, map_location=run_device)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch'] + 1
        num_epochs = num_epochs + start_epoch
    else:
        print('无保存模型，从头开始训练')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    weight_classes, weight_species = loss_weights
    # Stays at start_epoch - 1 when the loop below does not run at all.
    last_epoch = start_epoch - 1

    for epoch in range(start_epoch,num_epochs):
        last_epoch = epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-*' *10)

        # Each epoch has a training and a validation phase.
        for phase in ['train','val']:
            is_train = phase == 'train'
            # BN and Dropout layers behave differently in train and eval mode.
            if is_train:
                model.train()
            else:
                model.eval()

            n_samples = len(loaders[phase].dataset)
            running_loss = 0.0
            running_classes_loss = 0.0
            running_species_loss = 0.0
            corrects_classes = 0
            corrects_species = 0

            for img, label_classes, label_species in loaders[phase]:
                img = img.to(run_device)
                label_classes = label_classes.to(run_device)
                label_species = label_species.to(run_device)
                batch_size = img.size(0)

                # Gradients are tracked in the training phase only.
                with torch.set_grad_enabled(is_train):
                    x_classes, x_species = model(img)
                    # One row of logits per sample.
                    x_classes = x_classes.view(batch_size, -1)
                    x_species = x_species.view(batch_size, -1)

                    preds_classes = x_classes.argmax(dim=1)
                    preds_species = x_species.argmax(dim=1)

                    loss_classes = criterion[0](x_classes, label_classes)
                    loss_species = criterion[1](x_species, label_species)
                    # Computed in both phases: previously the validation
                    # phase reused the last training batch's loss.
                    loss = weight_classes * loss_classes + weight_species * loss_species

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Criterion values are scaled by the batch size so the epoch
                # figure is normalised by the dataset size below.
                running_classes_loss += loss_classes.item() * batch_size
                running_species_loss += loss_species.item() * batch_size
                running_loss += loss.item() * batch_size

                corrects_classes += (preds_classes == label_classes).sum().item()
                corrects_species += (preds_species == label_species).sum().item()

            if is_train and scheduler is not None:
                # Without this call the learning rate never decays.
                scheduler.step()

            # Per-task epoch losses come from the running sums; using the
            # last batch's loss tensor made them disagree with the total loss.
            epoch_loss = running_loss / n_samples
            epoch_class_loss = running_classes_loss / n_samples
            epoch_species_loss = running_species_loss / n_samples

            Loss_list[phase].append(epoch_loss)
            classloss_list[phase].append(epoch_class_loss)
            speciesloss_list[phase].append(epoch_species_loss)

            # Stored as 0-dim tensors so callers may keep using .cpu().numpy().
            epoch_acc_classes = torch.tensor(corrects_classes / n_samples, dtype=torch.float64)
            epoch_acc_species = torch.tensor(corrects_species / n_samples, dtype=torch.float64)

            Accuracy_list_classes[phase].append(100 * epoch_acc_classes)
            Accuracy_list_species[phase].append(100 * epoch_acc_species)

            print('{} Loss: {:.4f}  Acc_classes: {:.2%}  Acc_species: {:.2%}'.format(
                phase, epoch_loss, epoch_acc_classes.item(), epoch_acc_species.item()))

            # For the multi-task setting the best model is selected by total
            # validation loss only; other metrics are not considered yet
            # (for a single task, accuracy would do).
            if phase == 'val':
                print('This epoch val loss: {:.4f}'.format(epoch_loss))
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    print('Best val loss: {:.4f}'.format(best_loss))

    # Save the current (last-epoch) state so training can be resumed later.
    pre_state = {'model' : model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch': last_epoch}
    torch.save(pre_state, 'multi_pre_resnet18_model.pt')

    # Restore the best weights into the model and save the weights themselves
    # (not the return value of load_state_dict).
    model.load_state_dict(best_model_wts)
    state = {'model' : best_model_wts}
    torch.save(state, 'multi_best_model.pt')

    end_time = time.perf_counter()
    print('训练时间：' + str(end_time - start_time))
    return model, classloss_list, speciesloss_list, Loss_list,Accuracy_list_classes,Accuracy_list_species

开始训练

import time

# time.clock() was removed in Python 3.8; perf_counter() is its replacement.
start_time = time.perf_counter()
model, classloss_list, speciesloss_list, Loss_list, Accuracy_list_classes, Accuracy_list_species = train_model(network, criterion, optimizer, exp_lr_scheduler, num_epochs=2)

end_time = time.perf_counter()
print('训练时间：' + str(end_time - start_time))

同时将pretrain_model='multi_pre_resnet18_model.pt'设置为之前训练保存的参数，可以接着上次继续训练，但注意不要过拟合。

# Resume from the checkpoint saved by the previous run and train 20 more epochs.
model, classloss_list, speciesloss_list, Loss_list, Accuracy_list_classes, Accuracy_list_species = train_model(
    network, criterion, optimizer, exp_lr_scheduler, num_epochs=20,pretrain_model='multi_pre_resnet18_model.pt')

训练结果示例
----------
...
----------
Epoch 19/19
----------
train Loss: 0.9367 Acc_classes: 87.48% Acc_species: 71.79%
val Loss: 0.5222 Acc_classes: 93.75% Acc_species: 86.25%
This epoch val loss: 0.5222
Best val loss: 0.5222
训练时间：1396.4057515999993

之前也忘记损失函数的权重怎么设置的，最终得到的结果如上。可以看到，通过迁移学习将resnet用来进行该分类任务时，在classes的分类上还是很容易收敛的，但是同时进行species的分类时，就有点吃力了。后续可能会改进损失函数进行优化。

对准确率及损失进行可视化

def _as_floats(values):
    """Convert Python numbers or 0-dim tensors (any device) to plain floats."""
    return [float(v) for v in values]


x = range(0,len(Loss_list['train']))

# --- Class accuracy per epoch ---
# A new figure per plot keeps the three charts from piling onto one axes
# when this runs as a script instead of separate notebook cells.
plt.figure()
y3 = _as_floats(Accuracy_list_classes["train"])
y4 = _as_floats(Accuracy_list_classes["val"])
plt.plot(x, y3, color="r", linestyle="-", marker=".", linewidth=1, label="train")
plt.plot(x, y4, color="b", linestyle="-", marker=".", linewidth=1, label="val")
plt.ylim(min(min(y3),min(y4)) * 0.2,max(max(y3),max(y4)) * 1.2)
plt.legend()
plt.title('train and val Classes_acc vs. epoches')
plt.ylabel('Classes_accuracy')
plt.savefig("train and val Classes_acc vs epoches.jpg")

# --- Species accuracy per epoch ---
plt.figure()
y5 = _as_floats(Accuracy_list_species["train"])
y6 = _as_floats(Accuracy_list_species["val"])
plt.plot(x, y5, color="r", linestyle="-", marker=".", linewidth=1, label="train")
plt.plot(x, y6, color="b", linestyle="-", marker=".", linewidth=1, label="val")
plt.ylim(min(min(y5),min(y6)) * 0.2,max(max(y5),max(y6)) * 1.2)
plt.legend()
plt.title('train and val Species_acc vs. epoches')
# The label previously said 'Classes_accuracy' (copy-paste slip).
plt.ylabel('Species_accuracy')
plt.savefig("train and val Species_acc vs epoches.jpg")

# --- Total and per-task losses per epoch ---
plt.figure()
y1 = _as_floats(Loss_list["val"])
y2 = _as_floats(Loss_list["train"])
y8 = _as_floats(speciesloss_list['train'])
y7 = _as_floats(speciesloss_list['val'])
y10 = _as_floats(classloss_list['train'])
y9 = _as_floats(classloss_list['val'])

plt.plot(x, y1, color="r", linestyle="-", marker="o", linewidth=1, label="loss_val")
plt.plot(x, y2, color="b", linestyle="-", marker="o", linewidth=1, label="loss_train")

plt.plot(x, y7, color="r", linestyle="-", marker="^", linewidth=1, label="specie_loss_val")
plt.plot(x, y8, color="b", linestyle="-", marker="^", linewidth=1, label="specie_loss_train")

plt.plot(x, y9, color="r", linestyle="-", marker=">", linewidth=1, label="class_loss_val")
plt.plot(x, y10, color="b", linestyle="-", marker="<", linewidth=1, label="class_loss_train")

# Cross-entropy losses are non-negative, so the axis starts at 0 rather than
# at a negative multiple of the smallest loss.
plt.ylim(0, max(max(y1),max(y2),max(y7),max(y8),max(y9),max(y10)) * 1.1)

plt.legend()
plt.title('train and val loss vs. epoches')
plt.xlabel("epochs")
plt.ylabel('loss')
plt.savefig("train and val loss vs epoches.jpg")

class_acc

specie_acc

loss

并对验证集进行验证

def visualize_model(model):
    """Show every validation image with its predictions and print accuracies.

    Iterates over ``data_loaders['val']``, runs ``model`` in eval mode without
    gradients, displays each image titled with predicted and ground-truth
    class/species names, and finally prints the two accuracies as fractions.

    Works for any validation batch size: accuracy is divided by the number of
    samples (not batches) and each image of a batch is displayed on its own.

    Args:
        model: Module returning ``(classes_logits, species_logits)``.
    """
    corrects_classes = 0
    corrects_species = 0
    counts = 0
    to_pil = transforms.ToPILImage()
    model.eval()
    with torch.no_grad():
        for img, label_classes, label_species in data_loaders['val']:
            batch_size = img.size(0)
            label_classes = label_classes.to(device)
            label_species = label_species.to(device)

            output = model(img.to(device))
            preds_classes = output[0].view(batch_size, -1).argmax(dim=1)
            preds_species = output[1].view(batch_size, -1).argmax(dim=1)
            corrects_classes += (preds_classes == label_classes).sum().item()
            corrects_species += (preds_species == label_species).sum().item()

            # Release cached GPU memory between batches (for small GPUs).
            torch.cuda.empty_cache()

            for k in range(batch_size):
                # ``img`` itself was never moved off the CPU, so it can be
                # converted to a PIL image directly.
                plt.imshow(to_pil(img[k]))
                # .item() turns the 0-dim tensors into ints for list indexing.
                plt.title('predicted classes: {}\n ground-truth classes:{}\n predicted species: {}\n ground-truth species:{}'\
                          .format(CLASSES[preds_classes[k].item()], CLASSES[label_classes[k].item()],
                                  SPECIES[preds_species[k].item()], SPECIES[label_species[k].item()]))
                plt.show()
            counts += batch_size

    if counts == 0:
        print("validation loader is empty")
        return

    epoch_acc_classes = corrects_classes / counts
    epoch_acc_species = corrects_species / counts

    print("epoch_acc_classes:{} epoch_acc_species:{}".format(epoch_acc_classes, epoch_acc_species))


visualize_model(network)

最终验证结果如图

example1

example2
然后两个任务验证集的准确率为：
epoch_acc_classes:93.75% epoch_acc_species:86.25%

单独进行classes分类和species分类时，验证集准确率均可达到90%以上,这也说明预训练的模型含有丰富特征。
后续计划在数据集、网络结构、损失函数方面进行改善，以期提高多任务图像分类时，各个子任务的准确率。

数据增强

对数据进行随机裁剪，改变颜色，旋转，透视变换等操作，并将变换后的图片保存下来，生成新的数据集，补充到原训练集中。注意：下面的代码用到了OpenCV和tqdm，运行前需要先执行 import cv2 和 from tqdm import tqdm。

# 透视变换
def random_warp(img, row, col):
    """Apply a random perspective warp to a 3-channel image.

    Two random quadrilaterals (source and destination) are drawn near the
    image corners, each coordinate within a 60-pixel margin, and the
    perspective transform mapping one onto the other is applied.

    Args:
        img: Array whose ``shape`` unpacks to (height, width, channels).
        row: Unused.
        col: Unused.

    Returns:
        ``(M_warp, img_warp)``: the transform matrix from
        ``cv2.getPerspectiveTransform`` and the warped image, which has the
        same width and height as the input.
    """
    height, width, _channels = img.shape
    random_margin = 60

    def near_zero():
        # Coordinate around the low edge of an axis.
        return random.randint(-random_margin, random_margin)

    def near_end(extent):
        # Coordinate just inside the high edge of an axis of length `extent`.
        return random.randint(extent - random_margin - 1, extent - 1)

    def draw_quad():
        # Corners are drawn in the order (0,0)-side, (w,0)-side, (w,h)-side,
        # (0,h)-side, x before y, so the random sequence is consumed in a
        # fixed order.
        corner_a = [near_zero(), near_zero()]
        corner_b = [near_end(width), near_zero()]
        corner_c = [near_end(width), near_end(height)]
        corner_d = [near_zero(), near_end(height)]
        return np.float32([corner_a, corner_b, corner_c, corner_d])

    src_quad = draw_quad()
    dst_quad = draw_quad()

    M_warp = cv2.getPerspectiveTransform(src_quad, dst_quad)
    img_warp = cv2.warpPerspective(img, M_warp, (width, height))
    return M_warp, img_warp

#改变颜色
def random_light_color(img):
    """Shift each of the three channels by an independent random offset.

    One integer offset in [-50, 50] is drawn per channel (in channel order
    0, 1, 2, i.e. B, G, R for OpenCV images) and added to that channel, with
    the result saturated to the 0..255 range.

    Args:
        img: Array of shape (height, width, 3); assumed to hold 8-bit pixel
            values. The input is not modified.

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    offsets = [random.randint(-50, 50) for _ in range(3)]
    # Widen before adding: adding a negative Python int directly to a uint8
    # array raises OverflowError under NumPy 2 promotion rules, and the
    # clipped sum equals the former per-branch masking for every sign.
    shifted = img.astype(np.int16) + np.array(offsets, dtype=np.int16)
    return np.clip(shifted, 0, 255).astype(img.dtype)


#对图片实现多种变换并保存
def image_data_aug(img,crop=True,change_color=True,rotation=True,perspective_transform=False):
    """Apply the enabled augmentations to ``img`` in a fixed order.

    Order: crop -> colour shift -> rotation/scale -> perspective warp.

    Args:
        img: Image array indexed as (rows, columns, ...).
        crop: Keep rows from 1/4 to 3/4 of the height and columns from 0 to
            3/4 of the width.
        change_color: Apply ``random_light_color``.
        rotation: Rotate about the centre by a random angle in [0, 180] with
            a random scale in [0.75, 1.25].
        perspective_transform: Apply ``random_warp``.

    Returns:
        The augmented image, or ``None`` (after printing "wrong input") when
        no augmentation is enabled.
    """
    any_enabled = crop or change_color or rotation or perspective_transform
    if any_enabled == False:
        print("wrong input")
        return

    if crop:
        n_rows = img.shape[0]
        n_cols = img.shape[1]
        row_start = int(n_rows / 4)
        row_stop = int(3 * n_rows / 4)
        col_stop = int(3 * n_cols / 4)
        img = img[row_start:row_stop, 0:col_stop]

    if change_color:
        img = random_light_color(img)

    if rotation:
        angle = random.randint(0,180)
        scale = random.uniform(0.75,1.25)
        centre = (img.shape[1] / 2, img.shape[0] / 2)
        output_size = (img.shape[1], img.shape[0])
        rotation_matrix = cv2.getRotationMatrix2D(centre, angle, scale)
        img = cv2.warpAffine(img, rotation_matrix, output_size)

    if perspective_transform:
        # The second and third arguments are ignored by random_warp.
        _matrix, img = random_warp(img, img[0], img[1])

    return img

def create_image(ori_img_file,times=1):
    """Generate augmented copies of every image listed in an annotation CSV.

    For each pass and each row, the image is read, augmented with
    ``image_data_aug`` and written next to a rewritten path (".jpg" gets an
    "_aug<pass>_<row>" suffix; "train"/"val" in the path become
    "train_aug"/"val_aug"). Successfully written images are recorded in
    ``data_aug.csv`` in the working directory.

    Images that cannot be read, augmented or written are skipped with a
    message instead of being silently ignored.

    NOTE: relies on ``cv2`` and ``tqdm`` being imported by the surrounding
    module.

    Args:
        ori_img_file: Annotation CSV file name, appended to ``root_dir``.
        times: Number of augmentation passes over the whole dataset.
    """
    ori_file = pd.read_csv(root_dir + ori_img_file,index_col=0)
    new_csv = []
    # The loop variable is not called `time`, which would shadow the module.
    for round_idx in range(times):
        print("图片第{}次生成中...".format(str(round_idx)))
        with tqdm(range(len(ori_file))) as t:
            for idx in t:
                # Positional access: works whatever labels the CSV index has.
                record = ori_file.iloc[idx]
                ori_path = record["path"]
                path = ori_path.replace(".jpg",  "_aug"+str(round_idx) + "_" +  str(idx)+".jpg").replace("train","train_aug").replace("val","val_aug")
                classes = record["classes"]
                species = record["species"]
                print(ori_path)

                # cv2.imread returns None (no exception) for a missing or
                # unreadable file; this was the hidden cause of the
                # "size.width>0 && size.height>0" error seen further down.
                img = cv2.imread(ori_path)
                if img is None:
                    print("skip unreadable image: " + str(ori_path))
                    continue

                try:
                    img = image_data_aug(img)
                    out_dir = os.path.dirname(path)
                    if out_dir:
                        os.makedirs(out_dir, exist_ok=True)
                    written = cv2.imwrite(path,img)
                except cv2.error as err:
                    print("skip {}: {}".format(ori_path, err))
                    continue

                if not written:
                    print("failed to write: " + str(path))
                    continue
                new_csv.append([str(round_idx)+"_"+ str(idx),path,classes,species])

    data_aug = pd.DataFrame(new_csv,columns=["index","path","classes","species"])
    data_aug.to_csv("data_aug.csv",index=0)