美文网首页Tensorflow and pytorch
ResNet18迁移学习-动物多任务分类

ResNet18迁移学习-动物多任务分类

作者: 就是果味熊 | 来源:发表于2020-06-12 15:14 被阅读0次

    迁移学习

    迁移学习的具体内容有很多大佬文章已经说得很清楚了,这里就不献丑了。

    本文尝试通过迁移学习,将Pytorch中已经预训练好的ResNet18网络用于动物图片分类。

    任务

    1.纲分类任务,预测该动物是属于哺乳类(Mammals)还是鸟纲(Birds)
    2.种分类任务,预测该动物是鸡、兔还是鼠
    3.多任务分类,同时预测纲和种

    数据

    数据在网盘自取(已更正)https://pan.baidu.com/s/1nrlpqWFHVRhiFNSAMsbn3w 提取码:4zrj

    数据分为train dataset 和val dataset两个数据集,分别有890和80个图片数据,含有鸡、兔、鼠三种动物。数据分布如下:CLASSES = ['Mammals', 'Birds'] ,分别有580和310张图片,用0,1 对应;SPECIES = ['rabbits', 'rats', 'chickens'] 分别有300,270,310张图片,用0, 1, 2对应。数据信息如图所示

    code

    导入所需要的包.
    这里被注释掉了,是因为起初使用的自己设定的网络,效果很差,后来就使用pytorch里自带的resNet网络进行了训练和预测。

    from Classes_Network import *
    
    from __future__ import print_function, division
    import os
    import matplotlib.pyplot as plt
    from torch.utils.data import  DataLoader
    import torch
    import torchvision
    #from Classes_Network import *
    from torchvision.transforms import transforms
    
    from torchvision import models
    
    from PIL import Image
    import pandas as pd
    import numpy as np
    import random
    from torch import optim
    from torch.optim import lr_scheduler
    import copy
    import time
    

    设定一些参数和路径

    # Dataset root folder; the annotation CSV file names below are appended to
    # it by plain string concatenation, hence the trailing slash.
    root_dir = './Stage_3 Multi-classification/'
    train_annotations_file = 'Multi_train_annotation.csv'
    val_annotations_file = 'Multi_val_annotation.csv'
    # Human-readable label names; predictions and labels are used as indices
    # into these lists when results are displayed.
    CLASSES = ['Mammals', 'Birds'] 
    SPECIES = ['rabbits', 'rats', 'chickens']
    

    train_annotation_file与val_annotation_file文件标注了训练集和验证集的数据的路径以及label.格式如图所示。

    # Preview the training annotation table. The result is not assigned, so
    # this only shows something in an interactive (notebook) session.
    pd.read_csv(root_dir + train_annotations_file)
    
    数据label
    根据数据集的格式完善了Dataset
    class MyDataset():
        """Image dataset described by an annotation CSV.

        The CSV is read with its first column as the index and must provide
        the columns ``path`` (image file), ``classes`` and ``species`` (labels).

        Args:
            root_dir: Prefix joined to ``annotations_file`` by plain string
                concatenation to locate the CSV.
            annotations_file: File name of the annotation CSV.
            transform: Optional callable applied to the loaded RGB PIL image.
        """

        def __init__(self,root_dir,annotations_file,transform=None):
            self.root_dir = root_dir
            self.annotations = annotations_file
            self.transform = transform

            self.file_info = pd.read_csv(root_dir + annotations_file, index_col=0)
            self.size = len(self.file_info)

        def __len__(self):
            """Return the number of annotated samples."""
            return self.size

        def __getitem__(self,idx):
            """Return ``(image, classes_label, species_label)`` for position ``idx``.

            Lookup is positional (``.iloc``): samplers hand out positions
            ``0..len-1``, which only coincide with the CSV index labels when that
            index happens to be a clean 0-based range.
            """
            img_path = self.file_info['path'].iloc[idx]
            label_classes = self.file_info['classes'].iloc[idx]
            label_species = self.file_info['species'].iloc[idx]

            # The image path is used exactly as stored in the CSV (root_dir is
            # not prepended).
            img = Image.open(img_path).convert('RGB')
            if self.transform:
                img = self.transform(img)

            return img, label_classes, label_species
    

    设定transform,并将数据载入dataloader

    # Training images: resize to 500x500, random horizontal flip, then convert
    # to a tensor.
    train_transform = transforms.Compose([
        transforms.Resize((500, 500)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    # Validation images get the same resize and tensor conversion but no
    # random flip.
    val_transform = transforms.Compose([
        transforms.Resize((500, 500)),
        transforms.ToTensor(),
    ])

    train_dataset = MyDataset(root_dir, train_annotations_file, transform=train_transform)
    val_dataset = MyDataset(root_dir, val_annotations_file, transform=val_transform)

    # Validation runs one image per batch; the visualisation code relies on
    # being able to show each batch as a single picture.
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)

    data_loaders = dict(train=train_loader, val=val_loader)

    # Use the first GPU when CUDA is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print(device)
    

    定义模型, 损失函数,优化器
    将内置的resnet18及其相应的预训练模型参数导入,进行迁移学习。
    param.requires_grad = False 将模型的中间层参数固定,只要最后的FC层参数可导。model_ft.fc = nn.Linear(num_ftrs, 32) 此处将原来的默认分类数1000改为32,是为了后续再加上两个并列的FC层分别对class和species进行分类,若改为2,则进行单一的二分类任务。

    # Load ResNet18 together with its pretrained weights.
    model_ft = models.resnet18(pretrained=True)

    # Freeze every existing parameter so that the backbone is not trained.
    for param in model_ft.parameters():
        param.requires_grad = False

    # Replace the final fully connected layer with a fresh (trainable) one that
    # outputs a 32-dimensional feature vector; the two task heads are attached
    # on top of it later. `torch.nn` is referenced through the `torch` import
    # because no bare `nn` name is imported in this script.
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = torch.nn.Linear(num_ftrs, 32)
    
    class multi_out_model(torch.nn.Module):
        """Shared backbone followed by two parallel linear classification heads.

        Args:
            model_core: Backbone module; its output must have ``feature_dim``
                features per sample.
            feature_dim: Size of the backbone output fed to both heads.
            num_classes: Number of outputs of the ``classes`` head.
            num_species: Number of outputs of the ``species`` head.
        """

        def __init__(self,model_core,feature_dim=32,num_classes=2,num_species=3):
            super(multi_out_model,self).__init__()

            self.resnet_model = model_core

            # `torch.nn` is used explicitly: no bare `nn` name is imported in
            # this script.
            self.classes = torch.nn.Linear(in_features=feature_dim, out_features=num_classes, bias=True)
            self.species = torch.nn.Linear(in_features=feature_dim, out_features=num_species, bias=True)

        def forward(self,x):
            """Return the ``(classes, species)`` head outputs for input ``x``."""
            features = self.resnet_model(x)

            classes = self.classes(features)
            species = self.species(features)

            return classes, species
        
    # Wrap the backbone so that it feeds the two classification heads.
    model_ft = multi_out_model(model_ft)

    # One cross-entropy criterion per head, in the order [classes, species].
    # `torch.nn` is used explicitly: no bare `nn` name is imported here.
    criterion = [torch.nn.CrossEntropyLoss(), torch.nn.CrossEntropyLoss()]

    model_ft = model_ft.to(device)
    network = model_ft

    # Only the replaced backbone fc layer and the two heads are handed to the
    # optimizer, as opposed to optimizing every parameter of the model.
    optimizer_ft = optim.SGD([{"params":model_ft.resnet_model.fc.parameters()},
                             {"params":model_ft.classes.parameters()},
                             {"params":model_ft.species.parameters()}],lr=0.01,momentum=0.9)
    optimizer = optimizer_ft

    # Multiply the learning rate by 0.1 every 7 scheduler steps.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7,gamma=0.1)
    

    定义训练函数

    def train_model(model, criterion, optimizer, scheduler, num_epochs=50,pretrain_model=None):
        """Train and validate the two-head model and keep the best weights.

        Reads the module-level ``data_loaders`` dict (keys ``'train'`` and
        ``'val'``) and the module-level ``device``.

        Args:
            model: Network whose forward pass returns ``(classes, species)``
                outputs.
            criterion: Pair of loss callables; ``criterion[0]`` is applied to
                the classes head and ``criterion[1]`` to the species head.
            optimizer: Optimizer used for the weight updates.
            scheduler: Learning-rate scheduler, stepped once per epoch after
                the training phase. ``None`` disables scheduling.
            num_epochs: Number of epochs to run in this call.
            pretrain_model: Optional path (directory + file name) of a
                checkpoint written by a previous call. When the file exists,
                model and optimizer state are restored and training continues
                after the stored epoch for ``num_epochs`` more epochs.

        Returns:
            ``(model, classloss_list, speciesloss_list, Loss_list,
            Accuracy_list_classes, Accuracy_list_species)``. ``model`` carries
            the weights with the lowest validation loss. Each list object is a
            dict with ``'train'`` and ``'val'`` histories: losses are floats
            (mean per sample), accuracies are 0-dim tensors in percent.

        Side effects:
            Writes ``multi_pre_resnet18_model.pt`` (resumable checkpoint of the
            last epoch) and ``multi_best_model.pt`` (best weights) to the
            current working directory.
        """
        start_time = time.perf_counter()
        Loss_list = {'train':[],'val':[]}
        classloss_list = {'train':[],'val':[]}
        speciesloss_list = {'train':[],'val':[]}
        Accuracy_list_classes = {'train':[],'val':[]}
        Accuracy_list_species = {'train':[],'val':[]}
        start_epoch = 0

        if pretrain_model is not None and os.path.exists(pretrain_model):
            # map_location lets a checkpoint saved on GPU be resumed on CPU.
            checkpoint = torch.load(pretrain_model, map_location=device)
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Older checkpoints carry no scheduler state; keep them loadable.
            if scheduler is not None and 'scheduler' in checkpoint:
                scheduler.load_state_dict(checkpoint['scheduler'])
            start_epoch = checkpoint['epoch'] + 1
            num_epochs = num_epochs + start_epoch
        else:
            print('无保存模型,从头开始训练')

        best_model_wts = copy.deepcopy(model.state_dict())
        best_loss = float('inf')
        # Last completed epoch; stays at start_epoch - 1 if no epoch runs so
        # that the checkpoint written below is still well defined.
        last_epoch = start_epoch - 1

        for epoch in range(start_epoch,num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-*' *10)

            # Each epoch has a training and a validation phase.
            for phase in ['train','val']:
                is_train = phase == 'train'
                if is_train:
                    # train()/eval() only change layers such as BatchNorm and
                    # Dropout that behave differently in the two modes.
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                running_classes_loss = 0.0
                running_species_loss = 0.0
                corrects_classes = torch.zeros((), dtype=torch.long, device=device)
                correct_species = torch.zeros((), dtype=torch.long, device=device)

                for img, label_classes, label_species in data_loaders[phase]:
                    img = img.to(device)
                    label_classes = label_classes.to(device)
                    label_species = label_species.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # Gradients are tracked only in the training phase.
                    with torch.set_grad_enabled(is_train):
                        output = model(img)
                        # One row per sample: 2 class scores / 3 species scores.
                        x_classes = output[0].view(-1, 2)
                        x_species = output[1].view(-1, 3)

                        # Index of the highest score in each row.
                        _, preds_classes = torch.max(x_classes, 1)
                        _, preds_species = torch.max(x_species, 1)

                        loss_classes = criterion[0](x_classes, label_classes)
                        loss_species = criterion[1](x_species, label_species)

                        # Task weights 0.1 / 0.9 are provisional: the article
                        # notes that no well-tuned weighting or improved loss
                        # has been selected yet. The total is computed in both
                        # phases so the validation loss is not a stale value
                        # left over from the last training batch.
                        loss = 0.1 * loss_classes + 0.9* loss_species

                        if is_train:
                            loss.backward()
                            optimizer.step()  # weight update

                    batch_size = img.size(0)
                    running_classes_loss += loss_classes.item() * batch_size
                    running_species_loss += loss_species.item() * batch_size
                    running_loss += loss.item() * batch_size

                    corrects_classes += torch.sum(preds_classes == label_classes)
                    correct_species += torch.sum(preds_species == label_species)

                # NOTE: the article mentions torch.cuda.empty_cache() as a way
                # to release cached GPU memory on small GPUs; it is not called
                # in this training loop.

                if is_train and scheduler is not None:
                    # Without this call the learning rate never decays.
                    scheduler.step()

                num_samples = len(data_loaders[phase].dataset)
                epoch_loss = running_loss / num_samples
                # Per-task epoch losses come from the running sums, so that
                # 0.1 * class + 0.9 * species adds up to the total. (The
                # article observed a mismatch when the last batch's loss
                # tensors were divided instead.)
                epoch_class_loss = running_classes_loss / num_samples
                epoch_species_loss = running_species_loss / num_samples

                Loss_list[phase].append(epoch_loss)
                classloss_list[phase].append(epoch_class_loss)
                speciesloss_list[phase].append(epoch_species_loss)

                epoch_acc_classes = corrects_classes.double() / num_samples
                epoch_acc_species = correct_species.double() / num_samples

                Accuracy_list_classes[phase].append(100 * epoch_acc_classes)
                Accuracy_list_species[phase].append(100 * epoch_acc_species)

                print('{} Loss: {:.4f}  Acc_classes: {:.2%}  Acc_species: {:.2%}'.format(phase, epoch_loss,epoch_acc_classes,epoch_acc_species))

                # Model selection: with two tasks the best model is chosen by
                # validation loss only; for a single task, accuracy would do.
                if phase == 'val':
                    print('This epoch val loss: {:.4f}'.format(epoch_loss))
                    if epoch_loss < best_loss:
                        best_loss = epoch_loss
                        best_model_wts = copy.deepcopy(model.state_dict())
                        print('Best val loss: {:.4f}'.format(best_loss))

            last_epoch = epoch

        # Save the current (last-epoch) state so training can be resumed later.
        pre_state = {'model' : model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch': last_epoch}
        if scheduler is not None:
            pre_state['scheduler'] = scheduler.state_dict()
        torch.save(pre_state, 'multi_pre_resnet18_model.pt')

        # After all epochs, load the best weights into the network and save
        # those weights (not the return value of load_state_dict).
        model.load_state_dict(best_model_wts)
        state = {'model' : best_model_wts}
        torch.save(state, 'multi_best_model.pt')

        end_time = time.perf_counter()
        print('训练时间:' + str(end_time - start_time))
        return model, classloss_list, speciesloss_list, Loss_list,Accuracy_list_classes,Accuracy_list_species
    

    开始训练

    import time
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start_time = time.perf_counter()
    model, classloss_list, speciesloss_list, Loss_list, Accuracy_list_classes, Accuracy_list_species = train_model(network, criterion, optimizer, exp_lr_scheduler, num_epochs=2)

    end_time = time.perf_counter()
    print('训练时间:' + str(end_time - start_time))
    

    同时将pretrain_model='multi_pre_resnet18_model.pt'设置为之前训练保存的参数,可以接着上次继续训练,但注意不要过拟合。

    # Continue training from the checkpoint written by the previous run.
    (model, classloss_list, speciesloss_list, Loss_list,
     Accuracy_list_classes, Accuracy_list_species) = train_model(
        model=network,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=exp_lr_scheduler,
        num_epochs=20,
        pretrain_model='multi_pre_resnet18_model.pt',
    )
    

    训练结果示例
    ----------
    ...
    ----------
    Epoch 19/19
    ----------
    train Loss: 0.9367 Acc_classes: 87.48% Acc_species: 71.79%
    val Loss: 0.5222 Acc_classes: 93.75% Acc_species: 86.25%
    This epoch val loss: 0.5222
    Best val loss: 0.5222
    训练时间:1396.4057515999993

    之前也忘记损失函数的权重怎么设置的,最终得到的结果如上。可以看到,通过迁移学习将resnet用来进行该分类任务时,在classes的分类上还是很容易收敛的,但是同时进行species的分类时,就有点吃力了。后续可能会改进损失函数进行优化。

    对准确率及损失进行可视化

    def _as_floats(values):
        """Convert history entries (Python numbers or 0-dim tensors) to floats."""
        return [float(v) for v in values]

    x = range(0,len(Loss_list['train']))

    # ---- classes accuracy -------------------------------------------------
    y3 = _as_floats(Accuracy_list_classes["train"])
    y4 = _as_floats(Accuracy_list_classes["val"])
    # A fresh figure per chart: otherwise, when run as a plain script, every
    # curve is drawn onto the same axes and each saved image contains the
    # previous charts as well.
    plt.figure()
    plt.plot(x, y3, color="r", linestyle="-", marker=".", linewidth=1, label="train")
    plt.plot(x, y4, color="b", linestyle="-", marker=".", linewidth=1, label="val")
    plt.ylim(min(min(y3),min(y4)) * 0.2,max(max(y3),max(y4)) * 1.2)
    plt.legend()
    plt.title('train and val Classes_acc vs. epoches')
    plt.xlabel("epochs")
    plt.ylabel('Classes_accuracy')
    plt.savefig("train and val Classes_acc vs epoches.jpg")

    # ---- species accuracy -------------------------------------------------
    y5 = _as_floats(Accuracy_list_species["train"])
    y6 = _as_floats(Accuracy_list_species["val"])
    plt.figure()
    plt.plot(x, y5, color="r", linestyle="-", marker=".", linewidth=1, label="train")
    plt.plot(x, y6, color="b", linestyle="-", marker=".", linewidth=1, label="val")
    plt.ylim(min(min(y5),min(y6)) * 0.2,max(max(y5),max(y6)) * 1.2)
    plt.legend()
    plt.title('train and val Species_acc vs. epoches')
    plt.xlabel("epochs")
    # This chart shows the species head, so it is labelled accordingly.
    plt.ylabel('Species_accuracy')
    plt.savefig("train and val Species_acc vs epoches.jpg")

    # ---- total and per-task losses ----------------------------------------
    y1 = _as_floats(Loss_list["val"])
    y2 = _as_floats(Loss_list["train"])
    y8 = _as_floats(speciesloss_list['train'])
    y7 = _as_floats(speciesloss_list['val'])
    y10 = _as_floats(classloss_list['train'])
    y9 = _as_floats(classloss_list['val'])

    plt.figure()
    plt.plot(x, y1, color="r", linestyle="-", marker="o", linewidth=1, label="loss_val")
    plt.plot(x, y2, color="b", linestyle="-", marker="o", linewidth=1, label="loss_train")

    plt.plot(x, y7, color="r", linestyle="-", marker="^", linewidth=1, label="specie_loss_val")
    plt.plot(x, y8, color="b", linestyle="-", marker="^", linewidth=1, label="specie_loss_train")

    plt.plot(x, y9, color="r", linestyle="-", marker=">", linewidth=1, label="class_loss_val")
    plt.plot(x, y10, color="b", linestyle="-", marker="<", linewidth=1, label="class_loss_train")

    plt.ylim(min(min(y1),min(y2)) * (-1.5),max(max(y1),max(y2),max(y7),max(y8),max(y9),max(y10)) * 1.1)

    plt.legend()
    plt.title('train and val loss vs. epoches')
    plt.xlabel("epochs")
    plt.ylabel('loss')
    plt.savefig("train and val loss vs epoches.jpg")
    
    class_acc
    specie_acc
    loss

    并对验证集进行验证

    def visualize_model(model):
        """Show every validation image with its predictions and print accuracy.

        Iterates the module-level ``data_loaders['val']``, runs ``model`` on
        the module-level ``device`` and, for each sample, displays the image
        titled with predicted and ground-truth class/species names. Finally
        prints the fraction of correct predictions for both tasks.

        Args:
            model: Network whose forward pass returns ``(classes, species)``
                outputs.
        """
        corrects_classes = 0
        corrects_species = 0
        counts = 0
        model.eval()
        with torch.no_grad():
            for img, label_classes, label_species in data_loaders['val']:
                label_classes = label_classes.to(device)
                label_species = label_species.to(device)

                output = model(img.to(device))
                x_classes = output[0].view(-1,2)
                _, preds_classes = torch.max(x_classes, 1)
                corrects_classes += int(torch.sum(preds_classes == label_classes))

                x_species = output[1].view(-1,3)
                _, preds_species = torch.max(x_species, 1)
                corrects_species += int(torch.sum(preds_species == label_species))

                torch.cuda.empty_cache()

                # Handle the batch sample by sample so the function works for
                # any batch size: a list can only be indexed with a tensor when
                # that tensor holds exactly one element.
                batch_size = img.size(0)
                for k in range(batch_size):
                    plt.imshow(transforms.ToPILImage()(img[k]))
                    plt.title('predicted classes: {}\n ground-truth classes:{}\n predicted species: {}\n ground-truth species:{}'\
                              .format(CLASSES[int(preds_classes[k])],CLASSES[int(label_classes[k])],
                                      SPECIES[int(preds_species[k])],SPECIES[int(label_species[k])]))

                    plt.show()
                # Count samples, not batches, so the accuracy stays correct
                # when the loader uses a batch size other than 1.
                counts += batch_size

            if counts == 0:
                print("no validation samples")
                return

            epoch_acc_classes = corrects_classes / counts
            epoch_acc_species = corrects_species / counts

            print("epoch_acc_classes:{} epoch_acc_species:{}".format(epoch_acc_classes, epoch_acc_species))
                          
    
    # Run the visual check with the same `network` object that was passed to
    # train_model.
    visualize_model(network)
    

    最终验证结果如图

    example1
    example2 、
    然后两个任务验证集的准确率为:
    epoch_acc_classes:93.75% epoch_acc_species:86.25%

    单独进行classes分类和species分类时,验证集准确率均可达到90%以上,这也说明预训练的模型含有丰富特征。
    后续计划在数据集、网络结构、损失函数方面进行改善,以期提高多任务图像分类时,各个子任务的准确率。

    数据增强

    对数据进行随机裁剪,改变颜色,旋转,透视变换等操作,并将变换后的图片保存下来,生成新的数据集,补充到原训练集中。

    # 透视变换
    def random_warp(img, row, col, random_margin=60):
        """Apply a random perspective warp to ``img``.

        Two random quadrilaterals are drawn near the image corners; the
        perspective transform mapping the first onto the second is applied.

        NOTE(review): ``cv2`` is not imported in the visible part of this
        script - confirm it is in scope before calling.

        Args:
            img: Image array whose first two dimensions are height and width
                (colour or grayscale).
            row: Unused; kept for backward compatibility.
            col: Unused; kept for backward compatibility.
            random_margin: Maximum corner jitter in pixels.

        Returns:
            ``(M_warp, img_warp)``: the 3x3 transform from
            ``cv2.getPerspectiveTransform`` and the warped image, which has the
            same width and height as the input.
        """
        # Slice instead of a 3-way unpack so grayscale (2-D) images work too.
        height, width = img.shape[:2]

        def _near_origin():
            return random.randint(-random_margin, random_margin)

        def _near_right():
            return random.randint(width - random_margin - 1, width - 1)

        def _near_bottom():
            return random.randint(height - random_margin - 1, height - 1)

        def _random_quad():
            # Corner order: top-left, top-right, bottom-right, bottom-left,
            # each as [x, y]; the draws happen in exactly that order.
            return np.float32([
                [_near_origin(), _near_origin()],
                [_near_right(), _near_origin()],
                [_near_right(), _near_bottom()],
                [_near_origin(), _near_bottom()],
            ])

        pts1 = _random_quad()
        pts2 = _random_quad()
        M_warp = cv2.getPerspectiveTransform(pts1, pts2)
        img_warp = cv2.warpPerspective(img, M_warp, (width, height))
        return M_warp, img_warp
    
    #改变颜色
    def random_light_color(img):
        """Shift each of the three colour channels by a random brightness offset.

        One integer offset in ``[-50, 50]`` is drawn per channel (in channel
        order) and added to that channel; results saturate at 0 and 255.

        Args:
            img: ``H x W x 3`` 8-bit image array. Images read with OpenCV are
                in B, G, R channel order, but the operation itself is the same
                for any channel order.

        Returns:
            A new array with the same shape and dtype as ``img``; the input is
            left unmodified.
        """
        # One draw per channel, in the order the channels are stored.
        offsets = [random.randint(-50, 50) for _ in range(3)]

        # Work in a wider signed type so that adding a negative offset to an
        # unsigned 8-bit array can neither wrap around nor be rejected by
        # NumPy's integer promotion rules; clipping then gives the saturation.
        shifted = img.astype(np.int32) + np.array(offsets, dtype=np.int32)
        return np.clip(shifted, 0, 255).astype(img.dtype)
    
    
    #对图片实现多种变换并保存
    def image_data_aug(img,crop=True,change_color=True,rotation=True,perspective_transform=False):
        """Apply the selected augmentations to ``img``, in a fixed order.

        Order: crop, colour shift, rotation, perspective warp.

        Args:
            img: Image array (height x width x channels).
            crop: Keep rows from 1/4 to 3/4 of the height and columns from 0 to
                3/4 of the width (a fixed window, not a random one).
            change_color: Apply ``random_light_color``.
            rotation: Rotate about the image centre by a random angle in
                ``[0, 180]`` degrees with a random scale in ``[0.75, 1.25]``.
            perspective_transform: Apply ``random_warp``.

        Returns:
            The augmented image, or ``None`` (after printing ``"wrong input"``)
            when no augmentation is enabled.
        """
        if not (crop or change_color or rotation or perspective_transform):
            print("wrong input")
            return None
        if crop:
            # Window bounds scale with the image size.
            img = img[int(img.shape[0]/4):int(3*img.shape[0]/4),0:int(3*img.shape[1]/4)]
        if change_color:
            img = random_light_color(img)
        if rotation:
            angle = random.randint(0,180)
            scale = random.uniform(0.75,1.25)
            M = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2), angle, scale) # center, angle, scale
            img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
        if perspective_transform:
            # Pass the image dimensions as row/col (the arguments are unused by
            # random_warp, but image rows were being passed before).
            M_warp, img = random_warp(img, img.shape[0], img.shape[1])

        return img
    
    def create_image(ori_img_file,times=1):
        """Generate augmented copies of every image listed in an annotation CSV.

        For each pass and each row, the image is read, augmented with
        ``image_data_aug`` (default options) and written next to a renamed
        path: ``.jpg`` gets the suffix ``_aug<pass>_<row>`` and ``train`` /
        ``val`` in the path become ``train_aug`` / ``val_aug``. Images that
        cannot be read, augmented or written are reported and skipped. The
        successfully written files are listed in ``data_aug.csv`` in the
        current working directory.

        NOTE(review): ``cv2`` and ``tqdm`` are not imported in the visible part
        of this script - confirm they are in scope before calling.

        Args:
            ori_img_file: Name of the annotation CSV (appended to the
                module-level ``root_dir``); needs ``path``, ``classes`` and
                ``species`` columns.
            times: Number of augmentation passes over the data set.
        """
        ori_file = pd.read_csv(root_dir + ori_img_file,index_col=0)
        new_csv = []
        # The pass counter is not called `time`, which would shadow the
        # imported `time` module.
        for round_idx in range(times):
            print("图片第{}次生成中...".format(str(round_idx)))
            with tqdm(range(len(ori_file))) as t:
                for idx in t:
                    # Positional lookup: idx runs over 0..len-1 regardless of
                    # the labels in the CSV index column.
                    ori_path = ori_file["path"].iloc[idx]
                    path = ori_path.replace(".jpg",  "_aug"+str(round_idx) + "_" +  str(idx)+".jpg").replace("train","train_aug").replace("val","val_aug")
                    classes = ori_file["classes"].iloc[idx]
                    species = ori_file["species"].iloc[idx]
                    print(ori_path)

                    # cv2.imread signals an unreadable file by returning None
                    # rather than raising.
                    img = cv2.imread(ori_path)
                    if img is None:
                        print("skipped (cannot read image): {}".format(ori_path))
                        continue

                    try:
                        img = image_data_aug(img)
                        out_dir = os.path.dirname(path)
                        if out_dir:
                            os.makedirs(out_dir, exist_ok=True)
                        written = cv2.imwrite(path,img)
                    except Exception as exc:
                        # Best effort, as before: one bad image must not abort
                        # the whole run - but say what was skipped and why.
                        print("skipped ({}): {}".format(exc, ori_path))
                        continue

                    if not written:
                        print("skipped (cannot write image): {}".format(path))
                        continue

                    new_csv.append([str(round_idx)+"_"+ str(idx),path,classes,species])

        data_aug = pd.DataFrame(new_csv,columns=["index","path","classes","species"])
        data_aug.to_csv("data_aug.csv",index=False)
    
    

    相关文章

      网友评论

        本文标题:ResNet18迁移学习-动物多任务分类

        本文链接:https://www.haomeiwen.com/subject/boqbwhtx.html