PyTorch Deep Learning in Practice 18 - Image Localization

Author: 薛东弗斯 | Published 2023-03-29 21:27

    Reference: Study Notes 19: Image Localization - pbc的成长之路 - 博客园 (cnblogs.com)
    Dataset: Oxford-IIIT_Pets - OpenDataLab


    Dataset analysis

    Images live under \dataset\images
    Annotations live in a separate directory, dataset/annotations/xmls/



    bndbox: the bounding box that marks where the cat's or dog's head is in each image
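    Each annotation is a Pascal VOC-style XML file; the fields the parsing code
    below relies on look roughly like this (the numbers are illustrative, not
    copied from a real file):

    <annotation>
        <size><width>600</width><height>400</height></size>
        <object>
            <bndbox>
                <xmin>333</xmin><ymin>72</ymin>
                <xmax>425</xmax><ymax>158</ymax>
            </bndbox>
        </object>
    </annotation>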

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils import data
    
    import numpy as np
    import matplotlib.pyplot as plt
    %matplotlib inline

    import torchvision
    from torchvision import transforms
    import os   # filesystem paths

    from lxml import etree    # parse the annotation XML
    from matplotlib.patches import Rectangle  # draw rectangles (bounding boxes)
    import glob  # collect file paths by pattern

    from PIL import Image  # read images
    
    BATCH_SIZE = 16
    
    pil_img = Image.open(r'data/Oxford-IIIT Pets Dataset/dataset/images/Abyssinian_1.jpg')  # open the first image
    # np_img = np.array(pil_img)    # convert to a numpy ndarray before plotting
    # np_img.shape   # (400, 600, 3)
    # plt.imshow(np_img)
    # plt.show()
    
    xml = open(r'data/Oxford-IIIT Pets Dataset/dataset/annotations/xmls/Abyssinian_1.xml').read()  # read the annotation
    sel = etree.HTML(xml)   # parse the XML with etree's (lenient) HTML parser
    width = sel.xpath('//size/width/text()')[0]    # '//' searches from the root; text() grabs the tag's text, here '600'
    height = sel.xpath('//size/height/text()')[0]  # here '400'
    xmin = sel.xpath('//bndbox/xmin/text()')[0]
    ymin = sel.xpath('//bndbox/ymin/text()')[0]
    xmax = sel.xpath('//bndbox/xmax/text()')[0]
    ymax = sel.xpath('//bndbox/ymax/text()')[0]
    
    width = int(width)
    height = int(height)
    xmin = int(xmin)
    ymin = int(ymin)
    xmax = int(xmax)
    ymax = int(ymax)
    
    # plt.imshow(np_img)
    # # Rectangle(): anchor point (x, y), width, height. In Jupyter, Shift+Tab shows the signature.
    # rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red') 
    # ax = plt.gca()   # get the current axes
    # ax.axes.add_patch(rect) # add the rectangle to the current axes
    # plt.show()
    
    img = pil_img.resize((224, 224))
    xmin = xmin/width*224   # convert the coordinate to a fraction of width/height, then scale to the new size;
    ymin = ymin/height*224  # the ratios xmin/width and ymin/height stay the same however the image is resized,
    xmax = xmax/width*224   # so using ratios as targets lets the box be drawn at any resize scale
    ymax = ymax/height*224
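    # Worked example: if width=600 and the annotated xmin=333, the stored ratio is
    # 333/600 = 0.555, which maps to 0.555*224 ≈ 124 px in the resized 224x224 image.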
    
    # plt.imshow(img)
    # rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
    # ax = plt.gca()
    # ax.axes.add_patch(rect)
    # plt.show()
    
    # Collect the image paths
    images = glob.glob(r'data/Oxford-IIIT Pets Dataset/dataset/images/*.jpg')
    # images[:5]
    # ['dataset/images\\Abyssinian_1.jpg',
    #  'dataset/images\\Abyssinian_10.jpg',
    #  'dataset/images\\Abyssinian_100.jpg',
    #  'dataset/images\\Abyssinian_101.jpg',
    #  'dataset/images\\Abyssinian_102.jpg']
    # len(images) #7390
    xmls = glob.glob(r'data/Oxford-IIIT Pets Dataset/dataset/annotations/xmls/*.xml')
    # xmls[:5]
    # ['dataset/annotations/xmls\\Abyssinian_1.xml',
    #  'dataset/annotations/xmls\\Abyssinian_10.xml',
    #  'dataset/annotations/xmls\\Abyssinian_100.xml',
    #  'dataset/annotations/xmls\\Abyssinian_101.xml',
    #  'dataset/annotations/xmls\\Abyssinian_102.xml']
    # len(xmls)  # 3686 -- so not every image has been annotated.
    # Keep only the images whose filename matches an annotation file, for training/inference.
    xmls_names = [x.split('\\')[-1].split('.xml')[0] for x in xmls]
    imgs = [img for img in images if 
            img.split('\\')[-1].split('.jpg')[0] in xmls_names]  # keep an image only if its name is in xmls_names
    # len(imgs)  # 3686
    # imgs and xmls now line up one-to-one.
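    # Note: split('\\') assumes Windows path separators. A portable sketch of the
    # same matching with os.path (works with either '/' or '\\'):
    # stem = lambda p: os.path.splitext(os.path.basename(p))[0]
    # xmls_names = {stem(x) for x in xmls}
    # imgs = [img for img in images if stem(img) in xmls_names]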
    
    # Convert the XML files into label form.
    scal = 224
    
    def to_labels(path):
        xml = open(path).read()                  # read the annotation file
        sel = etree.HTML(xml)                    # parse the XML
        width = int(sel.xpath('//size/width/text()')[0])      # image width
        height = int(sel.xpath('//size/height/text()')[0])    # image height
        xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
        ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
        xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
        ymax = int(sel.xpath('//bndbox/ymax/text()')[0])
        return [xmin/width, ymin/height, xmax/width, ymax/height]   # box coordinates as ratios of the image size
    
    labels = [to_labels(path) for path in xmls]
    out1_label, out2_label, out3_label, out4_label = list(zip(*labels))
    # len(out1_label), len(out2_label), len(out3_label), len(out4_label)  # (3686, 3686, 3686, 3686)
    index = np.random.permutation(len(imgs))   # a random permutation of indices, one per image
    images = np.array(imgs)[index]
    labels = np.array(labels)[index]
    # labels.shape   #(3686, 4)
    labels = labels.astype(np.float32)
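    # Optional (not in the original): seed NumPy before the permutation above,
    # e.g. np.random.seed(42), so the shuffle and the train/test split are reproducible.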
    
    # Split into training and test sets
    i = int(len(imgs)*0.8)
    train_images = images[:i]
    train_labels = labels[:i]
    test_images = images[i: ]
    test_labels = labels[i:]
    
    transform = transforms.Compose([
                        transforms.Resize((224, 224)),
                        transforms.ToTensor(),
    ])
    
    class Oxford_dataset(data.Dataset):
        def __init__(self, img_paths, labels_list):
            self.imgs = img_paths
            self.labels = labels_list
            
        def __getitem__(self, index):
            img = self.imgs[index]
            pil_img = Image.open(img)
            img_tensor = transform(pil_img)
            label_1, label_2, label_3, label_4 = self.labels[index]
            return img_tensor, label_1, label_2, label_3, label_4
            
        def __len__(self):
            return len(self.imgs)
        
    
    train_dataset = Oxford_dataset(train_images, train_labels)
    test_dataset = Oxford_dataset(test_images, test_labels)
    
    train_dl = data.DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle=True)
    test_dl = data.DataLoader(test_dataset,batch_size=BATCH_SIZE)
    
    imgs_batch,out1_batch,out2_batch,out3_batch,out4_batch = next(iter(train_dl))
    # imgs_batch.shape, out1_batch.shape   # (torch.Size([16, 3, 224, 224]), torch.Size([16]))
    
    # Visualize a batch
    # plt.figure(figsize=(12, 8))
    # for i, (img, label1, label2,
    #         label3, label4) in enumerate(zip(imgs_batch[:2],    # zip iterates the tensors in parallel; [:2] takes the first 2 images
    #                                          out1_batch[:2], 
    #                                          out2_batch[:2], 
    #                                          out3_batch[:2], 
    #                                          out4_batch[:2])):
    #     img = img.permute(1, 2, 0).numpy()       # move channels last: (C,H,W) -> (H,W,C); ToTensor output is already in [0,1]
    #     plt.subplot(2, 3, i+1)                   # 2 rows x 3 cols; subplot positions start at 1, so use i+1
    #     plt.imshow(img)
    #     xmin, ymin, xmax, ymax = label1*224, label2*224, label3*224, label4*224    # the labels are relative positions; multiply by 224 for pixel coordinates
    #     rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
    #     ax = plt.gca()
    #     ax.axes.add_patch(rect)
    
    # Build the localization model
    resnet = torchvision.models.resnet101(pretrained=True)
    in_f = resnet.fc.in_features   # size of the feature vector produced by the convolutional part
    # print(in_f)   # 2048
    # len(list(resnet.children()))   # resnet.children() lists the model's 10 top-level sublayers
    # list(resnet.children())[-1]    # Linear(in_features=2048, out_features=1000, bias=True), the final linear layer
    # We want everything except that linear layer as a feature extractor: list(resnet.children())[:-1],
    # wrapped in nn.Sequential to form the convolutional base.
    class Net(nn.Module):
        def __init__(self):      # a resnet conv base plus 4 linear heads, one per (relative) coordinate
            super(Net, self).__init__()
            self.conv_base = nn.Sequential(*list(resnet.children())[:-1])   # unpack with * to keep all layers except the last as the conv base
    #       self.conv_base = nn.Sequential(*list(resnet.children())[:5])    # (alternative: only the first 5 sublayers)
            self.fc1 = nn.Linear(in_f, 1)   # each head outputs a single scalar
            self.fc2 = nn.Linear(in_f, 1)
            self.fc3 = nn.Linear(in_f, 1)
            self.fc4 = nn.Linear(in_f, 1)
    
        def forward(self, x):
            x = self.conv_base(x)       # run the conv base
            x = x.view(x.size(0), -1)
            x1 = self.fc1(x)
            x2 = self.fc2(x)
            x3 = self.fc3(x)
            x4 = self.fc4(x)
            return x1, x2, x3, x4 
        
    model = Net()
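    Since all four heads read the same 2048-dimensional feature vector, an
    equivalent and more compact design (a sketch, not the article's code) is a
    single 4-unit linear head; the article keeps four separate heads so that the
    four-loss training loop below reads more directly.

    # Hypothetical alternative: one 4-unit head instead of four 1-unit heads.
    # class Net4(nn.Module):
    #     def __init__(self):
    #         super(Net4, self).__init__()
    #         self.conv_base = nn.Sequential(*list(resnet.children())[:-1])
    #         self.fc = nn.Linear(in_f, 4)    # xmin, ymin, xmax, ymax together
    #     def forward(self, x):
    #         x = self.conv_base(x)
    #         x = x.view(x.size(0), -1)
    #         return self.fc(x)               # shape (batch, 4)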
    
    if torch.cuda.is_available():
        model.to('cuda')
        
    loss_fn = nn.MSELoss()  # localization here is a regression problem (predict exact positions), hence MSELoss
    
    from torch.optim import lr_scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
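    # With step_size=7 and gamma=0.1, the learning rate is 1e-4 for epochs 0-6 and
    # drops to 1e-5 from epoch 7 on, so a 10-epoch run sees exactly one decay.
    # A throwaway sketch to inspect the schedule (don't run it before training,
    # since stepping the scheduler consumes its state):
    # for epoch in range(10):
    #     print(epoch, exp_lr_scheduler.get_last_lr())
    #     exp_lr_scheduler.step()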
    
    def fit(epoch, model, trainloader, testloader):
        running_loss = 0
        
        model.train()
        for x, y1, y2, y3, y4 in trainloader:
            if torch.cuda.is_available():
                x, y1, y2, y3, y4 = (x.to('cuda'), 
                                     y1.to('cuda'), y2.to('cuda'),
                                     y3.to('cuda'), y4.to('cuda'))       
            y_pred1, y_pred2, y_pred3, y_pred4 = model(x)
            
            loss1 = loss_fn(y_pred1, y1.unsqueeze(1))   # targets arrive with shape (batch,); unsqueeze to (batch, 1) to match the predictions
            loss2 = loss_fn(y_pred2, y2.unsqueeze(1))
            loss3 = loss_fn(y_pred3, y3.unsqueeze(1))
            loss4 = loss_fn(y_pred4, y4.unsqueeze(1))
            loss = loss1 + loss2 + loss3 + loss4
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()   # .item() already detaches, so no grad context is needed
        exp_lr_scheduler.step()
        epoch_loss = running_loss / len(trainloader.dataset)

        test_running_loss = 0
        
        model.eval()
        with torch.no_grad():
            for x, y1, y2, y3, y4 in testloader:
                if torch.cuda.is_available():
                    x, y1, y2, y3, y4 = (x.to('cuda'), 
                                         y1.to('cuda'), y2.to('cuda'),
                                         y3.to('cuda'), y4.to('cuda'))
                y_pred1, y_pred2, y_pred3, y_pred4 = model(x)
                loss1 = loss_fn(y_pred1, y1.unsqueeze(1))
                loss2 = loss_fn(y_pred2, y2.unsqueeze(1))
                loss3 = loss_fn(y_pred3, y3.unsqueeze(1))
                loss4 = loss_fn(y_pred4, y4.unsqueeze(1))
                loss = loss1 + loss2 + loss3 + loss4
                test_running_loss += loss.item()
                
        epoch_test_loss = test_running_loss / len(testloader.dataset)
            
        print('epoch: ', epoch, 
              'loss: ', round(epoch_loss, 3),
              'test_loss: ', round(epoch_test_loss, 3),
                 )
            
        return epoch_loss, epoch_test_loss
    
    epochs = 10
    
    train_loss = []
    test_loss = []
    
    for epoch in range(epochs):
        epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
        train_loss.append(epoch_loss)
        test_loss.append(epoch_test_loss)
        
    # plt.figure()
    # plt.plot(range(1, len(train_loss)+1), train_loss, 'r', label='Training loss')
    # plt.plot(range(1, len(train_loss)+1), test_loss, 'bo', label='Validation loss')
    # plt.title('Training and Validation Loss')
    # plt.xlabel('Epoch')
    # plt.ylabel('Loss Value')
    # plt.legend()
    # plt.show()
    
    # Save the model
    PATH = 'location_model.pth'
    torch.save(model.state_dict(), PATH)
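    # To restore the model later, the standard PyTorch pattern is to rebuild the
    # architecture and then load the saved state dict:
    # model = Net()
    # model.load_state_dict(torch.load(PATH))
    # model.eval()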
    
    # plt.figure(figsize=(8, 24))
    # imgs, _, _, _, _ = next(iter(test_dl))  # take only the images from a test batch
    # if torch.cuda.is_available():
    #     imgs = imgs.to('cuda')
    # model.eval()                            # switch to inference mode
    # with torch.no_grad():                   # no gradients needed for prediction
    #     out1, out2, out3, out4 = model(imgs)
    # for i in range(6):
    #     plt.subplot(6, 1, i+1)
    #     plt.imshow(imgs[i].permute(1, 2, 0).cpu().numpy())
    #     xmin, ymin, xmax, ymax = (out1[i].item()*224, 
    #                               out2[i].item()*224, 
    #                               out3[i].item()*224, 
    #                               out4[i].item()*224)
    #     rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')   # draw the predicted box
    #     ax = plt.gca()
    #     ax.axes.add_patch(rect)
    
