PyTorch Object Detection (Multi-Object Detection)

Author: 深思海数_willschang | Published 2021-09-06 14:40

    Sample data: COCO 2014

    http://cocodataset.org/#home

    • Download script: get_coco_dataset.sh
    #!/bin/bash
    
    # Clone COCO API
    git clone https://github.com/pdollar/coco
    cd coco
    
    mkdir images
    cd images
    
    # Download Images
    wget -c https://pjreddie.com/media/files/train2014.zip
    wget -c https://pjreddie.com/media/files/val2014.zip
    
    # Unzip
    unzip -q train2014.zip
    unzip -q val2014.zip
    
    cd ..
    
    # Download COCO Metadata
    wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
    wget -c https://pjreddie.com/media/files/coco/5k.part
    wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
    wget -c https://pjreddie.com/media/files/coco/labels.tgz
    tar xzf labels.tgz
    unzip -q instances_train-val2014.zip
    
    # Set Up Image Lists
    paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
    paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
    

    On Windows you can simply paste the download URLs from the script into a browser to fetch the data, which saves installing a shell interpreter, for example:
    https://pjreddie.com/media/files/instances_train-val2014.zip

    YOLO v3: darknet

    https://github.com/pjreddie/darknet


    Helper functions: myutils.py

    import torch
    from torch import nn
    
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    def parse_model_config(path2file):
        cfg_file = open(path2file, 'r')
        lines = cfg_file.read().split('\n')
    
        lines = [x for x in lines if x and not x.startswith('#')]
        lines = [x.rstrip().lstrip() for x in lines] 
        
        blocks_list = []
        for line in lines:
            # start of a new block
            if line.startswith('['): 
                blocks_list.append({})
                blocks_list[-1]['type'] = line[1:-1].rstrip()
            else:
                key, value = line.split("=")
                value = value.strip()
                blocks_list[-1][key.rstrip()] = value.strip()
    
        return blocks_list
    
    
    def create_layers(blocks_list):
        hyperparams = blocks_list[0]
        channels_list = [int(hyperparams["channels"])]
        module_list = nn.ModuleList()
        
        for layer_ind, layer_dict in enumerate(blocks_list[1:]):
            modules = nn.Sequential()
            
            if layer_dict["type"] == "convolutional":
                filters = int(layer_dict["filters"])
                kernel_size = int(layer_dict["size"])
                pad = (kernel_size - 1) // 2
                bn = int(layer_dict.get("batch_normalize", 0))  # 1 if the layer uses batch norm
                
                
                conv2d = nn.Conv2d(
                            in_channels=channels_list[-1],
                            out_channels=filters,
                            kernel_size=kernel_size,
                            stride=int(layer_dict["stride"]),
                            padding=pad,
                            bias=not bn)
                modules.add_module("conv_{0}".format(layer_ind), conv2d)
                
                if bn:
                    bn_layer = nn.BatchNorm2d(filters,momentum=0.9, eps=1e-5)
                    modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)
                    
                    
                if layer_dict["activation"] == "leaky":
                    activn = nn.LeakyReLU(0.1)
                    modules.add_module("leaky_{0}".format(layer_ind), activn)
                    
            elif layer_dict["type"] == "upsample":
                stride = int(layer_dict["stride"])
                upsample = nn.Upsample(scale_factor = stride)
                modules.add_module("upsample_{}".format(layer_ind), upsample) 
                
    
            elif layer_dict["type"] == "shortcut":
                backwards =int(layer_dict["from"])
                filters = channels_list[1:][backwards]
                modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())
                
            elif layer_dict["type"] == "route":
                layers = [int(x) for x in layer_dict["layers"].split(",")]
                filters = sum([channels_list[1:][l] for l in layers])
                modules.add_module("route_{}".format(layer_ind), EmptyLayer())
                
            elif layer_dict["type"] == "yolo":
                anchors = [int(a) for a in layer_dict["anchors"].split(",")]
                anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
    
                mask = [int(m) for m in layer_dict["mask"].split(",")]
                
                anchors = [anchors[i] for i in mask]
                
                num_classes = int(layer_dict["classes"])
                img_size = int(hyperparams["height"])
                
                yolo_layer = YOLOLayer(anchors, num_classes, img_size)
                modules.add_module("yolo_{}".format(layer_ind), yolo_layer)
                
            module_list.append(modules)       
            channels_list.append(filters)
    
        return hyperparams, module_list        
    
    
    
    class EmptyLayer(nn.Module):
        def __init__(self):
            super(EmptyLayer, self).__init__()
            
            
    class YOLOLayer(nn.Module):
    
        def __init__(self, anchors, num_classes, img_dim=416):
            super(YOLOLayer, self).__init__()
            self.anchors = anchors
            self.num_anchors = len(anchors)
            self.num_classes = num_classes
            self.img_dim = img_dim
            self.grid_size = 0 
            
            
        def forward(self, x_in):
            batch_size = x_in.size(0)
            grid_size = x_in.size(2)
            
            prediction = x_in.view(batch_size, self.num_anchors, 
                                 self.num_classes + 5, grid_size, grid_size)
            prediction = prediction.permute(0, 1, 3, 4, 2)
            prediction = prediction.contiguous()
            
            obj_score = torch.sigmoid(prediction[..., 4]) 
            pred_cls = torch.sigmoid(prediction[..., 5:]) 
            
            if grid_size != self.grid_size:
                self.compute_grid_offsets(grid_size, cuda=x_in.is_cuda)
                
            pred_boxes = self.transform_outputs(prediction) 
            
            output = torch.cat(
                (
                    pred_boxes.view(batch_size, -1, 4),
                    obj_score.view(batch_size, -1, 1),
                    pred_cls.view(batch_size, -1, self.num_classes),
                ), -1,)
            return output        
        
        
            
        def compute_grid_offsets(self, grid_size, cuda=True):
            self.grid_size = grid_size
            self.stride = self.img_dim / self.grid_size
            
            self.grid_x = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1 ).type(torch.float32)
            self.grid_y = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1).transpose(3, 2).type(torch.float32)
            
            scaled_anchors = [(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]
            self.scaled_anchors=torch.tensor(scaled_anchors,device=device)
            
            self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
            self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
            
            
            
        def transform_outputs(self,prediction):
            device = prediction.device
            x = torch.sigmoid(prediction[..., 0]) # Center x
            y = torch.sigmoid(prediction[..., 1]) # Center y
            w = prediction[..., 2] # Width
            h = prediction[..., 3] # Height
    
            pred_boxes = torch.zeros_like(prediction[..., :4]).to(device)
            pred_boxes[..., 0] = x.data + self.grid_x
            pred_boxes[..., 1] = y.data + self.grid_y
            pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
            pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
            
            return pred_boxes * self.stride
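
    To see what these helpers produce, here is a minimal sketch (assuming the myutils.py above is importable) that parses a tiny, illustrative config (not the real yolov3.cfg) and builds the corresponding layers; "tiny.cfg" is just a throwaway file for the demonstration.

    # Minimal sketch: parse a tiny, illustrative cfg and build its layers.
    # Assumes myutils.py from above is on the Python path; "tiny.cfg" is a
    # throwaway file used only for this demonstration.
    from myutils import parse_model_config, create_layers
    
    tiny_cfg = (
        "[net]\n"
        "channels=3\n"
        "height=416\n"
        "width=416\n"
        "[convolutional]\n"
        "batch_normalize=1\n"
        "filters=16\n"
        "size=3\n"
        "stride=1\n"
        "pad=1\n"
        "activation=leaky\n"
    )
    with open("tiny.cfg", "w") as f:
        f.write(tiny_cfg)
    
    blocks = parse_model_config("tiny.cfg")
    print(blocks[0])    # {'type': 'net', 'channels': '3', 'height': '416', 'width': '416'}
    print(blocks[1]["type"], blocks[1]["filters"])    # convolutional 16
    
    hyperparams, module_list = create_layers(blocks)
    print(module_list)  # one Sequential: Conv2d(3, 16, 3x3) + BatchNorm2d(16) + LeakyReLU(0.1)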
    

    Importing the required packages

    %matplotlib inline
    import matplotlib.pyplot as plt
    import matplotlib.pylab as plab
    from PIL import Image, ImageDraw, ImageFont
    import numpy as np
    import pandas as pd
    import os
    import copy
    import random
    import collections
    from sklearn.model_selection import StratifiedShuffleSplit
    
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader, random_split, Subset
    import torchvision.transforms as transforms
    from torchvision import models,utils, datasets
    import torchvision.transforms.functional as TF
    from torchvision.transforms.functional import to_pil_image
    import torch.nn.functional as F
    from torch import optim
    from torch.optim.lr_scheduler import ReduceLROnPlateau
    from torchsummary import summary
    
    from myutils import parse_model_config, create_layers
    
    
    # CPU or GPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # num_workers enables multi-process loading in the DataLoader
    workers = 0 if os.name=='nt' else 4
    

    Data processing and exploration

    # define the COCO dataset
    class CocoDataset(Dataset):
        def __init__(self, path_list_file, transform=None, trans_params=None):
            # build image file path
            with open(path_list_file, 'r') as file:
                self.path_imgs = file.readlines()
            self.path_imgs = ['./data/mod' + path for path in self.path_imgs]
            print(self.path_imgs[1])
            # get the labels path
            self.path_labels = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') 
                               for path in self.path_imgs]
            print(self.path_labels[1])
            self.trans_params = trans_params
            self.transform = transform
            
        def __len__(self):
            return len(self.path_imgs)
        
        def __getitem__(self, idx):
            path_img = self.path_imgs[idx % len(self.path_imgs)].rstrip()
            img = Image.open(path_img).convert('RGB')
            path_label = self.path_labels[idx % len(self.path_imgs)].rstrip()
            labels = None
            if os.path.exists(path_label):
                labels = np.loadtxt(path_label).reshape(-1, 5)
            if self.transform:
                img, labels = self.transform(img, labels, self.trans_params)
                
            return img, labels, path_img
        
    # training dataset (trainvalno5k)
    root_data = './data/mod'
    path_train_list = os.path.join(root_data, 'trainvalno5k.txt')
    coco_train = CocoDataset(path_train_list)
    print(len(coco_train))
    # val dataset
    path_val_list = os.path.join(root_data, '5k.txt')
    coco_val = CocoDataset(path_val_list, transform=None, trans_params=None)
    print(len(coco_val))
    """
    ./data/mod/images/train2014/COCO_train2014_000000000025.jpg
    
    ./data/mod/labels/train2014/COCO_train2014_000000000025.txt
    
    117264
    ./data/mod/images/val2014/COCO_val2014_000000000192.jpg
    
    ./data/mod/labels/val2014/COCO_val2014_000000000192.txt
    
    5000
    """
    

    Inspecting the data

    img, labels, path2img = coco_train[1] 
    print("image size:", img.size, type(img))
    print("labels shape:", labels.shape, type(labels))
    print("labels \n", labels)
    
    print('--'*10)
    img, labels, path2img = coco_val[7] 
    print("image size:", img.size, type(img))
    print("labels shape:", labels.shape, type(labels))
    print("labels \n", labels)
    """
    image size: (640, 426) <class 'PIL.Image.Image'>
    labels shape: (2, 5) <class 'numpy.ndarray'>
    labels 
     [[23.        0.770336  0.489695  0.335891  0.697559]
     [23.        0.185977  0.901608  0.206297  0.129554]]
    --------------------
    image size: (640, 427) <class 'PIL.Image.Image'>
    labels shape: (3, 5) <class 'numpy.ndarray'>
    labels 
     [[20.        0.539742  0.521429  0.758641  0.957143]
     [20.        0.403469  0.470714  0.641656  0.695948]
     [20.        0.853039  0.493279  0.293922  0.982061]]
    """
    
    path2cocoNames="./data/mod/coco.names"
    fp = open(path2cocoNames, "r")
    coco_names = fp.read().split("\n")[:-1]
    print("number of classes:", len(coco_names))
    print(coco_names)
    """
    number of classes: 80
    ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    
    """
    

    Displaying images

    # rescale normalized bounding boxes to the original image size
    def rescale_bbox(bb, W, H):
        x,y,w,h = bb
        return [x*W, y*H, w*W, h*H]
    
    # display an image with its bounding boxes
    COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
    
    def show_img_bbox(img,targets):
        if torch.is_tensor(img):
            img = to_pil_image(img)
        if torch.is_tensor(targets):
            targets = targets.numpy()[:,1:]
            
        W, H = img.size
        draw = ImageDraw.Draw(img)
        
        for tg in targets:
            id_ = int(tg[0])
            bbox = tg[1:]
            bbox = rescale_bbox(bbox,W,H)
            xc, yc, w, h = bbox
            
            color = [int(c) for c in COLORS[id_]]
            name = coco_names[id_]
            
            draw.rectangle(((xc-w/2, yc-h/2), (xc+w/2, yc+h/2)),outline=tuple(color),width=3)
            draw.text((xc-w/2,yc-h/2),name, fill=(255,255,255,0))
        plt.imshow(np.array(img))
    
    • Training data
    np.random.seed(2)
    rnd_ind = np.random.randint(len(coco_train))
    img, labels, path2img = coco_train[rnd_ind] 
    print(img.size, labels.shape)
    
    plt.rcParams['figure.figsize'] = (20, 10)
    show_img_bbox(img,labels)
    """
    (640, 428) (2, 5)
    """
    
    • Validation data
    np.random.seed(0)
    rnd_ind = np.random.randint(len(coco_val))
    img, labels, path2img = coco_val[rnd_ind] 
    print(img.size, labels.shape)
    
    plt.rcParams['figure.figsize'] = (20, 10)
    show_img_bbox(img,labels)
    """
    (640, 480) (3, 5)
    """
    

    Data transforms

    # data transforms
    # helper function: pad_to_square
    def pad_to_square(img, boxes, pad_value=0, normalized_labels=True):
        w, h = img.size
        w_factor, h_factor = (w,h) if normalized_labels else (1, 1)
        
        dim_diff = np.abs(h - w)
        pad1 = dim_diff // 2
        pad2 = dim_diff - pad1
        
        if h<=w:
            left, top, right, bottom= 0, pad1, 0, pad2
        else:
            left, top, right, bottom= pad1, 0, pad2, 0
        padding = (left, top, right, bottom)
    
        img_padded = TF.pad(img, padding=padding, fill=pad_value)
        w_padded, h_padded = img_padded.size
                
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)    
        
        x1 += padding[0]  # shift x coordinates by the left pad
        y1 += padding[1]  # shift y coordinates by the top pad
        x2 += padding[0]
        y2 += padding[1]
                
        boxes[:, 1] = ((x1 + x2) / 2) / w_padded
        boxes[:, 2] = ((y1 + y2) / 2) / h_padded
        boxes[:, 3] *= w_factor / w_padded
        boxes[:, 4] *= h_factor / h_padded
    
        return img_padded, boxes
    
    def hflip(image, labels):
        image = TF.hflip(image)
        labels[:, 1] = 1.0 - labels[:, 1]
        return image, labels
    
    
    def transformer(image, labels, params):
        if params["pad2square"] is True:
            image,labels = pad_to_square(image, labels)
        
        image = TF.resize(image,params["target_size"])
    
        if random.random() < params["p_hflip"]:
            image,labels = hflip(image,labels)
    
        image = TF.to_tensor(image)
        targets = torch.zeros((len(labels), 6))
        targets[:, 1:] = torch.from_numpy(labels)
        
        return image, targets
    
    trans_params_train = {
        "target_size" : (416, 416),
        "pad2square": True,
        "p_hflip" : 1.0,
        "normalized_labels": True,
    }
    coco_train = CocoDataset(path_train_list, 
                            transform=transformer,
                             trans_params=trans_params_train)
    
    trans_params_val = {
        "target_size" : (416, 416),
        "pad2square": True,
        "p_hflip" : 0.0,
        "normalized_labels": True,
    }
    coco_val = CocoDataset(path_val_list,
                          transform=transformer,
                          trans_params=trans_params_val)
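
    A quick sanity check of pad_to_square (a sketch, assuming the cells above have been run; the image and box are illustrative dummies): a 640x426 image is padded to 640x640, the box keeps its normalized centre and width, and the normalized height shrinks by a factor of 426/640.

    # Sanity check for pad_to_square (illustrative dummy image and box).
    dummy_img = Image.new("RGB", (640, 426))
    dummy_boxes = np.array([[23.0, 0.5, 0.5, 0.2, 0.4]])  # [class, xc, yc, w, h], normalized
    
    img_sq, boxes_sq = pad_to_square(dummy_img, dummy_boxes)
    print(img_sq.size)  # (640, 640)
    print(boxes_sq)     # xc and yc stay 0.5 (box was centred), w stays 0.2,
                        # h becomes 0.4 * 426/640, roughly 0.266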
    

    Images after the transforms

    np.random.seed(2)
    rnd_ind=np.random.randint(len(coco_train))
    img, targets, path2img = coco_train[rnd_ind] 
    print("image shape:", img.shape)
    print("labels shape:", targets.shape) 
    
    plt.rcParams['figure.figsize'] = (20, 10)
    COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
    show_img_bbox(img,targets)
    
    
    np.random.seed(0)
    rnd_ind=np.random.randint(len(coco_val))
    img, targets, path2img = coco_val[rnd_ind] 
    print("image shape:", img.shape)
    print("labels shape:", targets.shape) 
    
    plt.rcParams['figure.figsize'] = (20, 10)
    COLORS = np.random.randint(0, 255, size=(80, 3),dtype="uint8")
    show_img_bbox(img,targets)
    
    (figure: transformed training sample)
    (figure: transformed validation sample)

    Defining the DataLoader

    batch_size = 4
    def collate_fn(batch):
        imgs, targets, paths = list(zip(*batch))
        
        # Remove empty boxes
        targets = [boxes for boxes in targets if boxes is not None]
        
        # column 0 of each target row stores its sample index within the batch
        for b_i, boxes in enumerate(targets):
            boxes[:, 0] = b_i
        targets = torch.cat(targets, 0)
        imgs = torch.stack([img for img in imgs])
        return imgs, targets, paths
    
    train_dl = DataLoader(
            coco_train,
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,
            pin_memory=True,
            collate_fn=collate_fn,
            )
    
    
    val_dl = DataLoader(
            coco_val,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=True,
            collate_fn=collate_fn,
            )
    
    
    # sanity-check that one batch loads correctly
    torch.manual_seed(0)
    for imgs_batch,tg_batch,path_batch in train_dl:
        break
    print(imgs_batch.shape)
    print(tg_batch.shape,tg_batch.dtype)
    
    for imgs_batch,tg_batch,path_batch in val_dl:
        break
    print(imgs_batch.shape)
    print(tg_batch.shape,tg_batch.dtype)
    """
    torch.Size([4, 3, 416, 416])
    torch.Size([30, 6]) torch.float32
    torch.Size([4, 3, 416, 416])
    torch.Size([57, 6]) torch.float32
    """
    

    Building the model

    The following YOLO v3 figures and description of the network structure are reproduced from: https://blog.csdn.net/leviopku/article/details/82660381

    YOLO v3
    DarkNet53

    DBL: shown in the lower-left of the figure above; in the code this is Darknetconv2d_BN_Leaky, the basic building block of yolo_v3. It is simply convolution + BN + Leaky ReLU. In v3, BN and Leaky ReLU are inseparable from the convolution layer (except for the final convolution) and together they form the smallest component.
    resn: n is a number (res1, res2, …, res8, and so on) giving how many res_units the res_block contains. These are the large components of yolo_v3. Starting with v3, YOLO borrows the residual structure of ResNet, which allows a much deeper network (from darknet-19 in v2, which has no residual connections, to darknet-53 in v3). The res_block is shown in the lower-right of figure 1; its basic component is again the DBL.
    concat: tensor concatenation. An intermediate darknet feature map is concatenated with the upsampled output of a later layer. Concatenation is different from the residual add: it grows the channel dimension of the tensor, whereas add simply sums the tensors and leaves their dimensions unchanged.

    The whole v3 architecture contains no pooling layers and no fully connected layers. During the forward pass, spatial size is reduced by changing the stride of the convolutions; for example stride=(2, 2) halves each side of the feature map (the area shrinks to 1/4). As in yolo_v2, five such reductions shrink the feature map to 1/2^5, i.e. 1/32, of the original input size: a 416x416 input gives a 13x13 output (416/32 = 13).
    Like v2, the yolo_v3 backbone reduces the output feature map to 1/32 of the input, so the input image size is usually required to be a multiple of 32.

    Copyright notice: this excerpt is an original article by CSDN blogger 「木盏」, licensed under CC 4.0 BY-SA; please include a link to the original and this notice when reproducing it.
    Original article: https://blog.csdn.net/leviopku/article/details/82660381
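
    To make the DBL and res_unit components concrete, below is a minimal PyTorch sketch (illustrative only, not copied from the darknet config): DBL is convolution + BN + LeakyReLU, a residual unit stacks a 1x1 and a 3x3 DBL around a shortcut add, and downsampling is done with a stride-2 convolution instead of pooling.

    # Illustrative sketch of the building blocks described above (not the
    # actual darknet config; layer sizes are chosen only for demonstration).
    import torch
    from torch import nn
    
    class DBL(nn.Module):
        """Conv2d + BatchNorm2d + LeakyReLU: the basic yolo_v3 component."""
        def __init__(self, c_in, c_out, k=3, stride=1):
            super().__init__()
            self.block = nn.Sequential(
                nn.Conv2d(c_in, c_out, k, stride=stride, padding=k // 2, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.1),
            )
        def forward(self, x):
            return self.block(x)
    
    class ResUnit(nn.Module):
        """res_unit: 1x1 DBL -> 3x3 DBL, added back to the input (shortcut)."""
        def __init__(self, channels):
            super().__init__()
            self.conv1 = DBL(channels, channels // 2, k=1)
            self.conv2 = DBL(channels // 2, channels, k=3)
        def forward(self, x):
            return x + self.conv2(self.conv1(x))
    
    x = torch.rand(1, 3, 416, 416)
    x = DBL(3, 32)(x)               # 416 x 416, no downsampling
    x = DBL(32, 64, stride=2)(x)    # 208 x 208: a stride-2 conv halves each side
    x = ResUnit(64)(x)              # residual block keeps the spatial size
    print(x.shape)                  # torch.Size([1, 64, 208, 208])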

    path_config = "./config/yolov3.cfg"
    # Defining the Darknet Model
    class Darknet(nn.Module):
        def __init__(self, config_path, img_size=416):
            super(Darknet, self).__init__()
            self.blocks_list = parse_model_config(config_path)
            self.hyperparams, self.module_list = create_layers(self.blocks_list)
            self.img_size = img_size
            
        def forward(self, x):
            img_dim = x.shape[2]
            layer_outputs, yolo_outputs = [], []
            
            for block, module in zip(self.blocks_list[1:], self.module_list):
                if block["type"] in ["convolutional", "upsample", "maxpool"]:
                    x = module(x)           
                elif block["type"] == "shortcut":
                    layer_ind = int(block["from"])
                    x = layer_outputs[-1] + layer_outputs[layer_ind]
                elif block["type"] == "yolo":
                    x = module[0](x)
                    yolo_outputs.append(x)
                elif block["type"] == "route":
                    x = torch.cat([layer_outputs[int(l_i)] for l_i in block["layers"].split(",")], 1)
                layer_outputs.append(x)
            yolo_out_cat = torch.cat(yolo_outputs, 1)
            return yolo_out_cat, yolo_outputs
        
    
    model = Darknet(path_config).to(device)
    print(model)
    """
    Darknet(
      (module_list): ModuleList(
        (0): Sequential(
          (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
          (leaky_0): LeakyReLU(negative_slope=0.1)
        )
        (1): Sequential(
          (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
          (leaky_1): LeakyReLU(negative_slope=0.1)
        )
      ...
        (105): Sequential(
          (conv_105): Conv2d(256, 255, kernel_size=(1, 1), stride=(1, 1))
        )
        (106): Sequential(
          (yolo_106): YOLOLayer()
        )
      )
    )
    """
    
    • Inspecting the model
    print(next(model.parameters()).device)
    
    dummy_img = torch.rand(1,3,416,416).to(device)
    with torch.no_grad():
        dummy_out_cat, dummy_out = model.forward(dummy_img)
        print(dummy_out_cat.shape)
        print(dummy_out[0].shape,dummy_out[1].shape,dummy_out[2].shape)
    """
    cuda:0
    torch.Size([1, 10647, 85])
    torch.Size([1, 507, 85]) torch.Size([1, 2028, 85]) torch.Size([1, 8112, 85])
    """
    
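    The shapes above follow directly from the three detection heads: each predicts 3 anchors per grid cell on 13x13, 26x26 and 52x52 grids, and every box carries 85 values (4 box coordinates, 1 objectness score, 80 class scores):

    # Where 10647 boxes and 85 values per box come from, for a 416x416 input:
    grids = [416 // 32, 416 // 16, 416 // 8]          # [13, 26, 52]
    boxes_per_head = [g * g * 3 for g in grids]       # 3 anchors per cell
    print(boxes_per_head)                             # [507, 2028, 8112]
    print(sum(boxes_per_head))                        # 10647
    print(4 + 1 + 80)                                 # 85 = bbox + objectness + classes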

    Loss function

    def get_loss_batch(output,targets, params_loss, opt=None):
        ignore_thres = params_loss["ignore_thres"]
        scaled_anchors = params_loss["scaled_anchors"]    
        mse_loss = params_loss["mse_loss"]
        bce_loss = params_loss["bce_loss"]
        
        num_yolos = params_loss["num_yolos"]
        num_anchors = params_loss["num_anchors"]
        obj_scale = params_loss["obj_scale"]
        noobj_scale = params_loss["noobj_scale"]
        
        loss = 0.0
        for yolo_ind in range(num_yolos):
            yolo_out = output[yolo_ind]
            batch_size, num_bbxs, _ = yolo_out.shape
            
            # get grid size
            gz_2 = num_bbxs / num_anchors
            grid_size = int(np.sqrt(gz_2))
            
            yolo_out = yolo_out.view(batch_size,num_anchors,grid_size,grid_size,-1)
            
            pred_boxes = yolo_out[:,:,:,:,:4]
            x,y,w,h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
            pred_conf = yolo_out[:,:,:,:,4]
            pred_cls_prob = yolo_out[:,:,:,:,5:]
            
            yolo_targets = get_yolo_targets({
                            "pred_cls_prob": pred_cls_prob,
                            "pred_boxes":pred_boxes,    
                            "targets": targets,    
                            "anchors": scaled_anchors[yolo_ind],    
                            "ignore_thres": ignore_thres,
                        }) 
            
            obj_mask = yolo_targets["obj_mask"]        
            noobj_mask = yolo_targets["noobj_mask"]            
            tx = yolo_targets["tx"]                
            ty = yolo_targets["ty"]                    
            tw = yolo_targets["tw"]                        
            th = yolo_targets["th"]                            
            tcls = yolo_targets["tcls"]                                
            t_conf = yolo_targets["t_conf"]
            
            loss_x = mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = mse_loss(h[obj_mask], th[obj_mask])
            
            loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
            loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
            loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj
            loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])
            loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            
        if opt is not None:
            opt.zero_grad()
            loss.backward()
            opt.step()
            
        return loss.item()
    
    def transform_bbox(bbox, anchors):
        x = bbox[:,:,:,:,0]
        y = bbox[:,:,:,:,1]
        w = bbox[:,:,:,:,2]
        h = bbox[:,:,:,:,3]
        anchor_w = anchors[:, 0].view((1, 3, 1, 1))
        anchor_h = anchors[:, 1].view((1, 3, 1, 1))       
        
        x = x - x.floor()
        y = y - y.floor()
        w = torch.log(w / anchor_w + 1e-16)
        h = torch.log(h / anchor_h + 1e-16)
        return x, y, w, h
    
    
    def get_yolo_targets(params):
        pred_boxes=params["pred_boxes"]
        pred_cls_prob=params["pred_cls_prob"]
        target=params["targets"]
        anchors=params["anchors"] 
        ignore_thres=params["ignore_thres"] 
    
        batch_size = pred_boxes.size(0)
        num_anchors = pred_boxes.size(1)
        grid_size = pred_boxes.size(2)
        num_cls = pred_cls_prob.size(-1)
        
        
        sizeT=batch_size, num_anchors, grid_size, grid_size
        obj_mask = torch.zeros(sizeT, device=device, dtype=torch.bool)   # boolean masks for indexing
        noobj_mask = torch.ones(sizeT, device=device, dtype=torch.bool)
        tx = torch.zeros(sizeT, device=device, dtype=torch.float32)
        ty= torch.zeros(sizeT, device=device, dtype=torch.float32)
        tw= torch.zeros(sizeT, device=device, dtype=torch.float32)
        th= torch.zeros(sizeT, device=device, dtype=torch.float32)
        
        sizeT=batch_size, num_anchors, grid_size, grid_size, num_cls
        tcls= torch.zeros(sizeT, device=device, dtype=torch.float32)
        
        target_bboxes = target[:, 2:] * grid_size
        t_xy = target_bboxes[:, :2]
        t_wh = target_bboxes[:, 2:]
        t_x, t_y = t_xy.t()
        t_w, t_h = t_wh.t()
    
        grid_i, grid_j = t_xy.long().t()
        
        iou_with_anchors=[get_iou_WH(anchor, t_wh) for anchor in anchors]
        iou_with_anchors = torch.stack(iou_with_anchors)
        best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)
        
        batch_inds, target_labels = target[:, :2].long().t()
        obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 1
        noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 0
    
        for ind, iou_wa in enumerate(iou_with_anchors.t()):
            noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = 0
            
            
        tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
        ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()
        
    
        anchor_w=anchors[best_anchor_ind][:, 0]
        tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)
        
        anchor_h=anchors[best_anchor_ind][:, 1]
        th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)
        
        tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1
        
        output={
            "obj_mask" : obj_mask,
            "noobj_mask" : noobj_mask,
            "tx": tx,
            "ty": ty,
            "tw": tw,
            "th": th,
            "tcls": tcls,
            "t_conf": obj_mask.float(),
        }
        return output
    
    def get_iou_WH(wh1, wh2):
        wh2 = wh2.t()
        w1, h1 = wh1[0], wh1[1]
        w2, h2 = wh2[0], wh2[1]
        inter_area = torch.min(w1, w2) * torch.min(h1, h2)
        union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
        return inter_area / union_area
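
    A short sketch of how get_yolo_targets picks the responsible anchor (assuming get_iou_WH above has been defined; the anchor sizes are illustrative values already scaled to a 13x13 grid, not the exact ones from yolov3.cfg): the target's width and height are compared to each anchor with get_iou_WH, and the best match receives the positive mask.

    # Sketch: choosing the best-matching anchor for one target box.
    anchors = torch.tensor([[3.6, 2.8], [4.9, 6.2], [11.7, 10.2]])  # (w, h) in grid cells
    t_wh = torch.tensor([[4.0, 5.0]])                               # one target (w, h) in grid cells
    
    ious = torch.stack([get_iou_WH(a, t_wh) for a in anchors])
    best_iou, best_anchor_ind = ious.max(0)
    print(ious.squeeze())      # IoU of the target with each of the 3 anchors
    print(best_anchor_ind)     # index of the anchor responsible for this target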
    

    Training the model

    def get_lr(opt):
        for param_group in opt.param_groups:
            return param_group['lr']
    
    def loss_epoch(model,params_loss,dataset_dl,sanity_check=False,opt=None):
        running_loss=0.0
        len_data=len(dataset_dl.dataset)
        running_metrics= {}
        
        for xb, yb,_ in dataset_dl:
            yb=yb.to(device)
            _,output=model(xb.to(device))
            loss_b=get_loss_batch(output,yb, params_loss,opt)
            running_loss+=loss_b
            if sanity_check is True:
                break 
        loss=running_loss/float(len_data)
        return loss
    
    def train_val(model, params):
        num_epochs=params["num_epochs"]
        params_loss=params["params_loss"]
        opt=params["optimizer"]
        train_dl=params["train_dl"]
        val_dl=params["val_dl"]
        sanity_check=params["sanity_check"]
        lr_scheduler=params["lr_scheduler"]
        path2weights=params["path2weights"]
        
        
        loss_history={
            "train": [],
            "val": [],
        }
        best_model_wts = copy.deepcopy(model.state_dict())
        best_loss=float('inf') 
        
        for epoch in range(num_epochs):
            current_lr=get_lr(opt)
            print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr)) 
            model.train()
            train_loss=loss_epoch(model,params_loss,train_dl,sanity_check,opt)
            loss_history["train"].append(train_loss)
            print("train loss: %.6f" %(train_loss))    
            
            model.eval()
            with torch.no_grad():
                val_loss=loss_epoch(model,params_loss,val_dl,sanity_check)
            loss_history["val"].append(val_loss)
            print("val loss: %.6f" %(val_loss))
            
            
            if val_loss < best_loss:
                best_loss = val_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), path2weights)
                print("Copied best model weights!")
                
            lr_scheduler.step(val_loss)
            if current_lr != get_lr(opt):
                print("Loading best model weights!")
                model.load_state_dict(best_model_wts) 
            print("-"*10) 
        model.load_state_dict(best_model_wts)
        return model, loss_history
    
    • Train the model
    opt = optim.Adam(model.parameters(), lr=1e-3)
    lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)
    
    path2models= "./models/mod/"
    if not os.path.exists(path2models):
            os.mkdir(path2models)
            
    scaled_anchors=[model.module_list[82][0].scaled_anchors,
                    model.module_list[94][0].scaled_anchors,
                    model.module_list[106][0].scaled_anchors]
    
    mse_loss = nn.MSELoss(reduction="sum")
    bce_loss = nn.BCELoss(reduction="sum")
    params_loss={
        "scaled_anchors" : scaled_anchors,
        "ignore_thres": 0.5,
        "mse_loss": mse_loss,
        "bce_loss": bce_loss,
        "num_yolos": 3,
        "num_anchors": 3,
        "obj_scale": 1,
        "noobj_scale": 100,
    }
    
    params_train={
        "num_epochs": 5,
        "optimizer": opt,
        "params_loss": params_loss,
        "train_dl": train_dl,
        "val_dl": val_dl,
        "sanity_check": True,
        "lr_scheduler": lr_scheduler,
        "path2weights": path2models+"weights.pt",
    }
    model,loss_hist=train_val(model,params_train)
    """
    Epoch 0/4, current lr=0.001
    train loss: 13.039888
    val loss: 309.578725
    Copied best model weights!
    ----------
    Epoch 1/4, current lr=0.001
    train loss: 11.840441
    val loss: 182.791525
    Copied best model weights!
    ----------
    Epoch 2/4, current lr=0.001
    train loss: 10.949079
    val loss: 143.510638
    Copied best model weights!
    ----------
    Epoch 3/4, current lr=0.001
    train loss: 9.800387
    val loss: 173.621087
    ----------
    Epoch 4/4, current lr=0.001
    train loss: 8.864806
    val loss: 160.650937
    ----------
    """
    
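    • Reloading the saved weights
    After training, the weights saved to path2weights can be reloaded for later evaluation (a short sketch, assuming the Darknet class, path_config, path2models and device defined above):

    # Sketch: reload the best weights saved by train_val.
    model = Darknet(path_config).to(device)
    model.load_state_dict(torch.load(path2models + "weights.pt", map_location=device))
    model.eval()
    
    with torch.no_grad():
        out_cat, _ = model(torch.rand(1, 3, 416, 416).to(device))
    print(out_cat.shape)  # torch.Size([1, 10647, 85])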
