ViT (Vision Transformer) in PyTorch

Author: 小黄不头秃 | Published 2023-07-03 23:38

Using ViT is fairly straightforward.

First, we need to install the required libraries:

    pip install vit-pytorch -i https://pypi.tuna.tsinghua.edu.cn/simple
    pip install timm -i https://pypi.tuna.tsinghua.edu.cn/simple
    

Then ViT can be used directly in code:

    from vit_pytorch import ViT 
    import torch 
    
    net = ViT(
        image_size=224,    # input image resolution
        patch_size=32,     # 32x32 patches, so (224/32)^2 = 49 patch tokens
        num_classes=1000,  # size of the classification head
        dim=1024,          # embedding dimension of each token
        depth=6,           # number of Transformer encoder blocks
        heads=16,          # number of attention heads
        mlp_dim=2048,      # hidden dimension of the feed-forward layers
        dropout=0.1,       # dropout inside the Transformer
        emb_dropout=0.1,   # dropout on the patch + position embeddings
    )

    # print(net)
    img = torch.randn(1, 3, 224, 224)  # a dummy batch: (batch, channels, height, width)
    preds = net(img)
    print(preds.shape)  # torch.Size([1, 1000])
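
Since timm is installed alongside vit-pytorch, an equivalent model can also be built through timm's `create_model` factory. A minimal sketch (using timm's registered `vit_base_patch16_224` variant; the 5-class head is just an illustrative choice, not from the original post):

    import timm
    import torch

    # build a ViT-Base/16 with a 5-class head; set pretrained=True to download ImageNet weights
    model = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=5)
    model.eval()

    img = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        preds = model(img)
    print(preds.shape)  # torch.Size([1, 5])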
    

Model training:

    import os
    import math
    import argparse
    
    import torch
    import torch.optim as optim
    import torch.optim.lr_scheduler as lr_scheduler
    from torch.utils.tensorboard import SummaryWriter
    from torchvision import transforms
    
    
    from my_dataset import MyDataSet
    # NOTE: the has_logits argument and model.has_logits used below follow the custom ViT
    # implementation from the tutorial referenced at the end of this post; timm's
    # vit_base_patch16_224_in21k entrypoint does not take a has_logits argument,
    # so adjust accordingly if you use timm directly.
    from timm.models.vision_transformer import vit_base_patch16_224_in21k as create_model
    from utils import read_split_data, train_one_epoch, evaluate
    
    
    def main(args):
        device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    
        if os.path.exists("./weights") is False:
            os.makedirs("./weights")
    
        tb_writer = SummaryWriter()
    
        train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)
    
        data_transform = {
            "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
            "val": transforms.Compose([transforms.Resize(256),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}
    
        # instantiate the training dataset
        train_dataset = MyDataSet(images_path=train_images_path,
                                  images_class=train_images_label,
                                  transform=data_transform["train"])
    
        # instantiate the validation dataset
        val_dataset = MyDataSet(images_path=val_images_path,
                                images_class=val_images_label,
                                transform=data_transform["val"])
    
        batch_size = args.batch_size
        nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
        print('Using {} dataloader workers per process'.format(nw))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   pin_memory=True,
                                                   num_workers=nw,
                                                   collate_fn=train_dataset.collate_fn)
    
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=nw,
                                                 collate_fn=val_dataset.collate_fn)
    
        model = create_model(num_classes=args.num_classes, has_logits=False).to(device)
    
        if args.weights != "":
            assert os.path.exists(args.weights), "weights file: '{}' not exist.".format(args.weights)
            weights_dict = torch.load(args.weights, map_location=device)
    
            # remove weights that are not needed (the classification head, and pre_logits when it is absent)
            del_keys = ['head.weight', 'head.bias'] if model.has_logits \
                else ['pre_logits.fc.weight', 'pre_logits.fc.bias', 'head.weight', 'head.bias']
            for k in del_keys:
                del weights_dict[k]
    
            print(model.load_state_dict(weights_dict, strict=False))
    
        if args.freeze_layers:
            for name, para in model.named_parameters():
                # freeze all weights except head and pre_logits, so only the final MLP/head layers are trained
                if "head" not in name and "pre_logits" not in name:
                    para.requires_grad_(False)
                else:
                    print("training {}".format(name))
    
        pg = [p for p in model.parameters() if p.requires_grad]
        optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)  # pass only the parameters that still require gradients to SGD
        # Scheduler https://arxiv.org/pdf/1812.01187.pdf
        lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine learning rate decay
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # cosine learning rate decay
    
        for epoch in range(args.epochs):
            # train: returns the mean loss and the accuracy (correct predictions / total samples)
            train_loss, train_acc = train_one_epoch(model=model,
                                                    optimizer=optimizer,
                                                    data_loader=train_loader,
                                                    device=device,
                                                    epoch=epoch)
            # validate
            val_loss, val_acc = evaluate(model=model,
                                         data_loader=val_loader,
                                         device=device,
                                         epoch=epoch)
            scheduler.step()
            # the three calls above (train, evaluate, scheduler.step) follow the official PyTorch LambdaLR example
            tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
            tb_writer.add_scalar(tags[0], train_loss, epoch)
            tb_writer.add_scalar(tags[1], train_acc, epoch)
            tb_writer.add_scalar(tags[2], val_loss, epoch)
            tb_writer.add_scalar(tags[3], val_acc, epoch)
            tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)
    
            torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
    
    
    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--num_classes', type=int, default=5)
        parser.add_argument('--epochs', type=int, default=10)
        parser.add_argument('--batch-size', type=int, default=8)
        parser.add_argument('--lr', type=float, default=0.001)
        parser.add_argument('--lrf', type=float, default=0.01)
    
        # root directory of the dataset
        # http://download.tensorflow.org/example_images/flower_photos.tgz
        parser.add_argument('--data-path', type=str,
                            default="/data/xxxx")
        parser.add_argument('--model-name', default='', help='create model name')
    
        # path to the pretrained weights; set it to an empty string to skip loading (the weight file was renamed here)
        parser.add_argument('--weights', type=str, default='./vit_base_patch16_224_in21k.pth',
                            help='initial weights path')
        # whether to freeze the backbone weights
        parser.add_argument('--freeze-layers', type=bool, default=True)
        parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')
    
        opt = parser.parse_args()
    
        main(opt)
    

For a detailed walkthrough, see this blog post: 【超详细】初学者包会的Vision Transformer(ViT)的PyTorch实现代码学习 (NeverEnough, CSDN).
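
The training script imports `MyDataSet`, `read_split_data`, `train_one_epoch`, and `evaluate` from that tutorial's companion files, which are not reproduced here. As a rough idea of what the loop expects, a minimal sketch of `train_one_epoch` and `evaluate` for plain cross-entropy classification might look like this (the tutorial's own implementations differ in details such as progress display):

    import torch
    import torch.nn.functional as F


    def train_one_epoch(model, optimizer, data_loader, device, epoch):
        model.train()
        total_loss, total_correct, total_samples = 0.0, 0, 0
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)
            loss = F.cross_entropy(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * labels.size(0)
            total_correct += (logits.argmax(dim=1) == labels).sum().item()
            total_samples += labels.size(0)
        # mean loss and accuracy over the epoch
        return total_loss / total_samples, total_correct / total_samples


    @torch.no_grad()
    def evaluate(model, data_loader, device, epoch):
        model.eval()
        total_loss, total_correct, total_samples = 0.0, 0, 0
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)
            total_loss += F.cross_entropy(logits, labels).item() * labels.size(0)
            total_correct += (logits.argmax(dim=1) == labels).sum().item()
            total_samples += labels.size(0)
        return total_loss / total_samples, total_correct / total_samples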

ViT delivers excellent performance across a wide range of vision tasks. Compared with CNNs, however, it lacks their inductive biases, so when applied to small datasets it relies heavily on model regularization and data augmentation; otherwise the model easily overfits.
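
One common way to add such regularization on a small dataset is to strengthen the training transforms. The sketch below is illustrative only (the ColorJitter and RandomErasing settings are assumptions, not part of the original post) and would replace the "train" transform in the script above:

    from torchvision import transforms

    # stronger augmentation for small datasets (illustrative values)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        transforms.RandomErasing(p=0.25),  # operates on tensors, so it must come after ToTensor
    ])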
