Cat vs. dog classification with ViT: code
from __future__ import print_function
import glob
from itertools import chain
import os
import random
import zipfile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from linformer import Linformer
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
# from tqdm.notebook import tqdm
from tqdm import tqdm
from vit_pytorch.efficient import ViT
print(f"Torch: {torch.__version__}")
# Training settings
batch_size = 64
epochs = 40
lr = 3e-5
gamma = 0.7
seed = 42
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed)
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when no GPU is available
os.makedirs('data', exist_ok=True)
train_dir = 'data/train'
test_dir = 'data/test'
with zipfile.ZipFile('train.zip') as train_zip:
    train_zip.extractall('data')
with zipfile.ZipFile('test.zip') as test_zip:
    test_zip.extractall('data')
train_list = glob.glob(os.path.join(train_dir,'*.jpg'))
test_list = glob.glob(os.path.join(test_dir, '*.jpg'))
print(f"Train Data: {len(train_list)}")
print(f"Test Data: {len(test_list)}")
labels = [os.path.basename(path).split('.')[0] for path in train_list]  # 'cat' or 'dog' from e.g. cat.0.jpg
# random_idx = np.random.randint(1, len(train_list), size=9)
# fig, axes = plt.subplots(3, 3, figsize=(16, 12))
# for idx, ax in enumerate(axes.ravel()):
#     img = Image.open(train_list[idx])
#     ax.set_title(labels[idx])
#     ax.imshow(img)
train_list, valid_list = train_test_split(train_list,
                                          test_size=0.2,
                                          stratify=labels,
                                          random_state=seed)
print(f"Train Data: {len(train_list)}")
print(f"Validation Data: {len(valid_list)}")
print(f"Test Data: {len(test_list)}")
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)
val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)
test_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)
class CatsDogsDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img)
        label = os.path.basename(img_path).split(".")[0]  # 'cat' or 'dog' from the file name
        label = 1 if label == "dog" else 0
        return img_transformed, label
train_data = CatsDogsDataset(train_list, transform=train_transforms)
valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
test_data = CatsDogsDataset(test_list, transform=test_transforms)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=batch_size, shuffle=False)  # no need to shuffle for evaluation
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)
print(len(train_data), len(train_loader))
print(len(valid_data), len(valid_loader))
efficient_transformer = Linformer(
    dim=128,
    seq_len=49 + 1,  # 7x7 patches + 1 cls token
    depth=12,
    heads=8,
    k=64
)
model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    transformer=efficient_transformer,
    channels=3,
).to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
for epoch in range(epochs):
    model.train()  # enable training behaviour (dropout etc.)
    epoch_loss = 0
    epoch_accuracy = 0
    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)
        output = model(data)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = (output.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc.item()  # .item() avoids accumulating GPU tensors and graphs
        epoch_loss += loss.item()
    scheduler.step()  # StepLR's step_size counts epochs, so step once per epoch, not per batch
    epoch_accuracy /= len(train_loader)
    epoch_loss /= len(train_loader)

    model.eval()  # switch off dropout for validation
    with torch.no_grad():
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        for data, label in valid_loader:
            data = data.to(device)
            label = label.to(device)
            val_output = model(data)
            val_loss = criterion(val_output, label)
            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc.item()
            epoch_val_loss += val_loss.item()
        epoch_val_accuracy /= len(valid_loader)
        epoch_val_loss /= len(valid_loader)
    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )
Key points
(1) linformer and vit_pytorch are packaged libraries; both can be installed with pip.
(2) seed_everything sets a fixed random seed, guaranteeing that the random results are reproducible across runs.
(3) os.makedirs('data', exist_ok=True)
exist_ok controls whether an exception is raised when the directory already exists:
if exist_ok is False (the default), a FileExistsError is raised when the target directory already exists;
if exist_ok is True, no FileExistsError is raised when the target directory already exists.
Here it is set to True, so nothing is raised when the directory exists and there is no need to check for it first; see the sketch below.
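A minimal sketch of both settings (run from a directory where 'data' already exists):
import os

os.makedirs('data', exist_ok=True)   # silently succeeds even though 'data' exists
try:
    os.makedirs('data')              # exist_ok defaults to False
except FileExistsError:
    print("'data' exists, so the default exist_ok=False raises FileExistsError")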
(4) Python's zipfile library
This extracts train.zip into the data directory:
with zipfile.ZipFile('train.zip') as train_zip:
    train_zip.extractall('data')
(5) glob.glob(os.path.join(train_dir, '*.jpg'))
A way to search for files:
- more convenient than os.listdir() or os.walk();
- note that it only matches files directly under train_dir whose names end in .jpg;
- any subdirectories of train_dir are ignored;
- it returns a list of full file paths, whereas os.listdir returns bare file names (see the sketch below).
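A quick comparison sketch (the paths are illustrative and assume the extracted data layout above):
import glob
import os

print(glob.glob(os.path.join('data/train', '*.jpg'))[:3])
# full paths, *.jpg files only, e.g. ['data/train/cat.0.jpg', ...]
print(os.listdir('data/train')[:3])
# bare names of everything in the directory, unfiltered, e.g. ['cat.0.jpg', ...]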
(6) Splitting the train set into train/val
This uses sklearn:
from sklearn.model_selection import train_test_split
train_list, valid_list = train_test_split(train_list,
                                          test_size=0.2,
                                          stratify=labels,
                                          random_state=seed)
Usage of sklearn.model_selection.train_test_split:
When doing machine learning in Python, sklearn.model_selection.train_test_split is the usual tool for splitting data into training samples and testing samples.
Call signature: sklearn.model_selection.train_test_split(*arrays, **options)
Commonly used arguments:
arrays: one or more lists, numpy arrays, or matrices of equal length to split.
test_size: two ways to specify it. 1: a float between 0.0 and 1.0, giving the fraction that goes to the test set. 2: an integer, giving the absolute number of test samples (which obviously cannot exceed the dataset size). If test_size is not given, it can be derived from train_size (the two are complementary); if train_size is not given either, the default test fraction is 0.25.
train_size: analogous to test_size.
random_state: the seed controlling the shuffle applied before splitting; fixing it makes the split reproducible. If not specified, the split depends on numpy's global random state.
shuffle and stratify are used less often; stratify splits the data so that each part keeps the original class proportions (see the sketch after the example below).
import pandas as pd
from sklearn.model_selection import train_test_split

namelist = pd.DataFrame({
    "name": ["Suzuki", "Tanaka", "Yamada", "Watanabe", "Yamamoto",
             "Okada", "Ueda", "Inoue", "Hayashi", "Sato",
             "Hirayama", "Shimada"],
    "age": [30, 40, 55, 29, 41, 28, 42, 24, 33, 39, 49, 53],
    "department": ["HR", "Legal", "IT", "HR", "HR", "IT",
                   "Legal", "Legal", "IT", "HR", "Legal", "Legal"],
    "attendance": [1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1]
})
# print(namelist)

# test_size as a fraction
train, test = train_test_split(namelist, test_size=0.3)
print("\ntrain")
print(train)
print("\ntest")
print(test)

# test_size as an absolute count
train, test = train_test_split(namelist, test_size=5)
print("\ntrain")
print(train)
print("\ntest")
print(test)

# no shuffling: the split keeps the original row order
train, test = train_test_split(namelist, shuffle=False)
print("\ntrain")
print(train)
print("\ntest")
print(test)
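To see what stratify does, here is a small sketch with a toy 80/20 class imbalance; the proportions survive the split in both halves:
from collections import Counter
from sklearn.model_selection import train_test_split

labels = ['cat'] * 80 + ['dog'] * 20
train_lbl, test_lbl = train_test_split(labels, test_size=0.25,
                                       stratify=labels, random_state=42)
print(Counter(train_lbl))  # Counter({'cat': 60, 'dog': 15}), still 80/20
print(Counter(test_lbl))   # Counter({'cat': 20, 'dog': 5}), still 80/20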
(7) Dataset preprocessing with transforms.Compose
This includes:
- resizing
- random crop / center crop
- random flips (horizontal or vertical)
- conversion with ToTensor (PyTorch's data format)
A shape check follows the code below.
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)
val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)
test_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)
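A quick sanity check that every branch of the pipeline ends in the 3x224x224 tensor the model expects (reusing train_list and the pipelines above):
from PIL import Image

img = Image.open(train_list[0])
print(train_transforms(img).shape)  # torch.Size([3, 224, 224]); crop location varies per call
print(val_transforms(img).shape)    # torch.Size([3, 224, 224]); deterministic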
(8) The dataset class
The class needs to provide:
- an initializer;
- __len__, which returns the length of the dataset;
- __getitem__, which returns the element at a given index (never loop here, or fetching data later becomes far too slow).
A quick indexing check follows the initialization code below.
from torch.utils.data import DataLoader, Dataset

class CatsDogsDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img)
        label = os.path.basename(img_path).split(".")[0]  # 'cat' or 'dog'
        label = 1 if label == "dog" else 0
        return img_transformed, label
train_data = CatsDogsDataset(train_list, transform=train_transforms)  # training set
valid_data = CatsDogsDataset(valid_list, transform=val_transforms)    # validation set
test_data = CatsDogsDataset(test_list, transform=test_transforms)     # test set
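Indexing the dataset exercises __getitem__ directly and returns one (tensor, label) pair:
img, label = train_data[0]
print(img.shape, label)  # torch.Size([3, 224, 224]) and 0 (cat) or 1 (dog)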
(9) DataLoader
It wraps an indexable dataset so the data can be read in more convenient ways:
- by batch_size (after wrapping, each iteration returns a whole batch of data rather than a single sample);
- with optional shuffling (a shuffled batch is drawn from the reordered data instead of consecutive samples, which makes the distribution of the batches fed to training more even). A quick shape check follows the code below.
from torch.utils.data import DataLoader, Dataset

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)
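After wrapping, one iteration yields a whole batch rather than a single sample:
data, label = next(iter(train_loader))
print(data.shape, label.shape)  # torch.Size([64, 3, 224, 224]) torch.Size([64])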
(10) Initializing the model
Both Linformer and ViT here come from pip-installed libraries; you can also write the model yourself.
First initialize the parameters of efficient_transformer;
then pass efficient_transformer, together with the other parameters, to initialize the ViT model;
finally move the model onto CUDA. A parameter-count check follows the code below.
from linformer import Linformer
from vit_pytorch.efficient import ViT

efficient_transformer = Linformer(
    dim=128,
    seq_len=49 + 1,  # 7x7 patches + 1 cls token
    depth=12,
    heads=8,
    k=64
)
model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    transformer=efficient_transformer,
    channels=3,
).to(device)
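A rough size check of the assembled model, counting its trainable parameters:
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {n_params:,}")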
(11) Setting the loss function
# loss function
criterion = nn.CrossEntropyLoss()
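Note that nn.CrossEntropyLoss expects raw logits of shape (N, C) plus integer class indices of shape (N,), and applies log-softmax internally; that is why the model above has no final softmax. A toy sketch:
import torch
import torch.nn as nn

logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])  # fake model outputs, N=2, C=2
targets = torch.tensor([0, 1])                    # 0 = cat, 1 = dog
print(nn.CrossEntropyLoss()(logits, targets))     # a scalar loss tensor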
(12) Setting the optimizer and optimization algorithm
Other optimizers include (the net_* objects below are placeholder models):
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=Learning_rate)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=Learning_rate, momentum=0.8, nesterov=True)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=Learning_rate, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=Learning_rate, betas=(0.9, 0.99))
opt_Adagrad = torch.optim.Adagrad(net_Adagrad.parameters(), lr=Learning_rate)
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
(13) StepLR: decaying the learning rate at fixed intervals
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)  # multiply the LR by gamma every step_size epochs
Background:
PyTorch learning-rate scheduling is implemented through the torch.optim.lr_scheduler interface, and the provided strategies fall into three groups:
Ordered schedules: fixed-interval decay (StepLR), milestone-based decay (MultiStepLR), exponential decay (ExponentialLR), and cosine annealing (CosineAnnealingLR).
Adaptive schedules: ReduceLROnPlateau, which reacts to a monitored metric.
Custom schedules: LambdaLR, driven by a user-defined function.
StepLR multiplies the learning rate by gamma every step_size steps. Note that a step here normally means an epoch, not an iteration.
MultiStepLR adjusts the learning rate at milestones you choose. It suits later-stage tuning: watch the loss curve, then customize the adjustment points for each experiment.
ExponentialLR: exponential decay of the learning rate.
CosineAnnealingLR: cosine-annealed learning rate.
ReduceLROnPlateau: adapts the learning rate to a monitored quantity.
LambdaLR: fully custom adjustment. A small sketch of the StepLR decay follows.
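The sketch uses a throwaway parameter and optimizer, and assumes scheduler.step() is called once per epoch as in the training loop above:
import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=3e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
for epoch in range(3):
    optimizer.step()   # the optimizer steps first, then the scheduler
    scheduler.step()
    print(epoch, scheduler.get_last_lr())  # 2.1e-05, then 1.47e-05, then 1.029e-05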