import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torchvision
from torchvision.transforms import ToTensor
# Download the datasets locally
train_ds = torchvision.datasets.MNIST('data/',
                                      train=True,
                                      transform=ToTensor(),
                                      download=True
                                      )
test_ds = torchvision.datasets.MNIST('data/',
                                     train=False,
                                     transform=ToTensor(),
                                     download=True
                                     )
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=256)
imgs, labels = next(iter(train_dl))
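# A quick sanity check (a sketch added here, not part of the original): inspect
# the batch shapes and display the first few digits with the matplotlib import
# from the top of the notebook.
print(imgs.shape, labels.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])
plt.figure(figsize=(10, 1))
for i in range(10):
    plt.subplot(1, 10, i + 1)
    plt.imshow(imgs[i].squeeze(0).numpy(), cmap='gray')  # drop the channel dim for imshow
    plt.title(labels[i].item())
    plt.axis('off')
plt.show()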
# Build the model
class Model(nn.Module):  # every PyTorch model inherits from nn.Module
    def __init__(self):  # define the three kinds of layers
        super().__init__()
        # The 28*28 input becomes a 16*4*4 feature map after two conv+pool
        # stages, which is then handed to the linear layers for the output.
        self.conv1 = nn.Conv2d(1, 6, 5)   # in_channels=1, out_channels=6 (6 kernels), kernel_size=5
        self.pool = nn.MaxPool2d((2, 2))  # kernel_size=2
        self.conv2 = nn.Conv2d(6, 16, 5)  # still a 3-D feature map; after this conv: 16 channels, 8*8 spatial size
        self.linear_1 = nn.Linear(16*4*4, 256)  # input is the flattened length: 16 channels, 4*4 after the second pool
        self.linear_2 = nn.Linear(256, 10)      # output layer: 10 classes; softmax turns the C scores into probabilities over the C classes

    def forward(self, input):  # the actual computation lives in forward
        # x = input.view(-1, 28*28)  # a conv net keeps the spatial layout, so no flattening before the conv layers
        x = F.relu(self.conv1(input))  # every intermediate layer is followed by an activation
        x = self.pool(x)               # each pool halves the spatial size
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # print(x.size())  # torch.Size([64, 16, 4, 4])
        x = x.view(x.size(0), -1)  # flatten: dim 0 stays the batch (64); -1 is computed automatically, same as x.view(-1, 16*4*4)
        x = F.relu(self.linear_1(x))  # fully connected layer, also relu-activated before the output
        x = self.linear_2(x)          # output layer: raw logits, no activation here
        return x
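# To check the 16*4*4 figure used above (a sketch, not in the original): with
# output size (in - kernel)//stride + 1 for each conv and halving for each pool,
# 28 -> conv(5) -> 24 -> pool -> 12 -> conv(5) -> 8 -> pool -> 4. A dummy
# forward pass through the conv part confirms this:
m = Model()
with torch.no_grad():
    out = m.pool(F.relu(m.conv2(m.pool(F.relu(m.conv1(torch.zeros(1, 1, 28, 28)))))))
print(out.shape)  # torch.Size([1, 16, 4, 4])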
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Model().to(device)
loss_fn = torch.nn.CrossEntropyLoss()  # loss function; expects raw (unactivated) logits, and targets given as class indices, not one-hot vectors
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)  # optimization: adjust the model parameters according to the computed loss so as to reduce it
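# To illustrate the two comments above (a sketch, not part of the original):
# CrossEntropyLoss applies log-softmax internally, so it takes raw logits and
# integer class indices rather than one-hot targets.
logits = torch.tensor([[2.0, 0.5, -1.0]])  # raw, unactivated scores for 3 classes
target = torch.tensor([0])                 # a class index, not a one-hot vector
print(loss_fn(logits, target))             # equals -F.log_softmax(logits, dim=1)[0, 0]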
# training function
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # total number of samples; dataloader.dataset is the underlying dataset. len(dataloader) would instead give the number of batches
    train_loss, correct = 0, 0      # train_loss accumulates the loss over all batches; correct counts correctly predicted samples
    for X, y in dataloader:         # X is the input batch, y the target labels
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)  # mean loss over the samples in this batch
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()  # argmax over dim 1, the class dimension (dim 0 is the batch dimension)
            train_loss += loss.item()
    train_loss /= size  # summed batch-mean losses divided by the sample count, hence the small values in the log below
    correct /= size     # accuracy
    return train_loss, correct
# evaluation function
def test(dataloader, model):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    return test_loss, correct
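# As a quick baseline (a sketch added here, not in the original), the test
# function can be called once before training; with random weights the accuracy
# is typically near chance level (about 10% for 10 balanced classes):
untrained_loss, untrained_acc = test(test_dl, model)
print(f"untrained: loss={untrained_loss:.5f}, acc={untrained_acc*100:.1f}%")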
epochs = 50
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_loss, epoch_acc = train(train_dl, model, loss_fn, optimizer)
    epoch_test_loss, epoch_test_acc = test(test_dl, model)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)
    template = ("epoch:{:2d}, train_loss: {:.5f}, train_acc: {:.1f}% ,"
                "test_loss: {:.5f}, test_acc: {:.1f}%")
    print(template.format(
        epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
print("Done!")
epoch: 0, train_loss: 0.03594, train_acc: 14.9% ,test_loss: 0.00918, test_acc: 19.5%
epoch: 1, train_loss: 0.03579, train_acc: 23.4% ,test_loss: 0.00913, test_acc: 30.6%
epoch: 2, train_loss: 0.03555, train_acc: 37.9% ,test_loss: 0.00904, test_acc: 45.0%
epoch: 3, train_loss: 0.03496, train_acc: 45.1% ,test_loss: 0.00879, test_acc: 45.9%
epoch: 4, train_loss: 0.03270, train_acc: 46.4% ,test_loss: 0.00758, test_acc: 54.1%
epoch: 5, train_loss: 0.02365, train_acc: 64.7% ,test_loss: 0.00420, test_acc: 74.9%
epoch: 6, train_loss: 0.01272, train_acc: 79.0% ,test_loss: 0.00242, test_acc: 83.5%
epoch: 7, train_loss: 0.00871, train_acc: 84.3% ,test_loss: 0.00188, test_acc: 86.5%
epoch: 8, train_loss: 0.00724, train_acc: 86.7% ,test_loss: 0.00163, test_acc: 88.2%
epoch: 9, train_loss: 0.00642, train_acc: 88.0% ,test_loss: 0.00145, test_acc: 89.2%
epoch:10, train_loss: 0.00586, train_acc: 89.0% ,test_loss: 0.00134, test_acc: 90.1%
epoch:11, train_loss: 0.00543, train_acc: 89.8% ,test_loss: 0.00124, test_acc: 90.5%
epoch:12, train_loss: 0.00507, train_acc: 90.4% ,test_loss: 0.00117, test_acc: 91.1%
epoch:13, train_loss: 0.00477, train_acc: 91.0% ,test_loss: 0.00109, test_acc: 91.7%
epoch:14, train_loss: 0.00450, train_acc: 91.5% ,test_loss: 0.00103, test_acc: 92.2%
epoch:15, train_loss: 0.00426, train_acc: 91.9% ,test_loss: 0.00097, test_acc: 92.7%
epoch:16, train_loss: 0.00404, train_acc: 92.4% ,test_loss: 0.00092, test_acc: 93.0%
epoch:17, train_loss: 0.00385, train_acc: 92.6% ,test_loss: 0.00088, test_acc: 93.2%
epoch:18, train_loss: 0.00367, train_acc: 93.1% ,test_loss: 0.00084, test_acc: 93.7%
epoch:19, train_loss: 0.00350, train_acc: 93.3% ,test_loss: 0.00079, test_acc: 94.0%
epoch:20, train_loss: 0.00335, train_acc: 93.6% ,test_loss: 0.00077, test_acc: 94.3%
epoch:21, train_loss: 0.00321, train_acc: 93.9% ,test_loss: 0.00073, test_acc: 94.6%
epoch:22, train_loss: 0.00307, train_acc: 94.2% ,test_loss: 0.00069, test_acc: 95.0%
epoch:23, train_loss: 0.00296, train_acc: 94.3% ,test_loss: 0.00067, test_acc: 95.0%
epoch:24, train_loss: 0.00285, train_acc: 94.7% ,test_loss: 0.00067, test_acc: 95.0%
epoch:25, train_loss: 0.00274, train_acc: 94.8% ,test_loss: 0.00062, test_acc: 95.5%
epoch:26, train_loss: 0.00265, train_acc: 95.1% ,test_loss: 0.00060, test_acc: 95.6%
epoch:27, train_loss: 0.00256, train_acc: 95.2% ,test_loss: 0.00058, test_acc: 95.8%
epoch:28, train_loss: 0.00247, train_acc: 95.2% ,test_loss: 0.00056, test_acc: 96.0%
epoch:29, train_loss: 0.00240, train_acc: 95.5% ,test_loss: 0.00054, test_acc: 96.0%
epoch:30, train_loss: 0.00233, train_acc: 95.6% ,test_loss: 0.00053, test_acc: 96.1%
epoch:31, train_loss: 0.00226, train_acc: 95.7% ,test_loss: 0.00052, test_acc: 96.1%
epoch:32, train_loss: 0.00220, train_acc: 95.8% ,test_loss: 0.00051, test_acc: 96.2%
epoch:33, train_loss: 0.00214, train_acc: 96.0% ,test_loss: 0.00048, test_acc: 96.5%
epoch:34, train_loss: 0.00208, train_acc: 96.0% ,test_loss: 0.00047, test_acc: 96.5%
epoch:35, train_loss: 0.00203, train_acc: 96.2% ,test_loss: 0.00046, test_acc: 96.6%
epoch:36, train_loss: 0.00198, train_acc: 96.2% ,test_loss: 0.00045, test_acc: 96.7%
epoch:37, train_loss: 0.00194, train_acc: 96.3% ,test_loss: 0.00044, test_acc: 96.8%
epoch:38, train_loss: 0.00189, train_acc: 96.4% ,test_loss: 0.00043, test_acc: 96.8%
epoch:39, train_loss: 0.00185, train_acc: 96.4% ,test_loss: 0.00042, test_acc: 96.6%
epoch:40, train_loss: 0.00181, train_acc: 96.5% ,test_loss: 0.00042, test_acc: 96.8%
epoch:41, train_loss: 0.00177, train_acc: 96.6% ,test_loss: 0.00041, test_acc: 97.0%
epoch:42, train_loss: 0.00174, train_acc: 96.7% ,test_loss: 0.00040, test_acc: 96.9%
epoch:43, train_loss: 0.00171, train_acc: 96.8% ,test_loss: 0.00038, test_acc: 97.1%
epoch:44, train_loss: 0.00167, train_acc: 96.8% ,test_loss: 0.00039, test_acc: 97.0%
epoch:45, train_loss: 0.00164, train_acc: 96.9% ,test_loss: 0.00037, test_acc: 97.2%
epoch:46, train_loss: 0.00162, train_acc: 96.9% ,test_loss: 0.00037, test_acc: 97.1%
epoch:47, train_loss: 0.00159, train_acc: 96.9% ,test_loss: 0.00036, test_acc: 97.2%
epoch:48, train_loss: 0.00156, train_acc: 97.0% ,test_loss: 0.00036, test_acc: 97.2%
epoch:49, train_loss: 0.00154, train_acc: 97.0% ,test_loss: 0.00035, test_acc: 97.1%
Done!
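# Finally (a sketch added here, not part of the original), the recorded curves
# can be plotted with the matplotlib import from the top of the notebook:
plt.plot(range(epochs), train_loss, label='train_loss')
plt.plot(range(epochs), test_loss, label='test_loss')
plt.legend()
plt.show()
plt.plot(range(epochs), train_acc, label='train_acc')
plt.plot(range(epochs), test_acc, label='test_acc')
plt.legend()
plt.show()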
- The convolutional network takes much longer to train than the fully connected model.
- Its accuracy, however, is considerably higher than the fully connected model's.