在LeNet提出的将近20年里,神经网络一度被其他机器学习方法超越,如支持向量机.
1.AlexNet是什么
AlexNet是一个深度卷积神经网络模型,
这个名字来源于论文的第一作者Alex Krizhevsky.
2012年,AlexNet横空出世.
它首次证明了学习到的特征可以超越手工设计的特征,从而一举打破计算机视觉研究的现状.
2.AlexNet设计理念
与LeNet相比,
(1)AlexNet包含8层变换,5层卷积+2层全连接隐藏层+1个全连接输出层.
image.png
由于ImageNet图像的高和宽远大于MNIST,目标物体占据更多的像素,所以采用更大的卷积窗口来捕获物体.
(2)AlexNet将sigmoid函数改为更加简单的ReLU激活函数.
计算更简单,
模型更加容易训练.
这是由于当sigmoid激活函数输出极接近0或1时,这些区域的梯度几乎为0,从而造成反向传播无法继续更新部分模型参数;而ReLU激活函数在正区间的梯度恒为1。因此,若模型参数初始化不当,sigmoid函数可能在正区间得到几乎为0的梯度,从而令模型无法得到有效训练。
(3)AlexNet通过丢弃法来控制全连接层的模型复杂度.
(4)AlexNet引入了大量的图像增广,如翻转、裁剪、颜色变化,从而进一步扩大数据集来缓解过拟合.
image.png
3.AlexNet的mxnet实现
虽然论文中AlexNet使用的是ImageNet数据集,
但是因为ImageNet数据集训练时间较长,
于是采用Fashion-MNIST数据集来测试AlexNet.
import mxnet as mx
from mxnet import autograd, gluon, image, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn, utils as gutils
import os
import sys
import time
def _get_batch(batch, ctx):
    """Shard a (features, labels) batch across the contexts in ``ctx``.

    Labels are cast to the feature dtype when the two differ, then both
    tensors are split over the context list for data-parallel use.

    Returns a tuple of (feature shards, label shards, batch size).
    """
    X, y = batch
    if y.dtype != X.dtype:
        y = y.astype(X.dtype)
    num_samples = X.shape[0]
    return (gutils.split_and_load(X, ctx),
            gutils.split_and_load(y, ctx),
            num_samples)
def evaluate_accuracy(data_iter, net, ctx=None):
    """Evaluate classification accuracy of ``net`` on ``data_iter``.

    Parameters
    ----------
    data_iter : iterable yielding (features, labels) batches.
    net : the model to evaluate.
    ctx : mx.Context, list of contexts, or None.
        Defaults to ``[mx.cpu()]``. The original signature used a mutable
        list as the default argument value — a known Python pitfall — so a
        ``None`` sentinel is used instead; callers that passed nothing get
        identical behavior.

    Returns
    -------
    float : fraction of correctly classified samples.
    """
    # Normalize ctx to a list of contexts.
    if ctx is None:
        ctx = [mx.cpu()]
    elif isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc_sum, n = nd.array([0]), 0
    for batch in data_iter:
        features, labels, _ = _get_batch(batch, ctx)
        for X, y in zip(features, labels):
            y = y.astype('float32')
            # copyto(mx.cpu()) accumulates on CPU regardless of which
            # device the shard was evaluated on.
            acc_sum += (net(X).argmax(axis=1) == y).sum().copyto(mx.cpu())
            n += y.size
        # Block until pending async ops finish before the next batch.
        acc_sum.wait_to_read()
    return acc_sum.asscalar() / n
def load_data_fashion_mnist(batch_size, resize=None,
                            root=os.path.join('~', '.mxnet', 'datasets', 'fashion-mnist')):
    """Load Fashion-MNIST as train/test DataLoaders, downloading if absent.

    Parameters
    ----------
    batch_size : int, mini-batch size for both loaders.
    resize : int or None, optionally resize each image before ToTensor.
    root : str, directory where the dataset is cached.

    Returns
    -------
    (train_iter, test_iter) : DataLoader pair; only training data shuffled.
    """
    root = os.path.expanduser(root)
    # Preprocessing pipeline: optional Resize, then ToTensor, which emits
    # float32 tensors shaped (channel, height, width) with values in [0, 1].
    steps = []
    if resize:
        steps.append(gdata.vision.transforms.Resize(resize))
    steps.append(gdata.vision.transforms.ToTensor())
    transformer = gdata.vision.transforms.Compose(steps)

    mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
    mnist_test = gdata.vision.FashionMNIST(root=root, train=False)

    # Multiprocess loading is unavailable on Windows, so fall back to 0.
    num_workers = 0 if sys.platform.startswith('win32') else 4
    # transform_first applies the pipeline to the image (first element)
    # of each (image, label) sample.
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                                  batch_size, shuffle=True,
                                  num_workers=num_workers)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                                 batch_size, shuffle=False,
                                 num_workers=num_workers)
    return train_iter, test_iter
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs):
    """Train and evaluate a model with CPU or GPU.

    Runs ``num_epochs`` passes over ``train_iter`` on the single context
    ``ctx``, then prints per-epoch loss, train accuracy, test accuracy
    (via ``evaluate_accuracy``) and wall-clock time.
    """
    print('training on', ctx)
    # Softmax cross-entropy classification loss.
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        epoch_start = time.time()
        loss_total, correct_total, sample_count = 0.0, 0.0, 0
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)              # forward pass
                l = loss(y_hat, y).sum()    # summed batch loss
            l.backward()                    # back-propagate gradients
            trainer.step(batch_size)        # update, normalized by batch size
            y = y.astype('float32')
            loss_total += l.asscalar()
            correct_total += (y_hat.argmax(axis=1) == y).sum().asscalar()
            sample_count += y.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, loss_total / sample_count,
                 correct_total / sample_count, test_acc,
                 time.time() - epoch_start))
if __name__ == "__main__":
    # Sequential container: layers run in the order they are added.
    net = nn.Sequential()
    # Add the AlexNet layers one by one.
    net.add(
        # First conv layer: an 11x11 window because ImageNet images are
        # large and objects occupy many pixels, so a bigger window is
        # needed to capture them.
        nn.Conv2D(96,kernel_size=11,strides=4,activation='relu'),
        # Pooling reduces the conv layer's sensitivity to exact position.
        nn.MaxPool2D(pool_size=3,strides=2),
        # Smaller window; padding=2 keeps input/output height and width
        # equal, while the channel count grows.
        nn.Conv2D(256,kernel_size=5,padding=2,activation='relu'),
        nn.MaxPool2D(pool_size=3,strides=2),
        # Three consecutive conv layers with even smaller windows.
        # All but the last further increase the output channel count;
        # no pooling after the first two, preserving height and width.
        nn.Conv2D(384,kernel_size=3,padding=1,activation='relu'),
        nn.Conv2D(384,kernel_size=3,padding=1,activation='relu'),
        nn.Conv2D(256,kernel_size=3,padding=1,activation='relu'),
        nn.MaxPool2D(pool_size=3,strides=2),
        # Dense layers several times wider than LeNet's; together they
        # hold close to 1 GB of parameters, so dropout is used to
        # mitigate overfitting.
        nn.Dense(4096,activation='relu'),nn.Dropout(0.5),
        nn.Dense(4096,activation='relu'),nn.Dropout(0.5),
        # Output layer: 10 classes for Fashion-MNIST instead of the
        # paper's 1000 ImageNet classes.
        nn.Dense(10) )
    # Dry run with a dummy 1x1x224x224 input to print each layer's
    # output shape (requires a first initialize()).
    X = nd.random.uniform(shape=(1,1,224,224))
    net.initialize()
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)
    # On out-of-memory errors, reduce batch_size or the resize value.
    batch_size = 128
    # NOTE(review): root is a hard-coded placeholder path — adjust to the
    # local dataset location before running.
    train_iter, test_iter = load_data_fashion_mnist(batch_size,resize=224,root='/home/xxx/Fashion-MNIST')
    lr, num_epochs, ctx = 0.01, 5, mx.gpu()
    # Re-initialize with Xavier on the GPU, discarding the dry-run weights.
    net.initialize(force_reinit=True,ctx = ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(),'sgd', {'learning_rate':lr})
    train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)
结果:
image.png
参考:
- 动手学深度学习第5.3章节
网友评论