I. Linear regression implementation
from IPython import display
import matplotlib.pyplot as plt
from mxnet import autograd, nd
import random
from mxnet.gluon import data as gdata
from mxnet.gluon import nn
from mxnet import init
from mxnet.gluon import loss as gloss
from mxnet import gluon
# 1. ************** Generate the data
input_num = 2
examples_num = 1000
original_w = [2, 1.1]
original_b = 2.2
features = nd.random_normal(scale=1, shape=(examples_num, input_num))
labels = original_w[0] * features[:, 0] + original_w[1] * features[:, 1] + original_b
# Add a noise term ϵ to the labels, drawn from a normal distribution with mean 0 and standard deviation 0.01
labels += nd.random_normal(scale=0.01, shape=labels.shape)
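# In other words (note added for clarity), each label is generated as
#   y = 2 * x1 + 1.1 * x2 + 2.2 + ϵ,  with ϵ ~ N(0, 0.01^2)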
# 2. ************** Read the data: randomly draw mini-batches of batch_size features and labels from the generated samples
# def data_iter(batch_size, feature, label):
#     num = len(feature)
#     indices = list(range(num))
#     random.shuffle(indices)
#     for i in range(0, num, batch_size):
#         j = nd.array(indices[i:min(i + batch_size, num)])
#         yield feature.take(j), label.take(j)
batch_size = 10
# Combine the features and labels of the training data
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
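# As a quick sanity check (extra snippet, not part of the original listing),
# one mini-batch can be pulled from the DataLoader to confirm the shapes:
for X, y in data_iter:
    print(X.shape, y.shape)  # expected: (10, 2) and (10,)
    break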
# 3. ************** Define the model
# def linerRegression(X, weight, bias):
#     return nd.dot(X, weight) + bias
# A Sequential instance is a container that chains layers together; layers can be added to it one after another to build the model
net = nn.Sequential()
net.add(nn.Dense(1))
# 4. *************** Initialize the model parameters
# w = nd.random_normal(scale=0.01, shape=(input_num, 1))
# b = nd.zeros(shape=(1, 1))
# # Allocate gradients for the parameters
# w.attach_grad()
# b.attach_grad()
# Specify that each weight element is randomly sampled at initialization from a normal distribution with mean 0 and standard deviation 0.01; bias parameters are initialized to 0 by default
net.initialize(init.Normal(sigma=0.01))
# 5. *************** Define the loss function
# def squared_loss(y_hat, y):
#     return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
# The squared loss is also called the L2-norm loss
loss = gloss.L2Loss()
# 6. *************** Define the optimization algorithm
# def sgd(params, lr, batch_size):
#     for param in params:
#         param[:] = param - lr * param.grad / batch_size
# The trainer iterates over all parameters contained in the layers added to net via add(); collect_params() returns all of net's parameters
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.03})
# 7. **************** Train the model
# lr = 0.03
# net = linerRegression
# loss = squared_loss
num_epochs = 3
for epoch in range(num_epochs):
    # Training takes num_epochs passes (epochs) over the data.
    # In each epoch, every example in the training set is used once
    # (assuming the number of examples is divisible by the batch size).
    # X and y are the features and labels of a mini-batch.
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)  # l is the loss on the mini-batch X, y.
        l.backward()  # Compute gradients of the mini-batch loss w.r.t. the model parameters.
        trainer.step(batch_size)  # Update the parameters with mini-batch SGD.
    l = loss(net(features), labels)
    print('epoch %d, loss %f' % (epoch + 1, l.mean().asnumpy()))
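After training, the parameters learned by the Dense layer can be read back and compared with the values used to generate the data (a small check added here; net[0].weight.data() and net[0].bias.data() are the standard Gluon accessors):
# Compare the learned parameters with those used to generate the data.
print('true w:', original_w, 'learned w:', net[0].weight.data().asnumpy())
print('true b:', original_b, 'learned b:', net[0].bias.data().asscalar())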
II. Softmax regression implementation
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
# ****************************** Gluon implementation
# Obtain and read the data
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
# Define and initialize the model
net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
# Softmax and the cross-entropy loss function
loss = gloss.SoftmaxCrossEntropyLoss()
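# For reference (note added here): SoftmaxCrossEntropyLoss fuses the two steps
#   softmax(o)_i = exp(o_i) / sum_j exp(o_j)
#   loss(o, y)   = -log(softmax(o)_y)
# into a single, numerically more stable operation, which is why the model only
# needs a plain Dense(10) output layer.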
# Define the optimization algorithm
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.1})
# Train the model
num_epochs = 5
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
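To see the trained classifier in use, one batch from test_iter can be run through the network and the predicted class read off as the argmax over the 10 output scores (an illustrative snippet added here, not part of the original notes):
for X, y in test_iter:
    pred = net(X).argmax(axis=1)  # predicted class index for each image
    print(pred[:10].asnumpy(), y[:10].asnumpy())
    break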
III. Multilayer perceptron model
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.3})
num_epochs = 5
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
None, None, trainer)
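In equation form, the network above computes H = relu(X·W1 + b1) for the 256-unit hidden layer and O = H·W2 + b2 for the 10 output scores, which then feed the softmax cross-entropy loss.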
IV. L2 regularization
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn
n_train, n_test, num_inputs = 20, 100, 200
true_w, true_b = nd.ones((num_inputs, 1)) * 0.01, 0.05
features = nd.random.normal(shape=(n_train + n_test, num_inputs))
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]
def init_params():
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

def l2_penalty(w):
    return (w ** 2).sum() / 2
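# With the penalty defined above, the objective minimized on each mini-batch below is
#   loss(w, b) + lambd * ||w||^2 / 2
# so a larger lambd pushes the learned weights toward zero.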
batch_size, num_epochs, lr = 1, 100, 0.003
net, loss = gb.linreg, gb.squared_loss
train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels), batch_size, shuffle=True)
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2-norm penalty term to the loss.
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            gb.sgd([w, b], lr, batch_size)
        train_ls.append(loss(net(train_features, w, b),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features, w, b),
                            test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().asscalar())
fit_and_plot(lambd=3)
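For contrast, the same experiment can be run without the penalty; with only 20 training examples and 200 input dimensions this setup typically overfits badly, with the training loss far below the test loss:
fit_and_plot(lambd=0)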
# Gluon implementation
def fit_and_plot(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Apply weight decay to the weight parameters (their names usually end with 'weight').
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': lr, 'wd': wd})
    # Do not apply weight decay to the bias parameters (their names usually end with 'bias').
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Call step on both Trainer instances to update the weights and biases separately.
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(loss(net(train_features),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features),
                            test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net[0].weight.data().norm().asscalar())
fit_and_plot(3)
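Here 'wd' is Gluon's weight-decay hyperparameter: with plain SGD each update becomes roughly w <- (1 - lr*wd)*w - lr*grad, which is equivalent to adding the lambd*||w||^2/2 penalty of the from-scratch version. The unregularized baseline can be run again for comparison:
fit_and_plot(0)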
V. Dropout implementation
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
drop_prob1, drop_prob2 = 0.2, 0.5
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
nn.Dropout(drop_prob1),
nn.Dense(256, activation='relu'),
nn.Dropout(drop_prob2),
nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
num_epochs, lr, batch_size = 5, 0.5, 256
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
None, None, trainer)
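For intuition, the Dropout layers used above behave, at training time only, like the following hand-written inverted-dropout function (a sketch of the standard technique, not Gluon's internal implementation):
def dropout(X, drop_prob):
    # Zero each element with probability drop_prob, then rescale the survivors
    # by 1 / (1 - drop_prob) so the expected value of each activation is unchanged.
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob
At prediction time the Dropout layer is a no-op, so no extra rescaling is needed when evaluating on test_iter.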