1 Build steps (single-layer perceptron)
- Inputs and labels
tf.placeholder
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
t = tf.placeholder(tf.float32, [None, 10])
x = tf.reshape(x, [-1, 784])
- Define and initialize the parameter variables
tf.Variable
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
init_op = tf.global_variables_initializer()
- Define the model
y = tf.nn.softmax(tf.matmul(x, w) + b)
- Choose the loss function
cross_entropy = -tf.reduce_mean(t * tf.log(y))
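Note that tf.log(y) produces NaN as soon as a predicted probability reaches exactly 0; a common guard (an assumption here, not part of the original) is adding a small epsilon:
cross_entropy = -tf.reduce_mean(t * tf.log(y + 1e-10))  # epsilon keeps log() finite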
- On accuracy
is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
- Optimization algorithm
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.003)
train_step = optimizer.minimize(cross_entropy)
- Train the model
7.1 Parameter initialization
with tf.Session() as sess:
    sess.run(init_op)
7.2 Iteratively run train_step
with tf.Session() as sess:
    sess.run(init_op)  # initialize variables in this same session (step 7.1)
    for step in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, t: batch_ys})
        if step % 100 == 0:
            acc, loss = sess.run([accuracy, cross_entropy],
                                 feed_dict={x: batch_xs, t: batch_ys})
    acc, loss = sess.run([accuracy, cross_entropy],
                         feed_dict={x: mnist.test.images, t: mnist.test.labels})
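The loop assumes the mnist object from the TF1.x tutorial loader; a minimal sketch (reshape=False keeps the images in the [28, 28, 1] shape the x placeholder expects):
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True, reshape=False)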
2 Build steps (multilayer perceptron)
- Inputs and labels
tf.placeholder
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
t = tf.placeholder(tf.float32, [None, 10])
x = tf.reshape(x, [-1, 784])
- Define and initialize the parameter variables
tf.Variable
When the ReLU activation is used, the bias vectors are often initialized to small positive values so that the neurons start out in ReLU's non-zero region.
K = 200
L = 100
M = 60
N = 30
w1 = tf.Variable(tf.truncated_normal([784, K], stddev=0.1))
b1 = tf.Variable(tf.ones([K])/10)
w2 = tf.Variable(tf.truncated_normal([K, L], stddev=0.1))
b2 = tf.Variable(tf.ones([L])/10)
w3 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
b3 = tf.Variable(tf.ones([M])/10)
w4 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
b4 = tf.Variable(tf.ones([N])/10)
w5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
b5 = tf.Variable(tf.ones([10])/10)
init_op = tf.global_variables_initializer()
- Define the model. During training, apply dropout to the hidden-layer outputs: each output is dropped with probability 1 - pkeep, and the corresponding weights receive no update during backpropagation. At inference time all neuron outputs are restored, which indirectly improves the network's generalization.
pkeep = tf.placeholder(tf.float32)
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
y1d = tf.nn.dropout(y1, pkeep)
y2 = tf.nn.relu(tf.matmul(y1d, w2) + b2)  # feed the dropped-out activations forward
y2d = tf.nn.dropout(y2, pkeep)
y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
y3d = tf.nn.dropout(y3, pkeep)
y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
y4d = tf.nn.dropout(y4, pkeep)
y = tf.nn.softmax(tf.matmul(y4d, w5) + b5)
- Choose the loss function
logits = tf.matmul(y4d, w5) + b5
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=t))
- On accuracy
is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
- Optimization algorithm. A better optimizer such as AdamOptimizer can be used. As iterations progress the learning rate decays exponentially, giving more stable accuracy and loss curves late in training.
lr = tf.placeholder(tf.float32)
optimizer = tf.train.AdamOptimizer(lr)
train_step = optimizer.minimize(cross_entropy)
For the learning-rate decay (the function is named learning_rate here so it does not shadow the lr placeholder above):
import math
def learning_rate(step):
    max_lr = 0.003
    min_lr = 0.0001
    decay_speed = 2000.0
    return min_lr + (max_lr - min_lr) * math.exp(-step / decay_speed)
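A quick check of the schedule (values computed from the formula above, rounded):
print(learning_rate(0))      # 0.003 (starts at max_lr)
print(learning_rate(2000))   # ~0.00117 (the gap to min_lr has shrunk by a factor of e)
print(learning_rate(10000))  # ~0.00012 (approaching min_lr)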
- Train the model
Recall that the graph above has four placeholders (x, t, pkeep, lr), so feed_dict must supply four values.
7.1 Parameter initialization
with tf.Session() as sess:
    sess.run(init_op)
7.2 Iteratively run train_step
with tf.Session() as sess:
    sess.run(init_op)  # initialize variables in this same session (step 7.1)
    for step in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step,
                 feed_dict={x: batch_xs, t: batch_ys, pkeep: 0.75, lr: learning_rate(step)})
        # At test time the dropout keep probability is 1, i.e. all neurons are kept.
        if step % 100 == 0:
            acc, loss = sess.run([accuracy, cross_entropy],
                                 feed_dict={x: batch_xs, t: batch_ys, pkeep: 1})
    acc, loss = sess.run([accuracy, cross_entropy],
                         feed_dict={x: mnist.test.images, t: mnist.test.labels, pkeep: 1})
3 Build steps (convolutional neural network)
- Inputs and labels
tf.placeholder
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
t = tf.placeholder(tf.float32, [None, 10])
# No flattening here: conv2d consumes the 4-D image tensor [batch, 28, 28, 1] directly.
- Define and initialize the parameter variables
tf.Variable
When the ReLU activation is used, the bias vectors are often initialized to small positive values so that the neurons start out in ReLU's non-zero region.
K = 6
L = 12
M = 24
N = 200
w1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))
b1 = tf.Variable(tf.ones([K])/10)
w2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
b2 = tf.Variable(tf.ones([L])/10)
w3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
b3 = tf.Variable(tf.ones([M])/10)
w4 = tf.Variable(tf.truncated_normal([7*7*M, N], stddev=0.1))
b4 = tf.Variable(tf.ones([N])/10)
w5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
b5 = tf.Variable(tf.ones([10])/10)
init_op = tf.global_variables_initializer()
- Define the model. During training, apply dropout to the fully connected layer's output: each output is dropped with probability 1 - pkeep, and the corresponding weights receive no update during backpropagation. At inference time all neuron outputs are restored, which indirectly improves the network's generalization.
pkeep = tf.placeholder(tf.float32)
y1 = tf.nn.relu(tf.nn.conv2d(x, w1, strides=[1, 1, 1, 1], padding='SAME') + b1)
y2 = tf.nn.relu(tf.nn.conv2d(y1, w2, strides=[1, 2, 2, 1], padding='SAME') + b2)
y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, strides=[1, 2, 2, 1], padding='SAME') + b3)
yy = tf.reshape(y3, shape=[-1, 7*7*M])
y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
y4d = tf.nn.dropout(y4, pkeep)
logits = tf.matmul(y4d, w5) + b5
y = tf.nn.softmax(logits)
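The 7*7*M in the reshape comes from the two stride-2 convolutions: with SAME padding each one halves the 28x28 feature map. A trace of the shapes, assuming the definitions above:
# x  : [batch, 28, 28, 1]
# y1 : [batch, 28, 28, K]   (stride 1 keeps the spatial size)
# y2 : [batch, 14, 14, L]   (stride 2: 28 -> 14)
# y3 : [batch, 7, 7, M]     (stride 2: 14 -> 7)
print(y3.get_shape())  # (?, 7, 7, 24), hence w4 has shape [7*7*M, N]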
- Choose the loss function
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=t))
- On accuracy
is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
- Optimization algorithm. A better optimizer such as AdamOptimizer can be used. As iterations progress the learning rate decays exponentially, giving more stable accuracy and loss curves late in training.
lr = tf.placeholder(tf.float32)
optimizer = tf.train.AdamOptimizer(lr)
train_step = optimizer.minimize(cross_entropy)
For the learning-rate decay (again named learning_rate to avoid shadowing the lr placeholder):
import math
def learning_rate(step):
    max_lr = 0.003
    min_lr = 0.0001
    decay_speed = 2000.0
    return min_lr + (max_lr - min_lr) * math.exp(-step / decay_speed)
- Train the model
Recall that the graph above has four placeholders (x, t, pkeep, lr), so feed_dict must supply four values.
7.1 Parameter initialization
with tf.Session() as sess:
    sess.run(init_op)
7.2 Iteratively run train_step
with tf.Session() as sess:
    sess.run(init_op)  # initialize variables in this same session (step 7.1)
    for step in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step,
                 feed_dict={x: batch_xs, t: batch_ys, pkeep: 0.75, lr: learning_rate(step)})
        # At test time the dropout keep probability is 1, i.e. all neurons are kept.
        if step % 100 == 0:
            acc, loss = sess.run([accuracy, cross_entropy],
                                 feed_dict={x: batch_xs, t: batch_ys, pkeep: 1})
    acc, loss = sess.run([accuracy, cross_entropy],
                         feed_dict={x: mnist.test.images, t: mnist.test.labels, pkeep: 1})
4 TensorFlow statement notes
- Generates an array uniformly distributed over [minval, maxval).
tf.random.uniform(
shape,
minval=0,
maxval=None,
dtype=tf.dtypes.float32,
seed=None,
name=None
)
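A quick usage sketch (session style as above; the shape and bounds are arbitrary examples):
u = tf.random.uniform([2, 3], minval=0, maxval=1)
with tf.Session() as sess:
    print(sess.run(u))  # a 2x3 array of floats drawn from [0, 1)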
- Generates a random array from a truncated normal distribution (values more than two standard deviations from the mean are redrawn).
tf.random.truncated_normal(
shape,
mean=0.0,
stddev=1.0,
dtype=tf.dtypes.float32,
seed=None,
name=None
)
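This is the initializer used for all the weight matrices above; a small sketch:
w = tf.random.truncated_normal([784, 10], stddev=0.1)
with tf.Session() as sess:
    samples = sess.run(w)
    print(samples.shape)       # (784, 10)
    print(abs(samples).max())  # at most 0.2, i.e. two standard deviations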
- Each layer is created under a unique tf.name_scope; everything created inside that scope carries its name as a prefix.
with tf.name_scope('hidden1') as scope:
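For example (a sketch; the variable name and size are arbitrary):
with tf.name_scope('hidden1') as scope:
    weights = tf.Variable(tf.truncated_normal([784, 200], stddev=0.1), name='weights')
print(weights.name)  # "hidden1/weights:0" -- the scope name is the prefix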
- Inserts a dimension of size 1 into a tensor; axis gives the position at which the dimension is inserted.
tf.expand_dims(
input,
axis=None,
name=None,
dim=None
)
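For example, turning a single 28x28 image into a batch of one:
img = tf.zeros([28, 28])
batch = tf.expand_dims(img, axis=0)
print(batch.get_shape())  # (1, 28, 28)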
- Installing a graph instance as the global default
tf.Graph
with tf.Graph().as_default():
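A sketch of how ops register with that graph rather than the process-wide default:
g = tf.Graph()
with g.as_default():
    c = tf.constant(1.0)
print(c.graph is g)  # True -- the op was created inside g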
- Reading data involves two stages: a filename queue plus an in-memory queue. The filename queue is built with tf.train.string_input_producer, which takes a Python list of filenames and returns a queue of filenames.
tf.train.string_input_producer(
string_tensor,
num_epochs=None,
shuffle=True,
seed=None,
capacity=32,
shared_name=None,
name=None,
cancel_op=None
)
The statement that starts feeding data into these queues is tf.train.start_queue_runners.
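A minimal reading-pipeline sketch (the filenames a.png and b.png are hypothetical placeholders):
filename_queue = tf.train.string_input_producer(['a.png', 'b.png'], shuffle=True)
reader = tf.WholeFileReader()
key, value = reader.read(filename_queue)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start feeding the queues
    data = sess.run(value)  # raw bytes of one file
    coord.request_stop()
    coord.join(threads)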