深度学习Hello，world

作者: UlissesJr | 来源:发表于2018-11-01 20:43 被阅读10次

深度学习Hello，world
好还是坏：人工智能二分类问题
神经网络手写数字识别
[机器学习入门] 李宏毅机器学习笔记-9 （“Hello wor
Caffe学习笔记2：LeNet拒绝官方脚本！从数据准备到训练再
TensorFlow自学第2篇——线性回归
深度学习的核心：掌握训练数据的方法
常用markdown语法
hello
Markdown

用TensorFlow框架，写一个单隐层的多层感知机，用到dropout、Adagrad梯度下降和ReLU激活函数。准确率0.9825，tf入门的可以看一看。

# Create the model
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Train and evaluate a one-hidden-layer perceptron (ReLU + dropout) on MNIST.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# InteractiveSession installs itself as the default session, which is what
# lets the bare `.run()` / `.eval()` calls below work without a `with` block.
sess = tf.InteractiveSession()

# Model parameters: 784 input features -> 300 hidden units -> 10 classes.
in_units = 784
h1_units = 300
# W1 is drawn from a truncated normal (stddev 0.1); every other parameter
# starts at zero.
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, 10]))
b2 = tf.Variable(tf.zeros([10]))

# Inputs: a batch of flattened images and the dropout keep probability.
x = tf.placeholder(tf.float32, [None, in_units])
keep_prob = tf.placeholder(tf.float32)

# Hidden layer with dropout, then the output layer.
hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
logits = tf.matmul(hidden1_drop, W2) + b2
y = tf.nn.softmax(logits)  # class probabilities, used for prediction

# Loss and optimizer. The cross-entropy is computed from the logits rather
# than as -sum(y_ * log(softmax)): the hand-written form yields NaN/inf as
# soon as a softmax output underflows to 0.
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)

# Training: 5000 mini-batches of 200 examples, keeping 70% of hidden units.
tf.global_variables_initializer().run()
for _ in range(5000):
    batch_xs, batch_ys = mnist.train.next_batch(200)
    train_step.run({x: batch_xs, y_: batch_ys, keep_prob: 0.70})

# Evaluation on the test set; dropout is disabled by feeding keep_prob = 1.0.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

模型保存

# 有时候需要把模型保存起来，有时候需要在训练过程中做一些checkpoint，
# 以便在计算机宕机之后，我们还可以从之前checkpoint的位置继续训练。
# TensorFlow使得保存和加载模型非常方便，仅需要在构建阶段的最后创建Saver节点，
# 然后在计算阶段调用save()方法

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Train a softmax-regression classifier on MNIST and checkpoint it with
# tf.train.Saver, both periodically during training and once at the end.

# mn.SOURCE_URL = "http://yann.lecun.com/exdb/mnist/"
my_mnist = input_data.read_data_sets("MNIST_data_bak/", one_hot=True)

# The MNIST data is split into three parts:
# 55,000 data points of training data (mnist.train)
# 10,000 points of test data (mnist.test), and
# 5,000 points of validation data (mnist.validation).

# Each image is 28 pixels by 28 pixels

# Input is a batch of images: None leaves the batch size open and 784 is the
# length of each flattened image vector, so the input matrix is None x 784.
x = tf.placeholder(dtype=tf.float32, shape=(None, 784))
# Weights (784 x 10) and biases (10) are all initialised to zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)

# Training
# Each label is a one-hot vector of 10 values.
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 10))
# Cross-entropy loss, the usual choice for multi-class classification.
# It is computed from the logits rather than as -sum(y_ * log(softmax)):
# the hand-written form yields NaN/inf once a softmax output underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Variable initialiser
init = tf.global_variables_initializer()
# Saver node, created at the end of the construction phase
saver = tf.train.Saver()

# Despite the name, this counts mini-batch steps: each loop iteration below
# consumes a single batch of 100 examples.
n_epoch = 1000

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epoch):
        # Intermediate checkpoint every 100 steps, taken before that step's
        # update; every save overwrites the same path.
        if epoch % 100 == 0:
            save_path = saver.save(sess, "./ckpt/my_model.ckpt")

        batch_xs, batch_ys = my_mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Current value of the trained weights (not used further in this snippet).
    best_theta = W.eval()
    # Final checkpoint.
    save_path = saver.save(sess, "./ckpt/my_model_final.ckpt")

重新将模型加载使用

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Rebuild the softmax-regression graph, restore its variables from the final
# checkpoint and print the accuracy on the MNIST test set.

# mn.SOURCE_URL = "http://yann.lecun.com/exdb/mnist/"
my_mnist = input_data.read_data_sets("MNIST_data_bak/", one_hot=True)

# The MNIST data is split into three parts:
# 55,000 data points of training data (mnist.train)
# 10,000 points of test data (mnist.test), and
# 5,000 points of validation data (mnist.validation).

# Each image is 28 pixels by 28 pixels

# Batch of flattened images: any number of rows, 784 pixel values per row.
x = tf.placeholder(tf.float32, shape=[None, 784])
# Zero-initialised parameters; their values are replaced by the restore below.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

y = tf.nn.softmax(tf.matmul(x, W) + b)
# One-hot labels, 10 values per image.
y_ = tf.placeholder(tf.float32, shape=[None, 10])

saver = tf.train.Saver()

# Evaluation ops.
# tf.argmax returns the index of the largest entry along axis 1, i.e. the
# predicted digit for y and the true digit for y_.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Cast the boolean hits to float32 and average them to get the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

test_feed = {x: my_mnist.test.images, y_: my_mnist.test.labels}

with tf.Session() as sess:
    saver.restore(sess, "./ckpt/my_model_final.ckpt")
    # Test
    print(sess.run(accuracy, feed_dict=test_feed))

模块化

对逻辑结构相似的计算图，应该进行模块化，如下面代码，relu1和relu2，结构相同，只是参数不同，我们应该进行封装。

import tensorflow as tf


# Two structurally identical ReLU units written out by hand; they differ only
# in their parameters, and their outputs are summed.
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')

# First unit: weights1 / bias1 -> z1 -> relu1
w1 = tf.Variable(tf.random_uniform((n_features, 1)), name='weights1')
b1 = tf.Variable(0.0, name='bias1')
z1 = tf.add(tf.matmul(X, w1), b1, name='z1')
relu1 = tf.maximum(z1, 0., name='relu1')

# Second unit: weights2 / bias2 -> z2 -> relu2
w2 = tf.Variable(tf.random_uniform((n_features, 1)), name='weights2')
b2 = tf.Variable(0.0, name='bias2')
z2 = tf.add(tf.matmul(X, w2), b2, name='z2')
relu2 = tf.maximum(z2, 0., name='relu2')

output = tf.add(relu1, relu2, name='output')

init = tf.global_variables_initializer()

# Three sample rows with n_features values each.
sample_batch = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

with tf.Session() as sess:
    sess.run(init)
    result = sess.run(output, feed_dict={X: sample_batch})
    print(result)

封装之后的效果

import tensorflow as tf


def relu(X):
    """Build one single-output ReLU unit on top of ``X``.

    Creates a fresh weight column (uniform random initial values) and a
    scalar bias (initially 0.0), then returns ``max(X @ w + b, 0)``.

    Args:
        X: 2-D tensor whose second dimension is statically known; that
            dimension sets the number of weights.

    Returns:
        The tensor produced by ``tf.maximum`` (op name ``'relu'``).
    """
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_uniform((n_inputs, 1)), name='weights')
    bias = tf.Variable(0.0, name='bias')
    linear = tf.matmul(X, weights)
    pre_activation = tf.add(linear, bias, name='z')
    return tf.maximum(pre_activation, 0., name='relu')


# Build five ReLU units with the relu() helper and sum their outputs.
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

init = tf.global_variables_initializer()

# Three sample rows with n_features values each.
sample_batch = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

with tf.Session() as sess:
    sess.run(init)
    result = sess.run(output, feed_dict={X: sample_batch})
    print(result)