tensorflow多层神经网络学习-实现mnist数字识别

MNIST手写数字识别是机器学习的入门任务。因为最近在学习TensorFlow，本着多加练习（more practice）的原则，这里用TensorFlow也动手实现一次。

用tensorflow实现其实非常简单，包含下面几个步骤

因为是监督学习，所以要先定义输入和输出：这里的输入是MNIST数据集中图片的像素特征（28×28=784维），输出是0-9这10个数字中的一个，所以输出节点是10个（标签采用one-hot编码）
定义参数，一般是指权重（weights）和偏置（bias）。这里定义了两层全连接网络的参数，一般在定义时就给定初始值
定义代价函数，对于概率（分类）问题，一般会使用交叉熵损失函数 $H(p, q) = -\sum_{x} p(x)\log q(x)$
优化方法，一般是梯度下降
其实这段代码的损失函数写了三种：一种是对交叉熵求均值，一种是对交叉熵求和，还有一种是均方差。均方差其实也可以用，只不过可能得不到全局最小值，因为均方差损失函数的图像可能是一个振荡的波形（存在多个局部极小值）；不过在这里它是可以收敛的，只是要给定较大的学习率，才能达到另外两种的收敛速度。

下面给出具体代码

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf


# Network shape: flattened 28x28 input image, one hidden layer, and one
# output unit per class.
input_node = 28 * 28
layer1_node = 500
output_node = 10

# Optimisation settings: gradient-descent step size, number of training
# iterations, and number of examples drawn per iteration.
learning_rate = 0.01
train_step = 20000
batch_size = 100


def train(mnist):
    """Train a two-layer fully connected classifier and report its accuracy.

    Builds a graph of ``input_node`` inputs -> ``layer1_node`` ReLU units ->
    ``output_node`` logits, minimises the batch-summed softmax cross-entropy
    with plain gradient descent for ``train_step`` iterations, prints the
    validation accuracy every 100 iterations and the test accuracy once
    training has finished.

    Args:
        mnist: Dataset object providing ``train.next_batch(n)`` plus
            ``validation`` and ``test`` splits, each with ``images`` and
            ``labels`` attributes. Labels are compared with ``argmax`` over
            axis 1, so they are expected to be one-hot encoded.
    """
    # Graph inputs: a batch of flattened images and their labels.
    x = tf.placeholder(tf.float32, shape=[None, input_node], name="x-input")
    y_ = tf.placeholder(
        tf.float32, shape=[None, output_node], name='label-input')

    # Hidden-layer parameters: small random weights, zero biases.
    w1 = tf.Variable(tf.truncated_normal(
        [input_node, layer1_node], stddev=0.1, dtype=tf.float32))
    b1 = tf.Variable(tf.constant(0, dtype=tf.float32, shape=[layer1_node]))

    # Output-layer parameters.
    w2 = tf.Variable(
        tf.truncated_normal([layer1_node, output_node], stddev=0.1, dtype=tf.float32))
    b2 = tf.Variable(tf.constant(0, dtype=tf.float32, shape=[output_node]))

    # Forward pass; ``y`` holds unscaled logits (no softmax applied here).
    layer1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    y = tf.matmul(layer1, w2) + b2

    # Per-example cross-entropy from the fused op. It applies softmax
    # internally in a numerically stable way, unlike a hand-written
    # ``-y_ * log(clip(softmax(y)))`` whose clipping zeroes the gradient
    # once a probability underflows the clip threshold.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_, logits=y)

    # "Sum model": the loss is summed (not averaged) over the batch.
    # Alternatives are averaging the cross-entropy with tf.reduce_mean, or a
    # mean squared error tf.reduce_mean(tf.square(y - y_)).
    loss = tf.reduce_sum(cross_entropy)

    # global_step is incremented by the optimizer on every update.
    global_step = tf.Variable(0, trainable=False)
    train_op = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(loss, global_step=global_step)

    # Fraction of examples whose predicted class matches the label.
    correct_predict = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        validate_feed = {
            x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(train_step):
            # Report progress before applying the update for this step, so
            # step 0 shows the accuracy of the untrained network.
            if i % 100 == 0:
                validate_acc = session.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using sum model is %g " % (
                    i, validate_acc))
            xs, ys = mnist.train.next_batch(batch_size)
            session.run(train_op, feed_dict={x: xs, y_: ys})

        test_acc = session.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using sum model is %g" % (
            train_step, test_acc))


def main(argv=None):
    """Load the dataset with one-hot labels and start training.

    Args:
        argv: Not used by this function; accepted so that it can be called
            with an argument list by the application runner.
    """
    dataset = input_data.read_data_sets("D:/download/minst", one_hot=True)
    train(dataset)

# Script entry point: hand control to TensorFlow's application runner,
# which invokes main() with the command-line arguments.
if __name__ == "__main__":
    tf.app.run(main=main)